Update all documentation to import extraction strategies directly from crawl4ai.

This commit is contained in:
UncleCode
2025-06-10 18:08:27 +08:00
parent cab457e9c7
commit c0fd36982d
43 changed files with 7811 additions and 7803 deletions

View File

@@ -476,7 +476,7 @@ services:
```python
# Method 1: Create config objects and dump to see expected JSON structure
from crawl4ai import BrowserConfig, CrawlerRunConfig, LLMConfig, CacheMode
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy, LLMExtractionStrategy
import json
# Create browser config and see JSON structure

View File

@@ -37,7 +37,7 @@ import json
from pydantic import BaseModel, Field
from typing import List
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
-from crawl4ai.extraction_strategy import LLMExtractionStrategy
+from crawl4ai import LLMExtractionStrategy
class SentimentAnalysis(BaseModel):
"""Use LLM when you need semantic understanding"""

View File

@@ -39,7 +39,7 @@ import json
import asyncio
from pathlib import Path
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
async def smart_extraction_workflow():
"""
@@ -176,7 +176,7 @@ social_schema = JsonCssExtractionStrategy.generate_schema(
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
# Manual schema for consistent product pages
simple_schema = {
@@ -342,7 +342,7 @@ asyncio.run(extract_complex_ecommerce())
### XPath Alternative (When CSS Isn't Enough)
```python
-from crawl4ai.extraction_strategy import JsonXPathExtractionStrategy
+from crawl4ai import JsonXPathExtractionStrategy
# XPath for more complex selections
xpath_schema = {
@@ -387,7 +387,7 @@ strategy = JsonXPathExtractionStrategy(xpath_schema, verbose=True)
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import RegexExtractionStrategy
+from crawl4ai import RegexExtractionStrategy
async def extract_common_patterns():
# Use built-in patterns for common data types

View File

@@ -1835,7 +1835,7 @@ import json
from pydantic import BaseModel, Field
from typing import List
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
-from crawl4ai.extraction_strategy import LLMExtractionStrategy
+from crawl4ai import LLMExtractionStrategy
class SentimentAnalysis(BaseModel):
"""Use LLM when you need semantic understanding"""
@@ -2743,7 +2743,7 @@ import json
import asyncio
from pathlib import Path
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
async def smart_extraction_workflow():
"""
@@ -2880,7 +2880,7 @@ social_schema = JsonCssExtractionStrategy.generate_schema(
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
# Manual schema for consistent product pages
simple_schema = {
@@ -3046,7 +3046,7 @@ asyncio.run(extract_complex_ecommerce())
### XPath Alternative (When CSS Isn't Enough)
```python
-from crawl4ai.extraction_strategy import JsonXPathExtractionStrategy
+from crawl4ai import JsonXPathExtractionStrategy
# XPath for more complex selections
xpath_schema = {
@@ -3091,7 +3091,7 @@ strategy = JsonXPathExtractionStrategy(xpath_schema, verbose=True)
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import RegexExtractionStrategy
+from crawl4ai import RegexExtractionStrategy
async def extract_common_patterns():
# Use built-in patterns for common data types
@@ -4711,7 +4711,7 @@ services:
```python
# Method 1: Create config objects and dump to see expected JSON structure
from crawl4ai import BrowserConfig, CrawlerRunConfig, LLMConfig, CacheMode
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy, LLMExtractionStrategy
import json
# Create browser config and see JSON structure
@@ -5792,7 +5792,7 @@ import json
import asyncio
from pathlib import Path
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
async def smart_extraction_workflow():
"""
@@ -5929,7 +5929,7 @@ social_schema = JsonCssExtractionStrategy.generate_schema(
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
# Manual schema for consistent product pages
simple_schema = {
@@ -6095,7 +6095,7 @@ asyncio.run(extract_complex_ecommerce())
### XPath Alternative (When CSS Isn't Enough)
```python
-from crawl4ai.extraction_strategy import JsonXPathExtractionStrategy
+from crawl4ai import JsonXPathExtractionStrategy
# XPath for more complex selections
xpath_schema = {
@@ -6140,7 +6140,7 @@ strategy = JsonXPathExtractionStrategy(xpath_schema, verbose=True)
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import RegexExtractionStrategy
+from crawl4ai import RegexExtractionStrategy
async def extract_common_patterns():
# Use built-in patterns for common data types

View File

@@ -1835,7 +1835,7 @@ import json
from pydantic import BaseModel, Field
from typing import List
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
-from crawl4ai.extraction_strategy import LLMExtractionStrategy
+from crawl4ai import LLMExtractionStrategy
class SentimentAnalysis(BaseModel):
"""Use LLM when you need semantic understanding"""
@@ -2743,7 +2743,7 @@ import json
import asyncio
from pathlib import Path
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
async def smart_extraction_workflow():
"""
@@ -2880,7 +2880,7 @@ social_schema = JsonCssExtractionStrategy.generate_schema(
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
# Manual schema for consistent product pages
simple_schema = {
@@ -3046,7 +3046,7 @@ asyncio.run(extract_complex_ecommerce())
### XPath Alternative (When CSS Isn't Enough)
```python
-from crawl4ai.extraction_strategy import JsonXPathExtractionStrategy
+from crawl4ai import JsonXPathExtractionStrategy
# XPath for more complex selections
xpath_schema = {
@@ -3091,7 +3091,7 @@ strategy = JsonXPathExtractionStrategy(xpath_schema, verbose=True)
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import RegexExtractionStrategy
+from crawl4ai import RegexExtractionStrategy
async def extract_common_patterns():
# Use built-in patterns for common data types
@@ -4711,7 +4711,7 @@ services:
```python
# Method 1: Create config objects and dump to see expected JSON structure
from crawl4ai import BrowserConfig, CrawlerRunConfig, LLMConfig, CacheMode
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy, LLMExtractionStrategy
import json
# Create browser config and see JSON structure
@@ -5792,7 +5792,7 @@ import json
import asyncio
from pathlib import Path
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
async def smart_extraction_workflow():
"""
@@ -5929,7 +5929,7 @@ social_schema = JsonCssExtractionStrategy.generate_schema(
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
+from crawl4ai import JsonCssExtractionStrategy
# Manual schema for consistent product pages
simple_schema = {
@@ -6095,7 +6095,7 @@ asyncio.run(extract_complex_ecommerce())
### XPath Alternative (When CSS Isn't Enough)
```python
-from crawl4ai.extraction_strategy import JsonXPathExtractionStrategy
+from crawl4ai import JsonXPathExtractionStrategy
# XPath for more complex selections
xpath_schema = {
@@ -6140,7 +6140,7 @@ strategy = JsonXPathExtractionStrategy(xpath_schema, verbose=True)
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
-from crawl4ai.extraction_strategy import RegexExtractionStrategy
+from crawl4ai import RegexExtractionStrategy
async def extract_common_patterns():
# Use built-in patterns for common data types

File diff suppressed because it is too large. [Load Diff]