Update all documentation to import extraction strategies directly from crawl4ai.

This commit is contained in:
UncleCode
2025-06-10 18:08:27 +08:00
parent cab457e9c7
commit c0fd36982d
43 changed files with 7811 additions and 7803 deletions

View File

@@ -39,7 +39,7 @@ import json
import asyncio
from pathlib import Path
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, LLMConfig
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import JsonCssExtractionStrategy
async def smart_extraction_workflow():
"""
@@ -176,7 +176,7 @@ social_schema = JsonCssExtractionStrategy.generate_schema(
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import JsonCssExtractionStrategy
# Manual schema for consistent product pages
simple_schema = {
@@ -342,7 +342,7 @@ asyncio.run(extract_complex_ecommerce())
### XPath Alternative (When CSS Isn't Enough)
```python
from crawl4ai.extraction_strategy import JsonXPathExtractionStrategy
from crawl4ai import JsonXPathExtractionStrategy
# XPath for more complex selections
xpath_schema = {
@@ -387,7 +387,7 @@ strategy = JsonXPathExtractionStrategy(xpath_schema, verbose=True)
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.extraction_strategy import RegexExtractionStrategy
from crawl4ai import RegexExtractionStrategy
async def extract_common_patterns():
# Use built-in patterns for common data types