Update all documentation to import extraction strategies directly from crawl4ai.

This commit is contained in:
UncleCode
2025-06-10 18:08:27 +08:00
parent cab457e9c7
commit c0fd36982d
43 changed files with 7811 additions and 7803 deletions

View File

@@ -14,7 +14,7 @@ The Cosine Strategy:
## Basic Usage
```python
from crawl4ai.extraction_strategy import CosineStrategy
from crawl4ai import CosineStrategy
strategy = CosineStrategy(
semantic_filter="product reviews", # Target content type

View File

@@ -102,7 +102,7 @@ import json
from pydantic import BaseModel, Field
from typing import List
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy
from crawl4ai import LLMExtractionStrategy
class Product(BaseModel):
name: str
@@ -219,7 +219,7 @@ import asyncio
from typing import List
from pydantic import BaseModel, Field
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai.extraction_strategy import LLMExtractionStrategy
from crawl4ai import LLMExtractionStrategy
class Entity(BaseModel):
name: str

View File

@@ -38,7 +38,7 @@ Let's begin with a **simple** schema-based extraction using the `JsonCssExtracti
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import JsonCssExtractionStrategy
async def extract_crypto_prices():
# 1. Define a simple extraction schema
@@ -108,7 +108,7 @@ Below is a short example demonstrating **XPath** extraction plus the **`raw://`*
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.extraction_strategy import JsonXPathExtractionStrategy
from crawl4ai import JsonXPathExtractionStrategy
async def extract_crypto_prices_xpath():
# 1. Minimal dummy HTML with some repeating rows
@@ -309,7 +309,7 @@ Key Takeaways:
import json
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import JsonCssExtractionStrategy
ecommerce_schema = {
# ... the advanced schema from above ...
@@ -649,7 +649,7 @@ While manually crafting schemas is powerful and precise, Crawl4AI now offers a c
The schema generator is available as a static method on both `JsonCssExtractionStrategy` and `JsonXPathExtractionStrategy`. You can choose between OpenAI's GPT-4 and the open-source Ollama for schema generation:
```python
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, JsonXPathExtractionStrategy
from crawl4ai import JsonCssExtractionStrategy, JsonXPathExtractionStrategy
from crawl4ai import LLMConfig
# Sample HTML with product information