Update all documentation to import extraction strategies directly from crawl4ai.

This commit is contained in:
UncleCode
2025-06-10 18:08:27 +08:00
parent cab457e9c7
commit c0fd36982d
43 changed files with 7811 additions and 7803 deletions

View File

@@ -5,7 +5,7 @@ prices, ratings, and other details using CSS selectors.
"""
from crawl4ai import AsyncWebCrawler
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import JsonCssExtractionStrategy
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
import json

View File

@@ -5,7 +5,7 @@ prices, ratings, and other details using CSS selectors.
"""
from crawl4ai import AsyncWebCrawler, CacheMode
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import JsonCssExtractionStrategy
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
import json
from playwright.async_api import Page, BrowserContext

View File

@@ -5,7 +5,7 @@ prices, ratings, and other details using CSS selectors.
"""
from crawl4ai import AsyncWebCrawler, CacheMode
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import JsonCssExtractionStrategy
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
import json

View File

@@ -20,7 +20,7 @@ from pathlib import Path
from typing import List, Dict, Any
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import JsonCssExtractionStrategy
from crawl4ai.script.c4a_compile import C4ACompiler

View File

@@ -20,7 +20,7 @@ from pathlib import Path
from typing import List, Dict, Any
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import JsonCssExtractionStrategy
from crawl4ai.script.c4a_compile import C4ACompiler

View File

@@ -12,7 +12,7 @@ import os
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai import LLMConfig
from crawl4ai.extraction_strategy import (
from crawl4ai import (
LLMExtractionStrategy,
JsonCssExtractionStrategy,
JsonXPathExtractionStrategy,

View File

@@ -518,7 +518,7 @@
}
],
"source": [
"from crawl4ai.extraction_strategy import LLMExtractionStrategy\n",
"from crawl4ai import LLMExtractionStrategy\n",
"from pydantic import BaseModel, Field\n",
"import os, json\n",
"\n",
@@ -594,7 +594,7 @@
}
],
"source": [
"from crawl4ai.extraction_strategy import CosineStrategy\n",
"from crawl4ai import CosineStrategy\n",
"\n",
"async def cosine_similarity_extraction():\n",
" async with AsyncWebCrawler() as crawler:\n",

View File

@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field
from crawl4ai import AsyncWebCrawler, CacheMode, BrowserConfig, CrawlerRunConfig
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
from crawl4ai.content_filter_strategy import PruningContentFilter
from crawl4ai.extraction_strategy import (
from crawl4ai import (
JsonCssExtractionStrategy,
LLMExtractionStrategy,
)
@@ -416,7 +416,7 @@ async def crawl_dynamic_content_pages_method_2():
async def cosine_similarity_extraction():
from crawl4ai.extraction_strategy import CosineStrategy
from crawl4ai import CosineStrategy
crawl_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
extraction_strategy=CosineStrategy(

View File

@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field
from crawl4ai import AsyncWebCrawler, CacheMode, BrowserConfig, CrawlerRunConfig
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
from crawl4ai.content_filter_strategy import PruningContentFilter
from crawl4ai.extraction_strategy import (
from crawl4ai import (
JsonCssExtractionStrategy,
LLMExtractionStrategy,
)
@@ -416,7 +416,7 @@ async def crawl_dynamic_content_pages_method_2():
async def cosine_similarity_extraction():
from crawl4ai.extraction_strategy import CosineStrategy
from crawl4ai import CosineStrategy
crawl_config = CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
extraction_strategy=CosineStrategy(

View File

@@ -2,7 +2,7 @@ import os
import json
from crawl4ai.web_crawler import WebCrawler
from crawl4ai.chunking_strategy import *
from crawl4ai.extraction_strategy import *
from crawl4ai import *
from crawl4ai.crawler_strategy import *
url = r"https://marketplace.visualstudio.com/items?itemName=Unclecode.groqopilot"

View File

@@ -18,7 +18,7 @@ from crawl4ai import RoundRobinProxyStrategy
from crawl4ai.content_filter_strategy import LLMContentFilter
from crawl4ai import DefaultMarkdownGenerator
from crawl4ai import LLMConfig
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
from crawl4ai import JsonCssExtractionStrategy
from crawl4ai.processors.pdf import PDFCrawlerStrategy, PDFContentScrapingStrategy
from pprint import pprint