Update all documentation to import extraction strategies directly from crawl4ai.
This commit is contained in:
@@ -5,7 +5,7 @@ prices, ratings, and other details using CSS selectors.
|
||||
"""
|
||||
|
||||
from crawl4ai import AsyncWebCrawler
|
||||
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
|
||||
from crawl4ai import JsonCssExtractionStrategy
|
||||
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
|
||||
import json
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ prices, ratings, and other details using CSS selectors.
|
||||
"""
|
||||
|
||||
from crawl4ai import AsyncWebCrawler, CacheMode
|
||||
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
|
||||
from crawl4ai import JsonCssExtractionStrategy
|
||||
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
|
||||
import json
|
||||
from playwright.async_api import Page, BrowserContext
|
||||
|
||||
@@ -5,7 +5,7 @@ prices, ratings, and other details using CSS selectors.
|
||||
"""
|
||||
|
||||
from crawl4ai import AsyncWebCrawler, CacheMode
|
||||
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
|
||||
from crawl4ai import JsonCssExtractionStrategy
|
||||
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
|
||||
import json
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ from pathlib import Path
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
|
||||
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
|
||||
from crawl4ai import JsonCssExtractionStrategy
|
||||
from crawl4ai.script.c4a_compile import C4ACompiler
|
||||
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ from pathlib import Path
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
|
||||
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
|
||||
from crawl4ai import JsonCssExtractionStrategy
|
||||
from crawl4ai.script.c4a_compile import C4ACompiler
|
||||
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ import os
|
||||
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
|
||||
from crawl4ai import LLMConfig
|
||||
from crawl4ai.extraction_strategy import (
|
||||
from crawl4ai import (
|
||||
LLMExtractionStrategy,
|
||||
JsonCssExtractionStrategy,
|
||||
JsonXPathExtractionStrategy,
|
||||
|
||||
@@ -518,7 +518,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from crawl4ai.extraction_strategy import LLMExtractionStrategy\n",
|
||||
"from crawl4ai import LLMExtractionStrategy\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"import os, json\n",
|
||||
"\n",
|
||||
@@ -594,7 +594,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from crawl4ai.extraction_strategy import CosineStrategy\n",
|
||||
"from crawl4ai import CosineStrategy\n",
|
||||
"\n",
|
||||
"async def cosine_similarity_extraction():\n",
|
||||
" async with AsyncWebCrawler() as crawler:\n",
|
||||
|
||||
@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field
|
||||
from crawl4ai import AsyncWebCrawler, CacheMode, BrowserConfig, CrawlerRunConfig
|
||||
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
|
||||
from crawl4ai.content_filter_strategy import PruningContentFilter
|
||||
from crawl4ai.extraction_strategy import (
|
||||
from crawl4ai import (
|
||||
JsonCssExtractionStrategy,
|
||||
LLMExtractionStrategy,
|
||||
)
|
||||
@@ -416,7 +416,7 @@ async def crawl_dynamic_content_pages_method_2():
|
||||
|
||||
|
||||
async def cosine_similarity_extraction():
|
||||
from crawl4ai.extraction_strategy import CosineStrategy
|
||||
from crawl4ai import CosineStrategy
|
||||
crawl_config = CrawlerRunConfig(
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
extraction_strategy=CosineStrategy(
|
||||
|
||||
@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field
|
||||
from crawl4ai import AsyncWebCrawler, CacheMode, BrowserConfig, CrawlerRunConfig
|
||||
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
|
||||
from crawl4ai.content_filter_strategy import PruningContentFilter
|
||||
from crawl4ai.extraction_strategy import (
|
||||
from crawl4ai import (
|
||||
JsonCssExtractionStrategy,
|
||||
LLMExtractionStrategy,
|
||||
)
|
||||
@@ -416,7 +416,7 @@ async def crawl_dynamic_content_pages_method_2():
|
||||
|
||||
|
||||
async def cosine_similarity_extraction():
|
||||
from crawl4ai.extraction_strategy import CosineStrategy
|
||||
from crawl4ai import CosineStrategy
|
||||
crawl_config = CrawlerRunConfig(
|
||||
cache_mode=CacheMode.BYPASS,
|
||||
extraction_strategy=CosineStrategy(
|
||||
|
||||
@@ -2,7 +2,7 @@ import os
|
||||
import json
|
||||
from crawl4ai.web_crawler import WebCrawler
|
||||
from crawl4ai.chunking_strategy import *
|
||||
from crawl4ai.extraction_strategy import *
|
||||
from crawl4ai import *
|
||||
from crawl4ai.crawler_strategy import *
|
||||
|
||||
url = r"https://marketplace.visualstudio.com/items?itemName=Unclecode.groqopilot"
|
||||
|
||||
@@ -18,7 +18,7 @@ from crawl4ai import RoundRobinProxyStrategy
|
||||
from crawl4ai.content_filter_strategy import LLMContentFilter
|
||||
from crawl4ai import DefaultMarkdownGenerator
|
||||
from crawl4ai import LLMConfig
|
||||
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
|
||||
from crawl4ai import JsonCssExtractionStrategy
|
||||
from crawl4ai.processors.pdf import PDFCrawlerStrategy, PDFContentScrapingStrategy
|
||||
from pprint import pprint
|
||||
|
||||
|
||||
Reference in New Issue
Block a user