Refactor adaptive crawling state management

- Renamed `CrawlState` to `AdaptiveCrawlResult` to better reflect its purpose.
- Updated all references to `CrawlState` in the codebase, including method signatures and documentation.
- Modified the `AdaptiveCrawler` class to initialize and manage the new `AdaptiveCrawlResult` state.
- Adjusted example strategies and documentation to align with the new state class.
- Ensured all tests are updated to use `AdaptiveCrawlResult` instead of `CrawlState`.
This commit is contained in:
UncleCode
2025-07-24 20:11:43 +08:00
parent d1de82a332
commit 843457a9cb
12 changed files with 51 additions and 1898 deletions

View File

@@ -23,7 +23,7 @@ from crawl4ai import (
AsyncWebCrawler,
AdaptiveCrawler,
AdaptiveConfig,
-CrawlState
+AdaptiveCrawlResult
)

View File

@@ -13,7 +13,7 @@ import math
sys.path.append(str(Path(__file__).parent.parent))
from crawl4ai import AsyncWebCrawler
-from crawl4ai.adaptive_crawler import CrawlState, StatisticalStrategy
+from crawl4ai.adaptive_crawler import AdaptiveCrawlResult, StatisticalStrategy
from crawl4ai.models import CrawlResult
@@ -37,7 +37,7 @@ class ConfidenceTestHarness:
print("=" * 80)
# Initialize state
-state = CrawlState(query=self.query)
+state = AdaptiveCrawlResult(query=self.query)
# Create crawler
async with AsyncWebCrawler() as crawler:
@@ -107,7 +107,7 @@ class ConfidenceTestHarness:
state.metrics['prev_confidence'] = confidence
-def _debug_coverage_calculation(self, state: CrawlState, query_terms: List[str]):
+def _debug_coverage_calculation(self, state: AdaptiveCrawlResult, query_terms: List[str]):
"""Debug coverage calculation step by step"""
coverage_score = 0.0
max_possible_score = 0.0
@@ -136,7 +136,7 @@ class ConfidenceTestHarness:
new_coverage = self._calculate_coverage_new(state, query_terms)
print(f" → New Coverage: {new_coverage:.3f}")
-def _calculate_coverage_new(self, state: CrawlState, query_terms: List[str]) -> float:
+def _calculate_coverage_new(self, state: AdaptiveCrawlResult, query_terms: List[str]) -> float:
"""New coverage calculation without IDF"""
if not query_terms or state.total_documents == 0:
return 0.0

View File

@@ -15,7 +15,7 @@ import os
sys.path.append(str(Path(__file__).parent.parent.parent))
from crawl4ai import AsyncWebCrawler, AdaptiveCrawler, AdaptiveConfig
-from crawl4ai.adaptive_crawler import EmbeddingStrategy, CrawlState
+from crawl4ai.adaptive_crawler import EmbeddingStrategy, AdaptiveCrawlResult
from crawl4ai.models import CrawlResult
@@ -132,7 +132,7 @@ async def test_embedding_performance():
strategy.config = config
# Initialize state
-state = CrawlState()
+state = AdaptiveCrawlResult()
state.query = "async await coroutines event loops tasks"
# Start performance monitoring

View File

@@ -20,7 +20,7 @@ from crawl4ai import (
AsyncWebCrawler,
AdaptiveCrawler,
AdaptiveConfig,
-CrawlState
+AdaptiveCrawlResult
)
console = Console()