Refactor adaptive crawling state management

- Renamed `CrawlState` to `AdaptiveCrawlResult` to better reflect its purpose.
- Updated all references to `CrawlState` in the codebase, including method signatures and documentation.
- Modified the `AdaptiveCrawler` class to initialize and manage the new `AdaptiveCrawlResult` state.
- Adjusted example strategies and documentation to align with the new state class.
- Updated all tests to use `AdaptiveCrawlResult` instead of `CrawlState`.
This commit is contained in:
UncleCode
2025-07-24 20:11:43 +08:00
parent d1de82a332
commit 843457a9cb
12 changed files with 51 additions and 1898 deletions

View File

@@ -130,7 +130,7 @@ Factors:
```python
class CustomLinkScorer:
- def score(self, link: Link, query: str, state: CrawlState) -> float:
+ def score(self, link: Link, query: str, state: AdaptiveCrawlResult) -> float:
# Prioritize specific URL patterns
if "/api/reference/" in link.href:
return 2.0 # Double the score
@@ -325,17 +325,17 @@ with open("crawl_analysis.json", "w") as f:
from crawl4ai.adaptive_crawler import BaseStrategy
class DomainSpecificStrategy(BaseStrategy):
- def calculate_coverage(self, state: CrawlState) -> float:
+ def calculate_coverage(self, state: AdaptiveCrawlResult) -> float:
# Custom coverage calculation
# e.g., weight certain terms more heavily
pass
- def calculate_consistency(self, state: CrawlState) -> float:
+ def calculate_consistency(self, state: AdaptiveCrawlResult) -> float:
# Custom consistency logic
# e.g., domain-specific validation
pass
- def rank_links(self, links: List[Link], state: CrawlState) -> List[Link]:
+ def rank_links(self, links: List[Link], state: AdaptiveCrawlResult) -> List[Link]:
# Custom link ranking
# e.g., prioritize specific URL patterns
pass
@@ -359,7 +359,7 @@ class HybridStrategy(BaseStrategy):
URLPatternStrategy()
]
- def calculate_confidence(self, state: CrawlState) -> float:
+ def calculate_confidence(self, state: AdaptiveCrawlResult) -> float:
# Weighted combination of strategies
scores = [s.calculate_confidence(state) for s in self.strategies]
weights = [0.5, 0.3, 0.2]