Parallel processing with retry on failure (exponential backoff); simplified URL validation and normalisation; respects robots.txt

This commit is contained in:
Aravind Karnam
2024-09-19 12:34:12 +05:30
parent 78f26ac263
commit 7f3e2e47ed
5 changed files with 116 additions and 33 deletions

View File

@@ -1,7 +1,8 @@
from pydantic import BaseModel
from typing import List, Dict
from ..models import CrawlResult
class ScraperResult(BaseModel):
    """Aggregated result of a scraping run.

    Attributes:
        url: The starting (seed) URL that was scraped.
        crawled_urls: Every URL visited during the crawl.
        extracted_data: Per-URL crawl output, keyed by URL.
    """

    url: str
    crawled_urls: List[str]
    # Was an untyped `Dict`; narrowed to map each crawled URL to its
    # CrawlResult so consumers get typed access to extracted content.
    extracted_data: Dict[str, CrawlResult]