refactor(crawler): improve HTML handling and cleanup codebase
- Add HTML attribute preservation in GoogleSearchCrawler - Fix lxml import references in utils.py - Remove unused ssl_certificate.json - Clean up imports and code organization in hub.py - Update test case formatting and remove unused image search test BREAKING CHANGE: Removed ssl_certificate.json file which might affect existing certificate validations
This commit is contained in:
@@ -1,17 +1,13 @@
|
||||
import importlib
|
||||
import pkgutil
|
||||
from pathlib import Path
|
||||
import logging
|
||||
# crawl4ai/hub.py
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Type
|
||||
import logging
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
import inspect
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# crawl4ai/base.py
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional, Dict, Any
|
||||
import json
|
||||
import logging
|
||||
|
||||
class BaseCrawler(ABC):
|
||||
def __init__(self):
|
||||
|
||||
Reference in New Issue
Block a user