refactor(crawler): improve HTML handling and cleanup codebase

- Add HTML attribute preservation in GoogleSearchCrawler
- Fix lxml import references in utils.py
- Remove unused ssl_certificate.json
- Clean up imports and code organization in hub.py
- Update test case formatting and remove unused image search test

BREAKING CHANGE: Removed the ssl_certificate.json file, which might affect existing certificate validations
This commit is contained in:
UncleCode
2025-02-07 21:56:27 +08:00
parent 91073c1244
commit b957ff2ecd
5 changed files with 17 additions and 79 deletions

View File

@@ -1,17 +1,13 @@
import importlib
import pkgutil
from pathlib import Path
import logging
# crawl4ai/hub.py
from abc import ABC, abstractmethod
from typing import Dict, Type
import logging
import importlib
from pathlib import Path
import inspect
logger = logging.getLogger(__name__)
# crawl4ai/base.py
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any
import json
import logging
class BaseCrawler(ABC):
def __init__(self):