import modules from enhanceable deserialization

This commit is contained in:
Chris Murphy
2025-12-01 16:18:59 -05:00
parent e95e8e1a97
commit 3a8f8298d3

View File

@@ -1,5 +1,5 @@
import importlib
import os import os
from typing import Union
import warnings import warnings
import requests import requests
from .config import ( from .config import (
@@ -27,14 +27,14 @@ from .table_extraction import TableExtractionStrategy, DefaultTableExtraction
from .cache_context import CacheMode from .cache_context import CacheMode
from .proxy_strategy import ProxyRotationStrategy from .proxy_strategy import ProxyRotationStrategy
from typing import Union, List, Callable
import inspect import inspect
from typing import Any, Dict, Optional from typing import Any, Callable, Dict, List, Optional, Union
from enum import Enum from enum import Enum
# Type alias for URL matching # Type alias for URL matching
UrlMatcher = Union[str, Callable[[str], bool], List[Union[str, Callable[[str], bool]]]] UrlMatcher = Union[str, Callable[[str], bool], List[Union[str, Callable[[str], bool]]]]
class MatchMode(Enum): class MatchMode(Enum):
OR = "or" OR = "or"
AND = "and" AND = "and"
@@ -42,8 +42,7 @@ class MatchMode(Enum):
# from .proxy_strategy import ProxyConfig # from .proxy_strategy import ProxyConfig
def to_serializable_dict(obj: Any, ignore_default_value : bool = False):
def to_serializable_dict(obj: Any, ignore_default_value : bool = False) -> Dict:
""" """
Recursively convert an object to a serializable dictionary using {type, params} structure Recursively convert an object to a serializable dictionary using {type, params} structure
for complex objects. for complex objects.
@@ -110,8 +109,6 @@ def to_serializable_dict(obj: Any, ignore_default_value : bool = False) -> Dict:
# if value is not None: # if value is not None:
# current_values[attr_name] = to_serializable_dict(value) # current_values[attr_name] = to_serializable_dict(value)
return { return {
"type": obj.__class__.__name__, "type": obj.__class__.__name__,
"params": current_values "params": current_values
@@ -137,12 +134,20 @@ def from_serializable_dict(data: Any) -> Any:
if data["type"] == "dict" and "value" in data: if data["type"] == "dict" and "value" in data:
return {k: from_serializable_dict(v) for k, v in data["value"].items()} return {k: from_serializable_dict(v) for k, v in data["value"].items()}
# Import from crawl4ai for class instances cls = None
import crawl4ai # If you are receiving an error while trying to convert a dict to an object:
# Either add a module to the `module_paths` list, or add the `data["type"]` to the crawl4ai __init__.py file
if hasattr(crawl4ai, data["type"]): module_paths = ["crawl4ai"]
cls = getattr(crawl4ai, data["type"]) for module_path in module_paths:
try:
mod = importlib.import_module(module_path)
if hasattr(mod, data["type"]):
cls = getattr(mod, data["type"])
break
except (ImportError, AttributeError):
continue
if cls is not None:
# Handle Enum # Handle Enum
if issubclass(cls, Enum): if issubclass(cls, Enum):
return cls(data["params"]) return cls(data["params"])