Fix critical RCE via deserialization and eval() in /crawl endpoint
- Replace raw eval() in _compute_field() with AST-validated _safe_eval_expression() that blocks __import__, dunder attribute access, and import statements while preserving safe transforms
- Add ALLOWED_DESERIALIZE_TYPES allowlist to from_serializable_dict(), preventing arbitrary class instantiation from API input
- Update security contact email and add v0.8.1 security fixes to SECURITY.md with researcher acknowledgment
- Add 17 security tests covering both fixes
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import ast
|
||||
import inspect
|
||||
from typing import Any, List, Dict, Optional, Tuple, Pattern, Union
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
@@ -1001,6 +1002,69 @@ class LLMExtractionStrategy(ExtractionStrategy):
|
||||
#######################################################
|
||||
# New extraction strategies for JSON-based extraction #
|
||||
#######################################################
|
||||
|
||||
# Safe builtins allowed in computed field expressions.
# NOTE(review): `type` also accepts the three-argument class-creation form;
# confirm expressions really need it before keeping it in the allowlist.
_SAFE_EVAL_BUILTINS = {
    "str": str, "int": int, "float": float, "bool": bool,
    "len": len, "round": round, "abs": abs, "min": min, "max": max,
    "sum": sum, "sorted": sorted, "reversed": reversed,
    "list": list, "dict": dict, "tuple": tuple, "set": set,
    "enumerate": enumerate, "zip": zip, "map": map, "filter": filter,
    "any": any, "all": all, "range": range,
    "True": True, "False": False, "None": None,
    "isinstance": isinstance, "type": type,
}

# Attribute names without a leading underscore that still expose interpreter
# internals (generator / coroutine / frame / traceback handles). From the frame
# of a running generator, `f_back` and `f_globals` reach this module's real
# globals and therefore the unrestricted builtins, so they must be rejected.
_BLOCKED_EVAL_ATTRIBUTES = frozenset({
    "gi_frame", "gi_code", "gi_yieldfrom",
    "cr_frame", "cr_code", "cr_await", "cr_origin",
    "ag_frame", "ag_code", "ag_await",
    "f_back", "f_globals", "f_locals", "f_builtins", "f_code",
    "tb_frame", "tb_next",
})

# String methods whose template mini-language performs attribute/index lookups
# at runtime, invisibly to the AST walk.
_FORMAT_METHOD_NAMES = frozenset({"format", "format_map"})


def _is_safe_format_template(template: str) -> bool:
    """Return True if a ``str.format`` template uses only plain fields (no ``.attr`` / ``[key]``)."""
    import string  # local import: only needed on this rarely-taken path

    for _text, field_name, format_spec, _conversion in string.Formatter().parse(template):
        if field_name and ("." in field_name or "[" in field_name):
            return False
        # Format specs may contain nested replacement fields, e.g. "{0:{1.attr}}".
        if format_spec and not _is_safe_format_template(format_spec):
            return False
    return True


def _safe_eval_expression(expression: str, local_vars: dict) -> Any:
    """
    Evaluate a computed field expression safely using AST validation.

    Allows simple transforms (math, string methods, attribute access on data)
    while blocking constructs that can escape the restricted environment:

    * names starting with ``__`` (e.g. ``__builtins__``, which would otherwise
      resolve to the shared allowlist dict) and calls to any ``_``-prefixed name;
    * attributes starting with ``_`` (``__class__``, ``__globals__``, ...);
    * frame / generator / traceback introspection attributes
      (``gi_frame``, ``f_back``, ``f_globals``, ...);
    * ``str.format`` / ``format_map`` unless the template is a string literal
      whose fields contain no attribute or index access (f-strings remain
      available and are validated like any other expression).

    This is a hardening layer, not a full sandbox: it does not bound CPU or
    memory use (e.g. ``9 ** 9 ** 9``).

    Args:
        expression: The Python expression string to evaluate.
        local_vars: The local variables (extracted item fields) available to the expression.

    Returns:
        The result of evaluating the expression.

    Raises:
        ValueError: If the expression is not valid syntax or contains
            disallowed constructs.
    """
    try:
        tree = ast.parse(expression, mode="eval")
    except SyntaxError as e:
        raise ValueError(f"Invalid expression syntax: {e}") from e

    for node in ast.walk(tree):
        # Statements already fail to parse in "eval" mode; kept as defense in depth.
        if isinstance(node, (ast.Import, ast.ImportFrom)):
            raise ValueError("Import statements are not allowed in expressions")

        if isinstance(node, ast.Attribute):
            # Block attribute access to private/dunder attributes (e.g., __class__, __globals__)
            if node.attr.startswith("_"):
                raise ValueError(
                    f"Access to private/dunder attribute '{node.attr}' is not allowed"
                )
            # Block frame-walking escapes such as gen.gi_frame.f_back.f_globals
            if node.attr in _BLOCKED_EVAL_ATTRIBUTES:
                raise ValueError(
                    f"Access to introspection attribute '{node.attr}' is not allowed"
                )
            # Format templates traverse attributes at runtime, so only literal
            # templates that can be vetted here are accepted.
            if node.attr in _FORMAT_METHOD_NAMES:
                receiver = node.value
                if not (
                    isinstance(receiver, ast.Constant)
                    and isinstance(receiver.value, str)
                    and _is_safe_format_template(receiver.value)
                ):
                    raise ValueError(
                        f"'{node.attr}' is only allowed on string literals "
                        "without attribute or index access in fields"
                    )

        # Block dunder names in any position, not only when called directly.
        if isinstance(node, ast.Name) and node.id.startswith("__"):
            raise ValueError(
                f"Access to dunder name '{node.id}' is not allowed in expressions"
            )

        # Block calls to __import__ or any name starting with _
        if isinstance(node, ast.Call):
            func = node.func
            if isinstance(func, ast.Name) and func.id.startswith("_"):
                raise ValueError(
                    f"Calling '{func.id}' is not allowed in expressions"
                )
            if isinstance(func, ast.Attribute) and func.attr.startswith("_"):
                raise ValueError(
                    f"Calling '{func.attr}' is not allowed in expressions"
                )

    # Hand eval() a fresh copy so an expression can never mutate the shared
    # module-level allowlist and affect later evaluations.
    safe_globals = {"__builtins__": dict(_SAFE_EVAL_BUILTINS)}
    return eval(compile(tree, "<expression>", "eval"), safe_globals, local_vars)
|
||||
|
||||
|
||||
class JsonElementExtractionStrategy(ExtractionStrategy):
|
||||
"""
|
||||
Abstract base class for extracting structured JSON from HTML content.
|
||||
@@ -1236,7 +1300,7 @@ class JsonElementExtractionStrategy(ExtractionStrategy):
|
||||
def _compute_field(self, item, field):
|
||||
try:
|
||||
if "expression" in field:
|
||||
return eval(field["expression"], {}, item)
|
||||
return _safe_eval_expression(field["expression"], item)
|
||||
elif "function" in field:
|
||||
return field["function"](item)
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user