Compare commits

...

2 Commits

View File

@@ -2,8 +2,8 @@
import asyncio, json, hashlib, time, psutil import asyncio, json, hashlib, time, psutil
from contextlib import suppress from contextlib import suppress
from typing import Dict from typing import Dict
from crawl4ai import AsyncWebCrawler, BrowserConfig from crawl4ai import AsyncWebCrawler, BrowserConfig, BrowserAdapter
from typing import Dict from typing import Dict ,Optional
from utils import load_config from utils import load_config
CONFIG = load_config() CONFIG = load_config()
@@ -15,11 +15,22 @@ LOCK = asyncio.Lock()
MEM_LIMIT = CONFIG.get("crawler", {}).get("memory_threshold_percent", 95.0) # % RAM refuse new browsers above this MEM_LIMIT = CONFIG.get("crawler", {}).get("memory_threshold_percent", 95.0) # % RAM refuse new browsers above this
IDLE_TTL = CONFIG.get("crawler", {}).get("pool", {}).get("idle_ttl_sec", 1800) # close if unused for 30min IDLE_TTL = CONFIG.get("crawler", {}).get("pool", {}).get("idle_ttl_sec", 1800) # close if unused for 30min
def _sig(cfg: BrowserConfig) -> str:
payload = json.dumps(cfg.to_dict(), sort_keys=True, separators=(",",":")) def _sig(cfg: BrowserConfig, adapter: Optional[BrowserAdapter] = None) -> str:
try:
config_payload = json.dumps(cfg.to_dict(), sort_keys=True, separators=(",", ":"))
except (TypeError, ValueError):
# Fallback to string representation if JSON serialization fails
config_payload = str(cfg.to_dict())
adapter_name = adapter.__class__.__name__ if adapter else "PlaywrightAdapter"
payload = f"{config_payload}:{adapter_name}"
return hashlib.sha1(payload.encode()).hexdigest() return hashlib.sha1(payload.encode()).hexdigest()
async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
async def get_crawler(
cfg: BrowserConfig, adapter: Optional[BrowserAdapter] = None
) -> AsyncWebCrawler:
sig = None
try: try:
sig = _sig(cfg) sig = _sig(cfg)
async with LOCK: async with LOCK:
@@ -37,6 +48,7 @@ async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
except Exception as e: except Exception as e:
raise RuntimeError(f"Failed to start browser: {e}") raise RuntimeError(f"Failed to start browser: {e}")
finally: finally:
if sig:
if sig in POOL: if sig in POOL:
LAST_USED[sig] = time.time() LAST_USED[sig] = time.time()
else: else: