Compare commits
2 Commits
main
...
docker/fix
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
05ec0535cd | ||
|
|
83aeb565ee |
@@ -2,8 +2,8 @@
|
|||||||
import asyncio, json, hashlib, time, psutil
|
import asyncio, json, hashlib, time, psutil
|
||||||
from contextlib import suppress
|
from contextlib import suppress
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
from crawl4ai import AsyncWebCrawler, BrowserConfig
|
from crawl4ai import AsyncWebCrawler, BrowserConfig, BrowserAdapter
|
||||||
from typing import Dict
|
from typing import Dict ,Optional
|
||||||
from utils import load_config
|
from utils import load_config
|
||||||
|
|
||||||
CONFIG = load_config()
|
CONFIG = load_config()
|
||||||
@@ -15,11 +15,22 @@ LOCK = asyncio.Lock()
|
|||||||
MEM_LIMIT = CONFIG.get("crawler", {}).get("memory_threshold_percent", 95.0) # % RAM – refuse new browsers above this
|
MEM_LIMIT = CONFIG.get("crawler", {}).get("memory_threshold_percent", 95.0) # % RAM – refuse new browsers above this
|
||||||
IDLE_TTL = CONFIG.get("crawler", {}).get("pool", {}).get("idle_ttl_sec", 1800) # close if unused for 30 min
|
IDLE_TTL = CONFIG.get("crawler", {}).get("pool", {}).get("idle_ttl_sec", 1800) # close if unused for 30 min
|
||||||
|
|
||||||
def _sig(cfg: BrowserConfig) -> str:
|
|
||||||
payload = json.dumps(cfg.to_dict(), sort_keys=True, separators=(",",":"))
|
def _sig(cfg: BrowserConfig, adapter: Optional[BrowserAdapter] = None) -> str:
|
||||||
|
try:
|
||||||
|
config_payload = json.dumps(cfg.to_dict(), sort_keys=True, separators=(",", ":"))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
# Fallback to string representation if JSON serialization fails
|
||||||
|
config_payload = str(cfg.to_dict())
|
||||||
|
adapter_name = adapter.__class__.__name__ if adapter else "PlaywrightAdapter"
|
||||||
|
payload = f"{config_payload}:{adapter_name}"
|
||||||
return hashlib.sha1(payload.encode()).hexdigest()
|
return hashlib.sha1(payload.encode()).hexdigest()
|
||||||
|
|
||||||
async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
|
|
||||||
|
async def get_crawler(
|
||||||
|
cfg: BrowserConfig, adapter: Optional[BrowserAdapter] = None
|
||||||
|
) -> AsyncWebCrawler:
|
||||||
|
sig = None
|
||||||
try:
|
try:
|
||||||
sig = _sig(cfg)
|
sig = _sig(cfg)
|
||||||
async with LOCK:
|
async with LOCK:
|
||||||
@@ -37,6 +48,7 @@ async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(f"Failed to start browser: {e}")
|
raise RuntimeError(f"Failed to start browser: {e}")
|
||||||
finally:
|
finally:
|
||||||
|
if sig:
|
||||||
if sig in POOL:
|
if sig in POOL:
|
||||||
LAST_USED[sig] = time.time()
|
LAST_USED[sig] = time.time()
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user