# crawler_pool.py  (new file)
import asyncio
import hashlib
import json
import time
from contextlib import suppress
from typing import Dict, Optional

import psutil

from crawl4ai import AsyncWebCrawler, BrowserConfig
from crawl4ai.async_crawler_strategy import AsyncPlaywrightCrawlerStrategy

# Import browser adapters with fallback
try:
    from crawl4ai.browser_adapter import BrowserAdapter, PlaywrightAdapter
except ImportError:
    # Fallback for development environment
    import os
    import sys

    sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
    from crawl4ai.browser_adapter import BrowserAdapter, PlaywrightAdapter
from utils import load_config

CONFIG = load_config()

POOL: Dict[str, AsyncWebCrawler] = {}
LAST_USED: Dict[str, float] = {}
LOCK = asyncio.Lock()

MEM_LIMIT = CONFIG.get("crawler", {}).get(
    "memory_threshold_percent", 95.0
)  # % RAM – refuse new browsers above this
IDLE_TTL = (
    CONFIG.get("crawler", {}).get("pool", {}).get("idle_ttl_sec", 1800)
)  # close if unused for 30 min


def _sig(cfg: BrowserConfig, adapter: Optional[BrowserAdapter] = None) -> str:
    try:
        config_payload = json.dumps(cfg.to_dict(), sort_keys=True, separators=(",", ":"))
    except (TypeError, ValueError):
        # Fallback to string representation if JSON serialization fails
        config_payload = str(cfg.to_dict())
    adapter_name = adapter.__class__.__name__ if adapter else "PlaywrightAdapter"
    payload = f"{config_payload}:{adapter_name}"
    return hashlib.sha1(payload.encode()).hexdigest()


async def get_crawler(
    cfg: BrowserConfig, adapter: Optional[BrowserAdapter] = None
) -> AsyncWebCrawler:
    sig = None
    try:
        sig = _sig(cfg, adapter)
        async with LOCK:
            if sig in POOL:
                LAST_USED[sig] = time.time()
                return POOL[sig]
            if psutil.virtual_memory().percent >= MEM_LIMIT:
                raise MemoryError("RAM pressure – new browser denied")

            # Create crawler - let it initialize the strategy with proper logger
            # Pass browser_adapter as a kwarg so AsyncWebCrawler can use it when creating the strategy
            crawler = AsyncWebCrawler(
                config=cfg, 
                thread_safe=False
            )
            
            # Set the browser adapter on the strategy after crawler initialization
            if adapter:
                # Create a new strategy with the adapter and the crawler's logger
                from crawl4ai.async_crawler_strategy import AsyncPlaywrightCrawlerStrategy
                crawler.crawler_strategy = AsyncPlaywrightCrawlerStrategy(
                    browser_config=cfg, 
                    logger=crawler.logger,
                    browser_adapter=adapter
                )
            
            await crawler.start()
            POOL[sig] = crawler
            LAST_USED[sig] = time.time()
            return crawler
    except MemoryError as e:
        raise MemoryError(f"RAM pressure – new browser denied: {e}")
    except Exception as e:
        raise RuntimeError(f"Failed to start browser: {e}")
    finally:
        if sig:
            if sig in POOL:
                LAST_USED[sig] = time.time()
            else:
                # If we failed to start the browser, we should remove it from the pool
                POOL.pop(sig, None)
                LAST_USED.pop(sig, None)
        # If we failed to start the browser, we should remove it from the pool


async def close_all():
    async with LOCK:
        await asyncio.gather(
            *(c.close() for c in POOL.values()), return_exceptions=True
        )
        POOL.clear()
        LAST_USED.clear()


async def janitor():
    while True:
        await asyncio.sleep(60)
        now = time.time()
        async with LOCK:
            for sig, crawler in list(POOL.items()):
                if now - LAST_USED[sig] > IDLE_TTL:
                    with suppress(Exception):
                        await crawler.close()
                    POOL.pop(sig, None)
                    LAST_USED.pop(sig, None)