---
# Application Configuration
app:
  title: "Crawl4AI API"
  version: "1.0.0"
  host: "0.0.0.0"
  port: 8020
  reload: false
  timeout_keep_alive: 300

# Default LLM Configuration
llm:
  provider: "openai/gpt-4o-mini"
  api_key_env: "OPENAI_API_KEY"
  # api_key: sk-...  # If you pass the API key directly then api_key_env will be ignored

# Redis Configuration
redis:
  host: "localhost"
  port: 6379
  db: 0
  password: ""
  ssl: false
  # NOTE(review): these four keys were declared twice in the original file —
  # duplicate keys are invalid YAML (most parsers silently keep the last value),
  # so they are listed once here. The original wrote Python-style `None`, which
  # YAML parses as the STRING "None"; `null` is almost certainly what was meant —
  # confirm against the Redis client's SSL parameter handling.
  ssl_cert_reqs: null
  ssl_ca_certs: null
  ssl_certfile: null
  ssl_keyfile: null

# Rate Limiting Configuration
rate_limiting:
  enabled: true
  default_limit: "1000/minute"
  trusted_proxies: []
  storage_uri: "memory://"  # Use "redis://localhost:6379" for production

# Security Configuration
security:
  enabled: false
  jwt_enabled: false
  https_redirect: false
  trusted_hosts: ["*"]
  headers:
    x_content_type_options: "nosniff"
    x_frame_options: "DENY"
    content_security_policy: "default-src 'self'"
    strict_transport_security: "max-age=63072000; includeSubDomains"

# Crawler Pool Configuration
crawler_pool:
  enabled: true  # Set to false to disable the pool

  # --- Option 1: Auto-calculate size ---
  auto_calculate_size: true
  calculation_params:
    mem_headroom_mb: 512  # Memory reserved for OS/other apps
    avg_page_mem_mb: 150  # Estimated MB per concurrent "tab"/page in browsers
    fd_per_page: 20  # Estimated file descriptors per page
    core_multiplier: 4  # Max crawlers per CPU core
    min_pool_size: 2  # Minimum number of primary crawlers
    max_pool_size: 16  # Maximum number of primary crawlers

  # --- Option 2: Manual size (ignored if auto_calculate_size is true) ---
  # pool_size: 8

  # --- Other Pool Settings ---
  backup_pool_size: 1  # Number of backup crawlers
  max_wait_time_s: 30.0  # Max seconds a request waits for a free crawler
  throttle_threshold_percent: 70.0  # Start throttling delay above this % usage
  throttle_delay_min_s: 0.1  # Min throttle delay
  throttle_delay_max_s: 0.5  # Max throttle delay

  # --- Browser Config for Pooled Crawlers ---
  browser_config:
    # No need for "type": "BrowserConfig" here, just params
    headless: true
    verbose: false  # Keep pool crawlers less verbose in production
    # user_agent: "MyPooledCrawler/1.0"  # Example
    # Add other BrowserConfig params as needed (e.g., proxy, viewport)

# Crawler Configuration
crawler:
  memory_threshold_percent: 95.0
  rate_limiter:
    base_delay: [1.0, 2.0]
  timeouts:
    stream_init: 30.0  # Timeout for stream initialization
    batch_process: 300.0  # Timeout for batch processing

# Logging Configuration
logging:
  level: "INFO"
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  file: "logs/app.log"
  verbose: true

# Observability Configuration
observability:
  prometheus:
    enabled: true
    endpoint: "/metrics"
  health_check:
    endpoint: "/health"