feat(api): implement crawler pool manager for improved resource handling

Adds a new CrawlerManager class to handle browser instance pooling and failover:
- Implements auto-scaling based on system resources
- Adds primary/backup crawler management
- Integrates memory monitoring and throttling
- Adds streaming support with memory tracking
- Updates API endpoints to use pooled crawlers
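The pooling and reuse behavior described in the bullets above can be sketched roughly as follows. All names here (`CrawlerPool`, `lease`, `max_size`) are hypothetical illustrations, not the actual CrawlerManager API; auto-scaling and memory throttling are omitted for brevity:

```python
import asyncio
from contextlib import asynccontextmanager

class CrawlerPool:
    """Minimal sketch of a browser-instance pool: caps concurrency and
    reuses idle instances instead of creating a new one per request."""

    def __init__(self, factory, max_size=4):
        self._factory = factory                   # creates a new crawler instance
        self._idle = []                           # crawlers ready for reuse
        self._sem = asyncio.Semaphore(max_size)   # caps live instances

    @asynccontextmanager
    async def lease(self):
        await self._sem.acquire()
        # Reuse an idle crawler if one exists, otherwise create one.
        crawler = self._idle.pop() if self._idle else self._factory()
        try:
            yield crawler
        finally:
            self._idle.append(crawler)            # return to pool for reuse
            self._sem.release()

async def demo():
    created = []
    pool = CrawlerPool(lambda: created.append(object()) or created[-1], max_size=2)
    async with pool.lease() as a:
        pass
    async with pool.lease() as b:
        pass
    # Only one instance was ever created; the second lease reused it.
    return len(created), a is b

print(asyncio.run(demo()))  # -> (1, True)
```

The semaphore is what makes throttling possible: a memory monitor could shrink or grow `max_size` at runtime to implement the auto-scaling the commit describes.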

BREAKING CHANGE: API endpoints now require CrawlerManager initialization
Author: UncleCode
Date: 2025-04-18 22:26:24 +08:00
Parent: 907cba194f
Commit: 16b2318242
9 changed files with 2082 additions and 59 deletions


@@ -542,9 +542,9 @@ class AsyncWebCrawler:
         markdown_input_html = source_lambda()
         # Log which source is being used (optional, but helpful for debugging)
-        if self.logger and verbose:
-            actual_source_used = selected_html_source if selected_html_source in html_source_selector else 'cleaned_html (default)'
-            self.logger.debug(f"Using '{actual_source_used}' as source for Markdown generation for {url}", tag="MARKDOWN_SRC")
+        # if self.logger and verbose:
+        #     actual_source_used = selected_html_source if selected_html_source in html_source_selector else 'cleaned_html (default)'
+        #     self.logger.debug(f"Using '{actual_source_used}' as source for Markdown generation for {url}", tag="MARKDOWN_SRC")
     except Exception as e:
         # Handle potential errors, especially from preprocess_html_for_schema