Fix the EXTRACT log call to match the styling of the other methods

2025-05-14 16:01:10 +08:00
parent 260e2dc347
commit 137556b3dc
1 changed files with 9 additions and 7 deletions
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -360,7 +360,7 @@ class AsyncWebCrawler:
                        pdf_data=pdf_data,
                        verbose=config.verbose,
                        is_raw_html=True if url.startswith("raw:") else False,
-                        redirected_url=async_response.redirected_url, 
+                        redirected_url=async_response.redirected_url,
                        **kwargs,
                    )

@@ -503,7 +503,7 @@ class AsyncWebCrawler:
            tables = media.pop("tables", [])
            links = result.links.model_dump()
            metadata = result.metadata
-            
+
        fit_html = preprocess_html_for_schema(html_content=html, text_threshold= 500, max_size= 300_000)

        ################################
@@ -585,11 +585,13 @@ class AsyncWebCrawler:
            # Choose content based on input_format
            content_format = config.extraction_strategy.input_format
            if content_format == "fit_markdown" and not markdown_result.fit_markdown:
-                self.logger.warning(
-                    message="Fit markdown requested but not available. Falling back to raw markdown.",
-                    tag="EXTRACT",
-                    params={"url": _url},
-                )
+
+                self.logger.url_status(
+                        url=_url,
+                        success=bool(html),
+                        timing=time.perf_counter() - t1,
+                        tag="EXTRACT",
+                    )
                content_format = "markdown"

            content = {