Enhance features and documentation

- Updated version to 0.3.743
- Improved ManagedBrowser configuration with dynamic host/port
- Implemented fast HTML formatting in web crawler
- Enhanced markdown generation with a new generator class
- Improved sanitization and utility functions
- Added contributor details and pull request acknowledgments
- Updated documentation for clearer usage scenarios
- Adjusted tests to reflect class name changes
This commit is contained in:
UncleCode
2024-11-28 12:45:05 +08:00
parent 829a1f7992
commit 24723b2f10
9 changed files with 123 additions and 42 deletions

View File

@@ -25,7 +25,8 @@ from .config import (
from .utils import (
sanitize_input_encode,
InvalidCSSSelectorError,
format_html
format_html,
fast_format_html
)
from urllib.parse import urlparse
import random
@@ -534,16 +535,17 @@ class AsyncWebCrawler:
"timing": time.perf_counter() - t1
}
)
screenshot = None if not screenshot else screenshot
if kwargs.get("prettiify", False):
cleaned_html = fast_format_html(cleaned_html)
return CrawlResult(
url=url,
html=html,
cleaned_html=format_html(cleaned_html),
cleaned_html=cleaned_html,
markdown_v2=markdown_v2,
markdown=markdown,
fit_markdown=fit_markdown,