docs(examples): update demo scripts and fix output formats
Update example scripts to reflect latest API changes and improve demonstrations: - Increase test URLs in dispatcher example from 20 to 40 pages - Comment out unused dispatcher strategies for cleaner output - Fix scraping strategies performance script to use correct object notation - Update v0_4_3_features_demo with additional feature mentions and uncomment demo sections These changes make the examples more current and better aligned with the actual API.
This commit is contained in:
@@ -117,17 +117,17 @@ def test_scraping():
|
||||
timing_stats.report()
|
||||
|
||||
# Print stats of LXML output
|
||||
print("\nLXML Output:")
|
||||
print(f"\nExtracted links: {len(result_selected['links']['internal']) + len(result_selected['links']['external'])}")
|
||||
print(f"Extracted images: {len(result_selected['media']['images'])}")
|
||||
print(f"Clean HTML size: {len(result_selected['cleaned_html'])/1024:.2f} KB")
|
||||
print("\Turbo Output:")
|
||||
print(f"\nExtracted links: {len(result_selected.links.internal) + len(result_selected.links.external)}")
|
||||
print(f"Extracted images: {len(result_selected.media.images)}")
|
||||
print(f"Clean HTML size: {len(result_selected.cleaned_html)/1024:.2f} KB")
|
||||
print(f"Scraping time: {t2 - t1:.2f} seconds")
|
||||
|
||||
# Print stats of original output
|
||||
print("\nOriginal Output:")
|
||||
print(f"\nExtracted links: {len(result_original['links']['internal']) + len(result_original['links']['external'])}")
|
||||
print(f"Extracted images: {len(result_original['media']['images'])}")
|
||||
print(f"Clean HTML size: {len(result_original['cleaned_html'])/1024:.2f} KB")
|
||||
print(f"\nExtracted links: {len(result_original.links.internal) + len(result_original.links.external)}")
|
||||
print(f"Extracted images: {len(result_original.media.images)}")
|
||||
print(f"Clean HTML size: {len(result_original.cleaned_html)/1024:.2f} KB")
|
||||
print(f"Scraping time: {t3 - t1:.2f} seconds")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user