fix: Import os and adjust file saving path in URL seeder demo
This commit is contained in:
@@ -2,10 +2,12 @@
|
|||||||
🚀 URL Seeder + AsyncWebCrawler = Magic!
|
🚀 URL Seeder + AsyncWebCrawler = Magic!
|
||||||
Quick demo showing discovery → filter → crawl pipeline
|
Quick demo showing discovery → filter → crawl pipeline
|
||||||
"""
|
"""
|
||||||
import asyncio
|
import asyncio, os
|
||||||
from crawl4ai import AsyncUrlSeeder, AsyncWebCrawler, SeedingConfig, CrawlerRunConfig, AsyncLogger, DefaultMarkdownGenerator
|
from crawl4ai import AsyncUrlSeeder, AsyncWebCrawler, SeedingConfig, CrawlerRunConfig, AsyncLogger, DefaultMarkdownGenerator
|
||||||
from crawl4ai.content_filter_strategy import PruningContentFilter
|
from crawl4ai.content_filter_strategy import PruningContentFilter
|
||||||
|
|
||||||
|
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
# 🔍 Example 1: Discover ALL → Filter → Crawl
|
# 🔍 Example 1: Discover ALL → Filter → Crawl
|
||||||
async def discover_and_crawl():
|
async def discover_and_crawl():
|
||||||
"""Find Python module tutorials & extract them all!"""
|
"""Find Python module tutorials & extract them all!"""
|
||||||
@@ -57,6 +59,7 @@ async def discover_and_crawl():
|
|||||||
if result.success:
|
if result.success:
|
||||||
# Save each tutorial (name from URL)
|
# Save each tutorial (name from URL)
|
||||||
name = result.url.split("/")[-2] + ".md"
|
name = result.url.split("/")[-2] + ".md"
|
||||||
|
name = os.path.join(CURRENT_DIR, name)
|
||||||
with open(name, "w") as f:
|
with open(name, "w") as f:
|
||||||
f.write(result.markdown.fit_markdown)
|
f.write(result.markdown.fit_markdown)
|
||||||
saved += 1
|
saved += 1
|
||||||
@@ -117,11 +120,11 @@ async def smart_search_with_bm25():
|
|||||||
|
|
||||||
# 🎬 Run the show!
|
# 🎬 Run the show!
|
||||||
async def main():
|
async def main():
|
||||||
# print("=" * 60)
|
print("=" * 60)
|
||||||
# await discover_and_crawl()
|
await discover_and_crawl()
|
||||||
# print("\n" + "=" * 60 + "\n")
|
print("\n" + "=" * 60 + "\n")
|
||||||
# await explore_beautifulsoup()
|
await explore_beautifulsoup()
|
||||||
# print("\n" + "=" * 60 + "\n")
|
print("\n" + "=" * 60 + "\n")
|
||||||
await smart_search_with_bm25()
|
await smart_search_with_bm25()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user