Compare commits
2 Commits
unclecode-
...
vr0.5.0.po
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e896c08f9c | ||
|
|
56bc3c6e45 |
@@ -1,2 +1,2 @@
|
|||||||
# crawl4ai/_version.py
|
# crawl4ai/_version.py
|
||||||
__version__ = "0.5.0"
|
__version__ = "0.5.0.post1"
|
||||||
|
|||||||
@@ -712,7 +712,7 @@ def profiles_cmd():
|
|||||||
# Run interactive profile manager
|
# Run interactive profile manager
|
||||||
anyio.run(manage_profiles)
|
anyio.run(manage_profiles)
|
||||||
|
|
||||||
@cli.command()
|
@cli.command(name="")
|
||||||
@click.argument("url", required=False)
|
@click.argument("url", required=False)
|
||||||
@click.option("--example", is_flag=True, help="Show usage examples")
|
@click.option("--example", is_flag=True, help="Show usage examples")
|
||||||
@click.option("--browser-config", "-B", type=click.Path(exists=True), help="Browser config file (YAML/JSON)")
|
@click.option("--browser-config", "-B", type=click.Path(exists=True), help="Browser config file (YAML/JSON)")
|
||||||
@@ -772,5 +772,11 @@ def default(url: str, example: bool, browser_config: str, crawler_config: str, f
|
|||||||
profile=profile
|
profile=profile
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def main():
|
||||||
|
import sys
|
||||||
|
if len(sys.argv) < 2 or sys.argv[1] not in cli.commands:
|
||||||
|
sys.argv.insert(1, "crawl")
|
||||||
|
cli()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cli()
|
main()
|
||||||
@@ -38,8 +38,8 @@ rate_limiting:
|
|||||||
|
|
||||||
# Security Configuration
|
# Security Configuration
|
||||||
security:
|
security:
|
||||||
enabled: false
|
enabled: true
|
||||||
jwt_enabled: false
|
jwt_enabled: true
|
||||||
https_redirect: false
|
https_redirect: false
|
||||||
trusted_hosts: ["*"]
|
trusted_hosts: ["*"]
|
||||||
headers:
|
headers:
|
||||||
@@ -68,4 +68,4 @@ observability:
|
|||||||
enabled: True
|
enabled: True
|
||||||
endpoint: "/metrics"
|
endpoint: "/metrics"
|
||||||
health_check:
|
health_check:
|
||||||
endpoint: "/health"
|
endpoint: "/health"
|
||||||
@@ -65,7 +65,6 @@ async def basic_deep_crawl():
|
|||||||
f"\n✅ Performance: {len(results)} pages in {time.perf_counter() - start_time:.2f} seconds"
|
f"\n✅ Performance: {len(results)} pages in {time.perf_counter() - start_time:.2f} seconds"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# 2️⃣ Stream vs. Non-Stream Execution
|
# 2️⃣ Stream vs. Non-Stream Execution
|
||||||
async def stream_vs_nonstream():
|
async def stream_vs_nonstream():
|
||||||
"""
|
"""
|
||||||
@@ -127,7 +126,6 @@ async def stream_vs_nonstream():
|
|||||||
print(f" ✅ All results: {time.perf_counter() - start_time:.2f} seconds")
|
print(f" ✅ All results: {time.perf_counter() - start_time:.2f} seconds")
|
||||||
print("\n🔍 Key Takeaway: Streaming allows processing results immediately")
|
print("\n🔍 Key Takeaway: Streaming allows processing results immediately")
|
||||||
|
|
||||||
|
|
||||||
# 3️⃣ Introduce Filters & Scorers
|
# 3️⃣ Introduce Filters & Scorers
|
||||||
async def filters_and_scorers():
|
async def filters_and_scorers():
|
||||||
"""
|
"""
|
||||||
@@ -236,7 +234,6 @@ async def filters_and_scorers():
|
|||||||
print(f" ✅ Crawler prioritized {len(results)} pages by relevance score")
|
print(f" ✅ Crawler prioritized {len(results)} pages by relevance score")
|
||||||
print(" 🔍 Note: BestFirstCrawlingStrategy visits highest-scoring pages first")
|
print(" 🔍 Note: BestFirstCrawlingStrategy visits highest-scoring pages first")
|
||||||
|
|
||||||
|
|
||||||
# 4️⃣ Wrap-Up and Key Takeaways
|
# 4️⃣ Wrap-Up and Key Takeaways
|
||||||
async def wrap_up():
|
async def wrap_up():
|
||||||
"""
|
"""
|
||||||
@@ -307,7 +304,6 @@ async def wrap_up():
|
|||||||
for depth, count in sorted(depth_counts.items()):
|
for depth, count in sorted(depth_counts.items()):
|
||||||
print(f" Depth {depth}: {count} pages")
|
print(f" Depth {depth}: {count} pages")
|
||||||
|
|
||||||
|
|
||||||
# 5️⃣ Advanced Filters
|
# 5️⃣ Advanced Filters
|
||||||
async def advanced_filters():
|
async def advanced_filters():
|
||||||
"""
|
"""
|
||||||
@@ -371,7 +367,6 @@ async def advanced_filters():
|
|||||||
relevance_score = result.metadata.get("relevance_score", 0)
|
relevance_score = result.metadata.get("relevance_score", 0)
|
||||||
print(f" → Score: {relevance_score:.2f} | {result.url}")
|
print(f" → Score: {relevance_score:.2f} | {result.url}")
|
||||||
|
|
||||||
|
|
||||||
# Main function to run the entire tutorial
|
# Main function to run the entire tutorial
|
||||||
async def max_pages_and_thresholds():
|
async def max_pages_and_thresholds():
|
||||||
"""
|
"""
|
||||||
@@ -497,7 +492,6 @@ async def run_tutorial():
|
|||||||
print("You now have a comprehensive understanding of deep crawling with Crawl4AI.")
|
print("You now have a comprehensive understanding of deep crawling with Crawl4AI.")
|
||||||
print("For more information, check out https://docs.crawl4ai.com")
|
print("For more information, check out https://docs.crawl4ai.com")
|
||||||
|
|
||||||
|
|
||||||
# Execute the tutorial when run directly
|
# Execute the tutorial when run directly
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
asyncio.run(run_tutorial())
|
asyncio.run(run_tutorial())
|
||||||
@@ -78,7 +78,7 @@ crawl4ai-download-models = "crawl4ai.model_loader:main"
|
|||||||
crawl4ai-migrate = "crawl4ai.migrations:main"
|
crawl4ai-migrate = "crawl4ai.migrations:main"
|
||||||
crawl4ai-setup = "crawl4ai.install:post_install"
|
crawl4ai-setup = "crawl4ai.install:post_install"
|
||||||
crawl4ai-doctor = "crawl4ai.install:doctor"
|
crawl4ai-doctor = "crawl4ai.install:doctor"
|
||||||
crwl = "crawl4ai.cli:cli"
|
crwl = "crawl4ai.cli:main"
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
packages = {find = {where = ["."], include = ["crawl4ai*"]}}
|
packages = {find = {where = ["."], include = ["crawl4ai*"]}}
|
||||||
|
|||||||
Reference in New Issue
Block a user