docs: Update 0.7.5 video walkthrough
This commit is contained in:
@@ -32,8 +32,8 @@ from crawl4ai import hooks_to_string
|
||||
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||
|
||||
# Configuration
|
||||
# DOCKER_URL = "http://localhost:11235"
|
||||
DOCKER_URL = "http://localhost:11234"
|
||||
DOCKER_URL = "http://localhost:11235"
|
||||
# DOCKER_URL = "http://localhost:11234"
|
||||
TEST_URLS = [
|
||||
# "https://httpbin.org/html",
|
||||
"https://www.kidocode.com",
|
||||
@@ -573,7 +573,7 @@ async def main():
|
||||
("String-Based Hooks (REST API)", demo_1_string_based_hooks, False),
|
||||
("hooks_to_string() Utility", demo_2_hooks_to_string_utility, False),
|
||||
("Docker Client Auto-Conversion", demo_3_docker_client_auto_conversion, True),
|
||||
("Complete Hook Pipeline", demo_4_complete_hook_pipeline, True),
|
||||
# ("Complete Hook Pipeline", demo_4_complete_hook_pipeline, True),
|
||||
]
|
||||
|
||||
for i, (name, demo_func, is_async) in enumerate(demos, 1):
|
||||
@@ -592,7 +592,7 @@ async def main():
|
||||
# Pause between demos (except the last one)
|
||||
if i < len(demos):
|
||||
print("\n⏸️ Press Enter to continue to next demo...")
|
||||
input()
|
||||
# input()
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print(f"\n⏹️ Demo interrupted by user")
|
||||
@@ -605,40 +605,40 @@ async def main():
|
||||
continue
|
||||
|
||||
# Final summary
|
||||
# print("\n" + "=" * 70)
|
||||
# print(" 🎉 All Demonstrations Complete!")
|
||||
# print("=" * 70)
|
||||
print("\n" + "=" * 70)
|
||||
print(" 🎉 All Demonstrations Complete!")
|
||||
print("=" * 70)
|
||||
|
||||
# print("\n📊 Summary of v0.7.5 Docker Hooks System:")
|
||||
# print("\n🆕 COMPLETELY NEW FEATURE in v0.7.5:")
|
||||
# print(" The Docker Hooks System lets you customize the crawling pipeline")
|
||||
# print(" with user-provided Python functions at 8 strategic points.")
|
||||
print("\n📊 Summary of v0.7.5 Docker Hooks System:")
|
||||
print("\n🆕 COMPLETELY NEW FEATURE in v0.7.5:")
|
||||
print(" The Docker Hooks System lets you customize the crawling pipeline")
|
||||
print(" with user-provided Python functions at 8 strategic points.")
|
||||
|
||||
# print("\n✨ Three Ways to Use Docker Hooks (All NEW!):")
|
||||
# print(" 1. String-based - Write hooks as strings for REST API")
|
||||
# print(" 2. hooks_to_string() - Convert Python functions to strings")
|
||||
# print(" 3. Docker Client - Automatic conversion (RECOMMENDED)")
|
||||
print("\n✨ Three Ways to Use Docker Hooks (All NEW!):")
|
||||
print(" 1. String-based - Write hooks as strings for REST API")
|
||||
print(" 2. hooks_to_string() - Convert Python functions to strings")
|
||||
print(" 3. Docker Client - Automatic conversion (RECOMMENDED)")
|
||||
|
||||
# print("\n💡 Key Benefits:")
|
||||
# print(" ✓ Full IDE support (autocomplete, syntax highlighting)")
|
||||
# print(" ✓ Type checking and linting")
|
||||
# print(" ✓ Easy to test and debug")
|
||||
# print(" ✓ Reusable across projects")
|
||||
# print(" ✓ Complete pipeline control")
|
||||
print("\n💡 Key Benefits:")
|
||||
print(" ✓ Full IDE support (autocomplete, syntax highlighting)")
|
||||
print(" ✓ Type checking and linting")
|
||||
print(" ✓ Easy to test and debug")
|
||||
print(" ✓ Reusable across projects")
|
||||
print(" ✓ Complete pipeline control")
|
||||
|
||||
# print("\n🎯 8 Hook Points Available:")
|
||||
# print(" • on_browser_created, on_page_context_created")
|
||||
# print(" • on_user_agent_updated, before_goto, after_goto")
|
||||
# print(" • on_execution_started, before_retrieve_html, before_return_html")
|
||||
print("\n🎯 8 Hook Points Available:")
|
||||
print(" • on_browser_created, on_page_context_created")
|
||||
print(" • on_user_agent_updated, before_goto, after_goto")
|
||||
print(" • on_execution_started, before_retrieve_html, before_return_html")
|
||||
|
||||
# print("\n📚 Resources:")
|
||||
# print(" • Docs: https://docs.crawl4ai.com")
|
||||
# print(" • GitHub: https://github.com/unclecode/crawl4ai")
|
||||
# print(" • Discord: https://discord.gg/jP8KfhDhyN")
|
||||
print("\n📚 Resources:")
|
||||
print(" • Docs: https://docs.crawl4ai.com")
|
||||
print(" • GitHub: https://github.com/unclecode/crawl4ai")
|
||||
print(" • Discord: https://discord.gg/jP8KfhDhyN")
|
||||
|
||||
# print("\n" + "=" * 70)
|
||||
# print(" Happy Crawling with v0.7.5! 🕷️")
|
||||
# print("=" * 70 + "\n")
|
||||
print("\n" + "=" * 70)
|
||||
print(" Happy Crawling with v0.7.5! 🕷️")
|
||||
print("=" * 70 + "\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -62,7 +62,33 @@
|
||||
"source": [
|
||||
"---\n",
|
||||
"\n",
|
||||
"## 🔒 Feature 1: HTTPS Preservation for Internal Links\n",
|
||||
"## 🔧 Feature 1: Docker Hooks System (NEW! 🆕)\n",
|
||||
"\n",
|
||||
"### What is it?\n",
|
||||
"v0.7.5 introduces a **completely new Docker Hooks System** that lets you inject custom Python functions at 8 key points in the crawling pipeline. This gives you full control over:\n",
|
||||
"- Authentication setup\n",
|
||||
"- Performance optimization\n",
|
||||
"- Content processing\n",
|
||||
"- Custom behavior at each stage\n",
|
||||
"\n",
|
||||
"### Three Ways to Use Docker Hooks\n",
|
||||
"\n",
|
||||
"The Docker Hooks System offers three approaches, all part of this new feature:\n",
|
||||
"\n",
|
||||
"1. **String-based hooks** - Write hooks as strings for REST API\n",
|
||||
"2. **Using `hooks_to_string()` utility** - Convert Python functions to strings\n",
|
||||
"3. **Docker Client auto-conversion** - Pass functions directly (most convenient)\n",
|
||||
"\n",
|
||||
"All three approaches are NEW in v0.7.5!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"\n",
|
||||
"## 🔒 Feature 2: HTTPS Preservation for Internal Links\n",
|
||||
"\n",
|
||||
"### Problem\n",
|
||||
"When crawling HTTPS sites, internal links sometimes get downgraded to HTTP, breaking authentication and causing security warnings.\n",
|
||||
@@ -416,7 +442,7 @@
|
||||
"source": [
|
||||
"---\n",
|
||||
"\n",
|
||||
"## 🤖 Feature 2: Enhanced LLM Integration\n",
|
||||
"## 🤖 Feature 3: Enhanced LLM Integration\n",
|
||||
"\n",
|
||||
"### What's New\n",
|
||||
"- Custom `temperature` parameter for creativity control\n",
|
||||
@@ -979,32 +1005,6 @@
|
||||
"await demo_enhanced_llm()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"\n",
|
||||
"## 🔧 Feature 3: Docker Hooks System (NEW! 🆕)\n",
|
||||
"\n",
|
||||
"### What is it?\n",
|
||||
"v0.7.5 introduces a **completely new Docker Hooks System** that lets you inject custom Python functions at 8 key points in the crawling pipeline. This gives you full control over:\n",
|
||||
"- Authentication setup\n",
|
||||
"- Performance optimization\n",
|
||||
"- Content processing\n",
|
||||
"- Custom behavior at each stage\n",
|
||||
"\n",
|
||||
"### Three Ways to Use Docker Hooks\n",
|
||||
"\n",
|
||||
"The Docker Hooks System offers three approaches, all part of this new feature:\n",
|
||||
"\n",
|
||||
"1. **String-based hooks** - Write hooks as strings for REST API\n",
|
||||
"2. **Using `hooks_to_string()` utility** - Convert Python functions to strings\n",
|
||||
"3. **Docker Client auto-conversion** - Pass functions directly (most convenient)\n",
|
||||
"\n",
|
||||
"All three approaches are NEW in v0.7.5!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
Reference in New Issue
Block a user