docs: Update 0.7.5 video walkthrough
This commit is contained in:
@@ -32,8 +32,8 @@ from crawl4ai import hooks_to_string
|
|||||||
from crawl4ai.docker_client import Crawl4aiDockerClient
|
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
# DOCKER_URL = "http://localhost:11235"
|
DOCKER_URL = "http://localhost:11235"
|
||||||
DOCKER_URL = "http://localhost:11234"
|
# DOCKER_URL = "http://localhost:11234"
|
||||||
TEST_URLS = [
|
TEST_URLS = [
|
||||||
# "https://httpbin.org/html",
|
# "https://httpbin.org/html",
|
||||||
"https://www.kidocode.com",
|
"https://www.kidocode.com",
|
||||||
@@ -573,7 +573,7 @@ async def main():
|
|||||||
("String-Based Hooks (REST API)", demo_1_string_based_hooks, False),
|
("String-Based Hooks (REST API)", demo_1_string_based_hooks, False),
|
||||||
("hooks_to_string() Utility", demo_2_hooks_to_string_utility, False),
|
("hooks_to_string() Utility", demo_2_hooks_to_string_utility, False),
|
||||||
("Docker Client Auto-Conversion", demo_3_docker_client_auto_conversion, True),
|
("Docker Client Auto-Conversion", demo_3_docker_client_auto_conversion, True),
|
||||||
("Complete Hook Pipeline", demo_4_complete_hook_pipeline, True),
|
# ("Complete Hook Pipeline", demo_4_complete_hook_pipeline, True),
|
||||||
]
|
]
|
||||||
|
|
||||||
for i, (name, demo_func, is_async) in enumerate(demos, 1):
|
for i, (name, demo_func, is_async) in enumerate(demos, 1):
|
||||||
@@ -592,7 +592,7 @@ async def main():
|
|||||||
# Pause between demos (except the last one)
|
# Pause between demos (except the last one)
|
||||||
if i < len(demos):
|
if i < len(demos):
|
||||||
print("\n⏸️ Press Enter to continue to next demo...")
|
print("\n⏸️ Press Enter to continue to next demo...")
|
||||||
input()
|
# input()
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print(f"\n⏹️ Demo interrupted by user")
|
print(f"\n⏹️ Demo interrupted by user")
|
||||||
@@ -605,40 +605,40 @@ async def main():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Final summary
|
# Final summary
|
||||||
# print("\n" + "=" * 70)
|
print("\n" + "=" * 70)
|
||||||
# print(" 🎉 All Demonstrations Complete!")
|
print(" 🎉 All Demonstrations Complete!")
|
||||||
# print("=" * 70)
|
print("=" * 70)
|
||||||
|
|
||||||
# print("\n📊 Summary of v0.7.5 Docker Hooks System:")
|
print("\n📊 Summary of v0.7.5 Docker Hooks System:")
|
||||||
# print("\n🆕 COMPLETELY NEW FEATURE in v0.7.5:")
|
print("\n🆕 COMPLETELY NEW FEATURE in v0.7.5:")
|
||||||
# print(" The Docker Hooks System lets you customize the crawling pipeline")
|
print(" The Docker Hooks System lets you customize the crawling pipeline")
|
||||||
# print(" with user-provided Python functions at 8 strategic points.")
|
print(" with user-provided Python functions at 8 strategic points.")
|
||||||
|
|
||||||
# print("\n✨ Three Ways to Use Docker Hooks (All NEW!):")
|
print("\n✨ Three Ways to Use Docker Hooks (All NEW!):")
|
||||||
# print(" 1. String-based - Write hooks as strings for REST API")
|
print(" 1. String-based - Write hooks as strings for REST API")
|
||||||
# print(" 2. hooks_to_string() - Convert Python functions to strings")
|
print(" 2. hooks_to_string() - Convert Python functions to strings")
|
||||||
# print(" 3. Docker Client - Automatic conversion (RECOMMENDED)")
|
print(" 3. Docker Client - Automatic conversion (RECOMMENDED)")
|
||||||
|
|
||||||
# print("\n💡 Key Benefits:")
|
print("\n💡 Key Benefits:")
|
||||||
# print(" ✓ Full IDE support (autocomplete, syntax highlighting)")
|
print(" ✓ Full IDE support (autocomplete, syntax highlighting)")
|
||||||
# print(" ✓ Type checking and linting")
|
print(" ✓ Type checking and linting")
|
||||||
# print(" ✓ Easy to test and debug")
|
print(" ✓ Easy to test and debug")
|
||||||
# print(" ✓ Reusable across projects")
|
print(" ✓ Reusable across projects")
|
||||||
# print(" ✓ Complete pipeline control")
|
print(" ✓ Complete pipeline control")
|
||||||
|
|
||||||
# print("\n🎯 8 Hook Points Available:")
|
print("\n🎯 8 Hook Points Available:")
|
||||||
# print(" • on_browser_created, on_page_context_created")
|
print(" • on_browser_created, on_page_context_created")
|
||||||
# print(" • on_user_agent_updated, before_goto, after_goto")
|
print(" • on_user_agent_updated, before_goto, after_goto")
|
||||||
# print(" • on_execution_started, before_retrieve_html, before_return_html")
|
print(" • on_execution_started, before_retrieve_html, before_return_html")
|
||||||
|
|
||||||
# print("\n📚 Resources:")
|
print("\n📚 Resources:")
|
||||||
# print(" • Docs: https://docs.crawl4ai.com")
|
print(" • Docs: https://docs.crawl4ai.com")
|
||||||
# print(" • GitHub: https://github.com/unclecode/crawl4ai")
|
print(" • GitHub: https://github.com/unclecode/crawl4ai")
|
||||||
# print(" • Discord: https://discord.gg/jP8KfhDhyN")
|
print(" • Discord: https://discord.gg/jP8KfhDhyN")
|
||||||
|
|
||||||
# print("\n" + "=" * 70)
|
print("\n" + "=" * 70)
|
||||||
# print(" Happy Crawling with v0.7.5! 🕷️")
|
print(" Happy Crawling with v0.7.5! 🕷️")
|
||||||
# print("=" * 70 + "\n")
|
print("=" * 70 + "\n")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -62,7 +62,33 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"---\n",
|
"---\n",
|
||||||
"\n",
|
"\n",
|
||||||
"## 🔒 Feature 1: HTTPS Preservation for Internal Links\n",
|
"## 🔧 Feature 1: Docker Hooks System (NEW! 🆕)\n",
|
||||||
|
"\n",
|
||||||
|
"### What is it?\n",
|
||||||
|
"v0.7.5 introduces a **completely new Docker Hooks System** that lets you inject custom Python functions at 8 key points in the crawling pipeline. This gives you full control over:\n",
|
||||||
|
"- Authentication setup\n",
|
||||||
|
"- Performance optimization\n",
|
||||||
|
"- Content processing\n",
|
||||||
|
"- Custom behavior at each stage\n",
|
||||||
|
"\n",
|
||||||
|
"### Three Ways to Use Docker Hooks\n",
|
||||||
|
"\n",
|
||||||
|
"The Docker Hooks System offers three approaches, all part of this new feature:\n",
|
||||||
|
"\n",
|
||||||
|
"1. **String-based hooks** - Write hooks as strings for REST API\n",
|
||||||
|
"2. **Using `hooks_to_string()` utility** - Convert Python functions to strings\n",
|
||||||
|
"3. **Docker Client auto-conversion** - Pass functions directly (most convenient)\n",
|
||||||
|
"\n",
|
||||||
|
"All three approaches are NEW in v0.7.5!"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
"## 🔒 Feature 2: HTTPS Preservation for Internal Links\n",
|
||||||
"\n",
|
"\n",
|
||||||
"### Problem\n",
|
"### Problem\n",
|
||||||
"When crawling HTTPS sites, internal links sometimes get downgraded to HTTP, breaking authentication and causing security warnings.\n",
|
"When crawling HTTPS sites, internal links sometimes get downgraded to HTTP, breaking authentication and causing security warnings.\n",
|
||||||
@@ -416,7 +442,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"---\n",
|
"---\n",
|
||||||
"\n",
|
"\n",
|
||||||
"## 🤖 Feature 2: Enhanced LLM Integration\n",
|
"## 🤖 Feature 3: Enhanced LLM Integration\n",
|
||||||
"\n",
|
"\n",
|
||||||
"### What's New\n",
|
"### What's New\n",
|
||||||
"- Custom `temperature` parameter for creativity control\n",
|
"- Custom `temperature` parameter for creativity control\n",
|
||||||
@@ -979,32 +1005,6 @@
|
|||||||
"await demo_enhanced_llm()"
|
"await demo_enhanced_llm()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"---\n",
|
|
||||||
"\n",
|
|
||||||
"## 🔧 Feature 3: Docker Hooks System (NEW! 🆕)\n",
|
|
||||||
"\n",
|
|
||||||
"### What is it?\n",
|
|
||||||
"v0.7.5 introduces a **completely new Docker Hooks System** that lets you inject custom Python functions at 8 key points in the crawling pipeline. This gives you full control over:\n",
|
|
||||||
"- Authentication setup\n",
|
|
||||||
"- Performance optimization\n",
|
|
||||||
"- Content processing\n",
|
|
||||||
"- Custom behavior at each stage\n",
|
|
||||||
"\n",
|
|
||||||
"### Three Ways to Use Docker Hooks\n",
|
|
||||||
"\n",
|
|
||||||
"The Docker Hooks System offers three approaches, all part of this new feature:\n",
|
|
||||||
"\n",
|
|
||||||
"1. **String-based hooks** - Write hooks as strings for REST API\n",
|
|
||||||
"2. **Using `hooks_to_string()` utility** - Convert Python functions to strings\n",
|
|
||||||
"3. **Docker Client auto-conversion** - Pass functions directly (most convenient)\n",
|
|
||||||
"\n",
|
|
||||||
"All three approaches are NEW in v0.7.5!"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
Reference in New Issue
Block a user