refactor(docs): reorganize documentation structure and update styles

Reorganize documentation into core/advanced/extraction sections for better navigation.
Update terminal theme styles and add rich library for better CLI output.
Remove redundant tutorial files and consolidate content into core sections.
Add personal story to index page for project context.

BREAKING CHANGE: Documentation structure has been significantly reorganized.
This commit is contained in:
UncleCode
2025-01-07 20:49:50 +08:00
parent ae376f15fb
commit ca3e33122e
87 changed files with 4869 additions and 8951 deletions

View File

@@ -1,5 +1,5 @@
site_name: Crawl4AI Documentation
site_description: 🔥🕷️ Crawl4AI, Open-source LLM Friendly Web Crawler & Scrapper
site_description: 🚀🤖 Crawl4AI, Open-source LLM-Friendly Web Crawler & Scraper
site_url: https://docs.crawl4ai.com
repo_url: https://github.com/unclecode/crawl4ai
repo_name: unclecode/crawl4ai
@@ -7,67 +7,50 @@ docs_dir: docs/md_v2
nav:
- Home: 'index.md'
- 'Installation': 'basic/installation.md'
- 'Docker Deplotment': 'basic/docker-deploymeny.md'
- 'Quick Start': 'basic/quickstart.md'
- Changelog & Blog:
- 'Blog Home': 'blog/index.md'
- 'Latest (0.4.1)': 'blog/releases/0.4.1.md'
- 'Changelog': 'https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md'
- Basic:
- 'Simple Crawling': 'basic/simple-crawling.md'
- 'Output Formats': 'basic/output-formats.md'
- 'Browser Configuration': 'basic/browser-config.md'
- 'Page Interaction': 'basic/page-interaction.md'
- 'Content Selection': 'basic/content-selection.md'
- 'Cache Modes': 'basic/cache-modes.md'
- Setup & Installation:
- "Installation": "core/installation.md"
- "Docker Deployment": "core/docker-deploymeny.md"
- "Quick Start": "core/quickstart.md"
- "Blog & Changelog":
- "Blog Home": "blog/index.md"
- "Changelog": "https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md"
- Core:
- "Simple Crawling": "core/simple-crawling.md"
- "Crawler Result": "core/crawler-result.md"
- "Browser & Crawler Config": "core/browser-crawler-config.md"
- "Markdown Generation": "core/markdown-generation.md"
- "Fit Markdown": "core/fit-markdown.md"
- "Page Interaction": "core/page-interaction.md"
- "Content Selection": "core/content-selection.md"
- "Cache Modes": "core/cache-modes.md"
- "Local Files & Raw HTML": "core/local-files.md"
- "Link & Media": "core/link-media.md"
- Advanced:
- 'Content Processing': 'advanced/content-processing.md'
- 'Magic Mode': 'advanced/magic-mode.md'
- 'Hooks & Auth': 'advanced/hooks-auth.md'
- 'Proxy & Security': 'advanced/proxy-security.md'
- 'Session Management': 'advanced/session-management.md'
- 'Session Management (Advanced)': 'advanced/session-management-advanced.md'
- "Overview": "advanced/advanced-features.md"
- "File Downloading": "advanced/file-downloading.md"
- "Lazy Loading": "advanced/lazy-loading.md"
- "Hooks & Auth": "advanced/hooks-auth.md"
- "Proxy & Security": "advanced/proxy-security.md"
- "Session Management": "advanced/session-management.md"
- "Multi-URL Crawling": "advanced/multi-url-crawling.md"
- "Crawl Dispatcher": "advanced/crawl-dispatcher.md"
- "Identity Based Crawling": "advanced/identity-based-crawling.md"
- "SSL Certificate": "advanced/ssl-certificate.md"
- Extraction:
- 'Overview': 'extraction/overview.md'
- 'LLM Strategy': 'extraction/llm.md'
- 'Json-CSS Extractor Basic': 'extraction/css.md'
- 'Json-CSS Extractor Advanced': 'extraction/css-advanced.md'
- 'Cosine Strategy': 'extraction/cosine.md'
- 'Chunking': 'extraction/chunking.md'
- "LLM-Free Strategies": "extraction/no-llm-strategies.md"
- "LLM Strategies": "extraction/llm-strategies.md"
- "Clustering Strategies": "extraction/clustring-strategies.md"
- "Chunking": "extraction/chunking.md"
- API Reference:
- 'Parameters Table': 'api/parameters.md'
- 'AsyncWebCrawler': 'api/async-webcrawler.md'
- 'AsyncWebCrawler.arun()': 'api/arun.md'
- 'CrawlResult': 'api/crawl-result.md'
- 'Strategies': 'api/strategies.md'
- Tutorial:
- '1. Getting Started': 'tutorial/episode_01_Introduction_to_Crawl4AI_and_Basic_Installation.md'
- '2. Advanced Features': 'tutorial/episode_02_Overview_of_Advanced_Features.md'
- '3. Browser Setup': 'tutorial/episode_03_Browser_Configurations_&_Headless_Crawling.md'
- '4. Proxy Settings': 'tutorial/episode_04_Advanced_Proxy_and_Security_Settings.md'
- '5. Dynamic Content': 'tutorial/episode_05_JavaScript_Execution_and_Dynamic_Content_Handling.md'
- '6. Magic Mode': 'tutorial/episode_06_Magic_Mode_and_Anti-Bot_Protection.md'
- '7. Content Cleaning': 'tutorial/episode_07_Content_Cleaning_and_Fit_Markdown.md'
- '8. Media Handling': 'tutorial/episode_08_Media_Handling_Images_Videos_and_Audio.md'
- '9. Link Analysis': 'tutorial/episode_09_Link_Analysis_and_Smart_Filtering.md'
- '10. User Simulation': 'tutorial/episode_10_Custom_Headers,_Identity,_and_User_Simulation.md'
- '11.1. JSON CSS': 'tutorial/episode_11_1_Extraction_Strategies_JSON_CSS.md'
- '11.2. LLM Strategy': 'tutorial/episode_11_2_Extraction_Strategies_LLM.md'
- '11.3. Cosine Strategy': 'tutorial/episode_11_3_Extraction_Strategies_Cosine.md'
- '12. Session Crawling': 'tutorial/episode_12_Session-Based_Crawling_for_Dynamic_Websites.md'
- '13. Text Chunking': 'tutorial/episode_13_Chunking_Strategies_for_Large_Text_Processing.md'
- '14. Custom Workflows': 'tutorial/episode_14_Hooks_and_Custom_Workflow_with_AsyncWebCrawler.md'
- "AsyncWebCrawler": "api/async-webcrawler.md"
- "arun()": "api/arun.md"
- "Browser & Crawler Config": "api/parameters.md"
- "CrawlResult": "api/crawl-result.md"
- "Strategies": "api/strategies.md"
theme:
name: terminal
palette: dark
name: 'terminal'
palette: 'dark'
markdown_extensions:
- pymdownx.highlight:
@@ -87,4 +70,4 @@ extra_css:
extra_javascript:
- assets/highlight.min.js
- assets/highlight_init.js
- assets/highlight_init.js