From 7dfe528d43670163faa5cd89d47520753f44a12d Mon Sep 17 00:00:00 2001
From: Soham Kukreti <kukretisoham@gmail.com>
Date: Fri, 3 Oct 2025 22:00:46 +0530
Subject: [PATCH] fix(docs): standardize C4A-Script tutorial, add CLI
 identity-based crawling, and add sponsorship CTA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Switch installs to pip install -r requirements.txt (tutorial and app docs)
- Update local run steps to python server.py and http://localhost:8000
- Set default PORT to 8000; update port-in-use commands and alt port 8001
- Replace unsupported :contains() example with accessible attribute selector
- Update example URLs in tutorial servers to 127.0.0.1:8000
- Add “Identity-based crawling” section with crwl profiles CLI workflow and code usage
- Replace legacy-docs note with sponsorship message in docs/md_v2/index.md
- Minor copy and consistency fixes across pages
---
 docs/examples/c4a_script/tutorial/README.md   | 10 +++---
 docs/examples/c4a_script/tutorial/server.py   |  2 +-
 .../md_v2/advanced/identity-based-crawling.md | 36 +++++++++++++++++++
 docs/md_v2/apps/c4a-script/README.md          | 10 +++---
 docs/md_v2/apps/c4a-script/server.py          |  4 +--
 docs/md_v2/core/c4a-script.md                 | 10 +++---
 docs/md_v2/index.md                           |  2 +-
 7 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/docs/examples/c4a_script/tutorial/README.md b/docs/examples/c4a_script/tutorial/README.md
index 81f855ee..2d6940bb 100644
--- a/docs/examples/c4a_script/tutorial/README.md
+++ b/docs/examples/c4a_script/tutorial/README.md
@@ -18,7 +18,7 @@ A comprehensive web-based tutorial for learning and experimenting with C4A-Scrip
 
 2. **Install Dependencies**
    ```bash
-   pip install flask
+   pip install -r requirements.txt
    ```
 
 3. **Launch the Server**
@@ -28,7 +28,7 @@ A comprehensive web-based tutorial for learning and experimenting with C4A-Scrip
 
 4. **Open in Browser**
    ```
-   http://localhost:8080
+   http://localhost:8000
    ```
 
 **🌐 Try Online**: [Live Demo](https://docs.crawl4ai.com/c4a-script/demo)
@@ -325,7 +325,7 @@ Powers the recording functionality:
 ### Configuration
 ```python
 # server.py configuration
-PORT = 8080
+PORT = 8000
 DEBUG = True
 THREADED = True
 ```
@@ -343,9 +343,9 @@ THREADED = True
 **Port Already in Use**
 ```bash
 # Kill existing process
-lsof -ti:8080 | xargs kill -9
+lsof -ti:8000 | xargs kill -9
 # Or use different port
-python server.py --port 8081
+PORT=8001 python server.py
 ```
 
 **Blockly Not Loading**
diff --git a/docs/examples/c4a_script/tutorial/server.py b/docs/examples/c4a_script/tutorial/server.py
index f9cb81e9..2537e4c3 100644
--- a/docs/examples/c4a_script/tutorial/server.py
+++ b/docs/examples/c4a_script/tutorial/server.py
@@ -216,7 +216,7 @@ def get_examples():
             'name': 'Handle Cookie Banner',
             'description': 'Accept cookies and close newsletter popup',
             'script': '''# Handle cookie banner and newsletter
-GO http://127.0.0.1:8080/playground/
+GO http://127.0.0.1:8000/playground/
 WAIT `body` 2
 IF (EXISTS `.cookie-banner`) THEN CLICK `.accept`
 IF (EXISTS `.newsletter-popup`) THEN CLICK `.close`'''
diff --git a/docs/md_v2/advanced/identity-based-crawling.md b/docs/md_v2/advanced/identity-based-crawling.md
index 3864f840..2b155857 100644
--- a/docs/md_v2/advanced/identity-based-crawling.md
+++ b/docs/md_v2/advanced/identity-based-crawling.md
@@ -82,6 +82,42 @@ If you installed Crawl4AI (which installs Playwright under the hood), you alread
 
 ---
 
+### Creating a Profile Using the Crawl4AI CLI (Easiest)
+
+If you prefer a guided, interactive setup, use the built-in CLI to create and manage persistent browser profiles.
+
+1. Launch the profile manager:
+   ```bash
+   crwl profiles
+   ```
+
+2. Choose "Create new profile" and enter a profile name. A Chromium window opens so you can log in to sites and configure settings. When finished, return to the terminal and press `q` to save the profile.
+
+3. Profiles are saved under `~/.crawl4ai/profiles/<profile_name>` (for example: `/home/<you>/.crawl4ai/profiles/test_profile_1`) along with a `storage_state.json` for cookies and session data.
+
+4. Optionally, choose "List profiles" in the CLI to view available profiles and their paths.
+
+5. Use the saved path with `BrowserConfig.user_data_dir`:
+   ```python
+   from crawl4ai import AsyncWebCrawler, BrowserConfig
+
+   profile_path = "/home/<you>/.crawl4ai/profiles/test_profile_1"
+
+   browser_config = BrowserConfig(
+       headless=True,
+       use_managed_browser=True,
+       user_data_dir=profile_path,
+       browser_type="chromium",
+   )
+
+   async with AsyncWebCrawler(config=browser_config) as crawler:
+       result = await crawler.arun(url="https://example.com/private")
+   ```
+
+The CLI also supports listing and deleting profiles, and even testing a crawl directly from the menu.
+
+---
+
 ## 3. Using Managed Browsers in Crawl4AI
 
 Once you have a data directory with your session data, pass it to **`BrowserConfig`**:
diff --git a/docs/md_v2/apps/c4a-script/README.md b/docs/md_v2/apps/c4a-script/README.md
index 81f855ee..2d6940bb 100644
--- a/docs/md_v2/apps/c4a-script/README.md
+++ b/docs/md_v2/apps/c4a-script/README.md
@@ -18,7 +18,7 @@ A comprehensive web-based tutorial for learning and experimenting with C4A-Scrip
 
 2. **Install Dependencies**
    ```bash
-   pip install flask
+   pip install -r requirements.txt
    ```
 
 3. **Launch the Server**
@@ -28,7 +28,7 @@ A comprehensive web-based tutorial for learning and experimenting with C4A-Scrip
 
 4. **Open in Browser**
    ```
-   http://localhost:8080
+   http://localhost:8000
    ```
 
 **🌐 Try Online**: [Live Demo](https://docs.crawl4ai.com/c4a-script/demo)
@@ -325,7 +325,7 @@ Powers the recording functionality:
 ### Configuration
 ```python
 # server.py configuration
-PORT = 8080
+PORT = 8000
 DEBUG = True
 THREADED = True
 ```
@@ -343,9 +343,9 @@ THREADED = True
 **Port Already in Use**
 ```bash
 # Kill existing process
-lsof -ti:8080 | xargs kill -9
+lsof -ti:8000 | xargs kill -9
 # Or use different port
-python server.py --port 8081
+PORT=8001 python server.py
 ```
 
 **Blockly Not Loading**
diff --git a/docs/md_v2/apps/c4a-script/server.py b/docs/md_v2/apps/c4a-script/server.py
index 6242789d..2537e4c3 100644
--- a/docs/md_v2/apps/c4a-script/server.py
+++ b/docs/md_v2/apps/c4a-script/server.py
@@ -216,7 +216,7 @@ def get_examples():
             'name': 'Handle Cookie Banner',
             'description': 'Accept cookies and close newsletter popup',
             'script': '''# Handle cookie banner and newsletter
-GO http://127.0.0.1:8080/playground/
+GO http://127.0.0.1:8000/playground/
 WAIT `body` 2
 IF (EXISTS `.cookie-banner`) THEN CLICK `.accept`
 IF (EXISTS `.newsletter-popup`) THEN CLICK `.close`'''
@@ -283,7 +283,7 @@ WAIT `.success-message` 5'''
     return jsonify(examples)
 
 if __name__ == '__main__':
-    port = int(os.environ.get('PORT', 8080))
+    port = int(os.environ.get('PORT', 8000))
     print(f"""
 ╔══════════════════════════════════════════════════════════╗
 ║          C4A-Script Interactive Tutorial Server          ║
diff --git a/docs/md_v2/core/c4a-script.md b/docs/md_v2/core/c4a-script.md
index d92e426e..1af3da4e 100644
--- a/docs/md_v2/core/c4a-script.md
+++ b/docs/md_v2/core/c4a-script.md
@@ -69,12 +69,12 @@ The tutorial includes a Flask-based web interface with:
 cd docs/examples/c4a_script/tutorial/
 
 # Install dependencies
-pip install flask
+pip install -r requirements.txt
 
 # Launch the tutorial server
-python app.py
+python server.py
 
-# Open http://localhost:5000 in your browser
+# Open http://localhost:8000 in your browser
 ```
 
 ## Core Concepts
@@ -111,8 +111,8 @@ CLICK `.submit-btn`
 # By attribute
 CLICK `button[type="submit"]`
 
-# By text content
-CLICK `button:contains("Sign In")`
+# By accessible attributes
+CLICK `button[aria-label="Search"][title="Search"]`
 
 # Complex selectors
 CLICK `.form-container input[name="email"]`
diff --git a/docs/md_v2/index.md b/docs/md_v2/index.md
index d497ca89..e7566e7b 100644
--- a/docs/md_v2/index.md
+++ b/docs/md_v2/index.md
@@ -57,7 +57,7 @@
 
 Crawl4AI is the #1 trending GitHub repository, actively maintained by a vibrant community. It delivers blazing-fast, AI-ready web crawling tailored for large language models, AI agents, and data pipelines. Fully open source, flexible, and built for real-time performance, **Crawl4AI** empowers developers with unmatched speed, precision, and deployment ease.
 
-> **Note**: If you're looking for the old documentation, you can access it [here](https://old.docs.crawl4ai.com).
+> Enjoy using Crawl4AI? Consider **[becoming a sponsor](https://github.com/sponsors/unclecode)** to support ongoing development and community growth!
 
 ## 🎯 New: Adaptive Web Crawling