feat: Add comprehensive website-to-API example with frontend

This commit adds a complete web scraping API example that demonstrates how to extract structured data from any website and consume it like an API using the crawl4ai library, together with a minimalist frontend interface.

Core Functionality
- AI-powered web scraping with plain English queries
- Dual scraping approaches: Schema-based (faster) and LLM-based (flexible)
- Intelligent schema caching for improved performance
- Custom LLM model support with API key management
- Automatic duplicate request prevention

Modern Frontend Interface
- Minimalist black-and-white design inspired by modern web apps
- Responsive layout with smooth animations and transitions
- Three main pages: Scrape Data, Models Management, API Request History
- Real-time results display with JSON formatting
- Copy-to-clipboard functionality for extracted data
- Toast notifications for user feedback
- Auto-scroll to results when scraping starts

Model Management System
- Web-based model configuration interface
- Support for any LLM provider (OpenAI, Gemini, Anthropic, etc.)
- Simple configuration requiring only a model name, provider, and API token
- Add, list, and delete model configurations
- Secure storage of API keys in local JSON files

API Request History
- Automatic saving of all API requests and responses
- Display of request history with URL, query, and cURL commands
- Duplicate prevention (same URL + query combinations)
- Request deletion functionality
- Clean, simplified display focusing on essential information

Technical Implementation

Backend (FastAPI)
- RESTful API with comprehensive endpoints
- Pydantic models for request/response validation
- Async web scraping with crawl4ai library
- Error handling with detailed error messages
- File-based storage for models and request history

Frontend (Vanilla JS/CSS/HTML)
- No framework dependencies - pure HTML, CSS, JavaScript
- Modern CSS Grid and Flexbox layouts
- Custom dropdown styling with SVG arrows
- Responsive design for mobile and desktop
- Smooth scrolling and animations

Core Library Integration
- WebScraperAgent class for orchestration
- ModelConfig class for LLM configuration management
- Schema generation and caching system
- LLM extraction strategy support
- Browser configuration with headless mode
Author: Soham Kukreti
Date: 2025-08-24 18:20:15 +05:30
Parent: 40ab287c90
Commit: b1dff5a4d3
12 changed files with 2749 additions and 0 deletions

docs/examples/website-to-api/.gitignore (vendored, new file, 221 lines)

@@ -0,0 +1,221 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# Redis
*.rdb
*.aof
*.pid
# RabbitMQ
mnesia/
rabbitmq/
rabbitmq-data/
# ActiveMQ
activemq-data/
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
# Streamlit
.streamlit/secrets.toml
# Project data directories
models
schemas
saved_requests


@@ -0,0 +1,252 @@
# Web Scraper API with Custom Model Support
A powerful web scraping API that converts any website into structured data using AI. Features a beautiful minimalist frontend interface and support for custom LLM models!
## Features
- **AI-Powered Scraping**: Provide a URL and plain English query to extract structured data
- **Beautiful Frontend**: Modern minimalist black-and-white interface with smooth UX
- **Custom Model Support**: Use any LLM provider (OpenAI, Gemini, Anthropic, etc.) with your own API keys
- **Model Management**: Save, list, and manage multiple model configurations via web interface
- **Dual Scraping Approaches**: Choose between Schema-based (faster) or LLM-based (more flexible) extraction
- **API Request History**: Automatic saving and display of all API requests with cURL commands
- **Schema Caching**: Intelligent caching of generated schemas for faster subsequent requests
- **Duplicate Prevention**: Avoids saving duplicate requests (same URL + query)
- **RESTful API**: Easy-to-use HTTP endpoints for all operations
## Quick Start
### 1. Install Dependencies
```bash
pip install -r requirements.txt
```
### 2. Start the API Server
```bash
python api_server.py
```
The server will start on `http://localhost:8000` with a beautiful web interface!
### 3. Using the Web Interface
Once the server is running, open your browser and go to `http://localhost:8000` to access the modern web interface!
#### Pages:
- **Scrape Data**: Enter URLs and queries to extract structured data
- **Models**: Manage your AI model configurations (add, list, delete)
- **API Requests**: View history of all scraping requests with cURL commands
#### Features:
- **Minimalist Design**: Clean black-and-white theme inspired by modern web apps
- **Real-time Results**: See extracted data in formatted JSON
- **Copy to Clipboard**: Easy copying of results
- **Toast Notifications**: User-friendly feedback
- **Dual Scraping Modes**: Choose between Schema-based and LLM-based approaches
## Model Management
### Adding Models via Web Interface
1. Go to the **Models** page
2. Enter your model details:
   - **Model Name**: A short name identifying the configuration (e.g., `my-gemini`)
   - **Provider**: LLM provider (e.g., `gemini/gemini-2.5-flash`, `openai/gpt-4o`)
   - **API Token**: Your API key for the provider
3. Click "Add Model"
### API Usage for Model Management
#### Save a Model Configuration
```bash
curl -X POST "http://localhost:8000/models" \
-H "Content-Type: application/json" \
-d '{
"model_name": "my-gemini",
"provider": "gemini/gemini-2.5-flash",
"api_token": "your-api-key-here"
}'
```
#### List Saved Models
```bash
curl -X GET "http://localhost:8000/models"
```
#### Delete a Model Configuration
```bash
curl -X DELETE "http://localhost:8000/models/my-gemini"
```
## Scraping Approaches
### 1. Schema-based Scraping (Faster)
- Generates CSS selectors for targeted extraction
- Caches schemas for repeated requests
- Faster execution for structured websites
### 2. LLM-based Scraping (More Flexible)
- Direct LLM extraction without schema generation
- More flexible for complex or dynamic content
- Better for unstructured data extraction
## Supported LLM Providers
The API supports any LLM provider that crawl4ai supports, including:
- **Google Gemini**: `gemini/gemini-2.5-flash`, `gemini/gemini-pro`
- **OpenAI**: `openai/gpt-4`, `openai/gpt-3.5-turbo`
- **Anthropic**: `anthropic/claude-3-opus`, `anthropic/claude-3-sonnet`
- **And more...**
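Provider strings follow the `vendor/model` convention shown above. A small sketch of how such a string could be split (illustrative only, not part of the shipped example code):

```python
def split_provider(provider):
    """Split a 'vendor/model' provider string into its two parts.
    (Illustrative helper; the library handles this internally.)"""
    vendor, _, model = provider.partition("/")
    return vendor, model

# e.g. split_provider("gemini/gemini-2.5-flash")
# yields ("gemini", "gemini-2.5-flash")
```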
## API Endpoints
### Core Endpoints
- `POST /scrape` - Schema-based scraping
- `POST /scrape-with-llm` - LLM-based scraping
- `GET /schemas` - List cached schemas
- `POST /clear-cache` - Clear schema cache
- `GET /health` - Health check
### Model Management Endpoints
- `GET /models` - List saved model configurations
- `POST /models` - Save a new model configuration
- `DELETE /models/{model_name}` - Delete a model configuration
### API Request History
- `GET /saved-requests` - List all saved API requests
- `DELETE /saved-requests/{request_id}` - Delete a saved request
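The endpoints above can be summarized as a small routing table. A client-side sketch (assuming the default `http://localhost:8000` base URL; `build_request` is a hypothetical helper, not part of the example code) that maps each operation to its HTTP request without performing any network I/O:

```python
API_BASE = "http://localhost:8000"  # assumed default server address

def build_request(operation, **params):
    """Map a logical operation to (method, url, json_body)."""
    routes = {
        "scrape": ("POST", "/scrape", {"url", "query", "model_name"}),
        "scrape_llm": ("POST", "/scrape-with-llm", {"url", "query", "model_name"}),
        "list_models": ("GET", "/models", set()),
        "save_model": ("POST", "/models", {"model_name", "provider", "api_token"}),
        "delete_model": ("DELETE", "/models/{model_name}", set()),
        "list_requests": ("GET", "/saved-requests", set()),
    }
    method, path, allowed = routes[operation]
    if "{" in path:
        path = path.format(**params)  # fill path parameters
    body = {k: v for k, v in params.items() if k in allowed} or None
    return method, API_BASE + path, body
```

The returned triple can then be fed to any HTTP client (`requests`, `httpx`, etc.).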
## Request/Response Examples
### Scrape Request
```json
{
"url": "https://example.com",
"query": "Extract the product name, price, and description",
"model_name": "my-custom-model"
}
```
### Scrape Response
```json
{
"success": true,
"url": "https://example.com",
"query": "Extract the product name, price, and description",
"extracted_data": {
"product_name": "Example Product",
"price": "$99.99",
"description": "This is an example product description"
},
"schema_used": { ... },
"timestamp": "2024-01-01T12:00:00Z"
}
```
### Model Configuration Request
```json
{
"model_name": "my-gemini",
"provider": "gemini/gemini-2.5-flash",
"api_token": "your-api-key-here"
}
```
## Testing
Run the test script to verify the model management functionality:
```bash
python test_models.py
```
## File Structure
```
website-to-api/
├── api_server.py # FastAPI server with all endpoints
├── web_scraper_lib.py # Core scraping library
├── test_models.py # Test script for model management
├── requirements.txt # Dependencies
├── static/ # Frontend files
│ ├── index.html # Main HTML interface
│ ├── styles.css # CSS styles (minimalist theme)
│ └── script.js # JavaScript functionality
├── schemas/ # Cached schemas
├── models/ # Saved model configurations
├── saved_requests/ # API request history
└── README.md # This file
```
## Advanced Usage
### Using the Library Directly
```python
import asyncio

from web_scraper_lib import WebScraperAgent

async def main():
    # Initialize agent
    agent = WebScraperAgent()

    # Save a model configuration
    agent.save_model_config(
        model_name="my-model",
        provider="openai/gpt-4",
        api_token="your-api-key"
    )

    # Schema-based scraping
    result = await agent.scrape_data(
        url="https://example.com",
        query="Extract product information",
        model_name="my-model"
    )

    # LLM-based scraping
    result = await agent.scrape_data_with_llm(
        url="https://example.com",
        query="Extract product information",
        model_name="my-model"
    )

asyncio.run(main())
```
### Schema Caching
The system automatically caches generated schemas based on URL and query combinations:
- **First request**: Generates schema using AI
- **Subsequent requests**: Uses cached schema for faster extraction
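A minimal sketch of how such a cache could work, keyed on a hash of the URL and query (the actual library's scheme may differ; `load_or_generate` and the `generate` callback are hypothetical names):

```python
import hashlib
import json
import os

def schema_cache_key(url, query):
    """Derive a stable filename for a (url, query) pair."""
    digest = hashlib.sha256(f"{url}\n{query}".encode("utf-8")).hexdigest()
    return f"{digest[:16]}.json"

def load_or_generate(url, query, generate, cache_dir="schemas"):
    """Return a cached schema if present, otherwise generate and cache it."""
    os.makedirs(cache_dir, exist_ok=True)
    path = os.path.join(cache_dir, schema_cache_key(url, query))
    if os.path.exists(path):
        with open(path) as f:
            return json.load(f)       # cache hit: no LLM call needed
    schema = generate(url, query)     # cache miss: one-time LLM call
    with open(path, "w") as f:
        json.dump(schema, f)
    return schema
```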
### API Request History
All API requests are automatically saved with:
- Request details (URL, query, model used)
- Response data
- Timestamp
- cURL command for re-execution
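Rebuilding a runnable cURL command from a saved record can be sketched like this (`to_curl` is a hypothetical helper over the saved request's `method`, `endpoint`, `headers`, and `body` fields; the server's own formatting may differ):

```python
import json

def to_curl(saved):
    """Rebuild a cURL command from a saved request record."""
    parts = [f"curl -X {saved['method']}",
             f'"http://localhost:8000{saved["endpoint"]}"']
    for k, v in saved.get("headers", {}).items():
        parts.append(f'-H "{k}: {v}"')
    if saved.get("body"):
        parts.append(f"-d '{json.dumps(saved['body'])}'")
    return " \\\n  ".join(parts)  # one flag per line for readability
```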
### Duplicate Prevention
The system prevents saving duplicate requests:
- Same URL + query combinations are not saved multiple times
- Returns existing request ID for duplicates
- Keeps the API request history clean
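The dedup rule can be sketched as a lookup over previously saved requests (a hypothetical `find_duplicate` helper mirroring the server's check):

```python
def find_duplicate(saved_requests, endpoint, url, query):
    """Return the id of an existing request with the same endpoint,
    URL, and query, or None if there is no duplicate."""
    for req in saved_requests:
        if (req["endpoint"] == endpoint
                and req["body"].get("url") == url
                and req["body"].get("query") == query):
            return req["id"]
    return None
```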
## Error Handling
The API provides detailed error messages for common issues:
- Invalid URLs
- Missing model configurations
- API key errors
- Network timeouts
- Parsing errors
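Since failures are raised as FastAPI `HTTPException`s, error payloads arrive as `{"detail": "..."}`. A client-side sketch for surfacing them (`error_detail` is a hypothetical helper):

```python
def error_detail(status_code, payload):
    """Pull the human-readable error out of an error response;
    returns None for successful (2xx) responses."""
    if 200 <= status_code < 300:
        return None
    return payload.get("detail", "Unknown error")
```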


@@ -0,0 +1,363 @@
from fastapi import FastAPI, HTTPException
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel, HttpUrl
from typing import Dict, Any, Optional, Union, List
import uvicorn
import asyncio
import os
import json
from datetime import datetime
from web_scraper_lib import WebScraperAgent, scrape_website
app = FastAPI(
title="Web Scraper API",
description="Convert any website into a structured data API. Provide a URL and tell AI what data you need in plain English.",
version="1.0.0"
)
# Mount static files
if os.path.exists("static"):
app.mount("/static", StaticFiles(directory="static"), name="static")
# Mount assets directory
if os.path.exists("assets"):
app.mount("/assets", StaticFiles(directory="assets"), name="assets")
# Initialize the scraper agent
scraper_agent = WebScraperAgent()
# Create directory for saved API requests
os.makedirs("saved_requests", exist_ok=True)
class ScrapeRequest(BaseModel):
url: HttpUrl
query: str
model_name: Optional[str] = None
class ModelConfigRequest(BaseModel):
model_name: str
provider: str
api_token: str
class ScrapeResponse(BaseModel):
success: bool
url: str
query: str
extracted_data: Union[Dict[str, Any], list]
schema_used: Optional[Dict[str, Any]] = None
timestamp: Optional[str] = None
error: Optional[str] = None
class SavedApiRequest(BaseModel):
id: str
endpoint: str
method: str
headers: Dict[str, str]
body: Dict[str, Any]
timestamp: str
response: Optional[Dict[str, Any]] = None
def save_api_request(endpoint: str, method: str, headers: Dict[str, str], body: Dict[str, Any], response: Optional[Dict[str, Any]] = None) -> str:
"""Save an API request to a JSON file."""
# Check for duplicate requests (same URL and query)
if endpoint in ["/scrape", "/scrape-with-llm"] and "url" in body and "query" in body:
existing_requests = get_saved_requests()
for existing_request in existing_requests:
if (existing_request.endpoint == endpoint and
existing_request.body.get("url") == body["url"] and
existing_request.body.get("query") == body["query"]):
print(f"Duplicate request found for URL: {body['url']} and query: {body['query']}")
return existing_request.id # Return existing request ID instead of creating new one
request_id = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
saved_request = SavedApiRequest(
id=request_id,
endpoint=endpoint,
method=method,
headers=headers,
body=body,
timestamp=datetime.now().isoformat(),
response=response
)
file_path = os.path.join("saved_requests", f"{request_id}.json")
with open(file_path, "w") as f:
json.dump(saved_request.dict(), f, indent=2)
return request_id
def get_saved_requests() -> List[SavedApiRequest]:
"""Get all saved API requests."""
requests = []
if os.path.exists("saved_requests"):
for filename in os.listdir("saved_requests"):
if filename.endswith('.json'):
file_path = os.path.join("saved_requests", filename)
try:
with open(file_path, "r") as f:
data = json.load(f)
requests.append(SavedApiRequest(**data))
except Exception as e:
print(f"Error loading saved request {filename}: {e}")
# Sort by timestamp (newest first)
requests.sort(key=lambda x: x.timestamp, reverse=True)
return requests
@app.get("/")
async def root():
"""Serve the frontend interface."""
if os.path.exists("static/index.html"):
return FileResponse("static/index.html")
else:
return {
"message": "Web Scraper API",
"description": "Convert any website into structured data with AI",
"endpoints": {
"/scrape": "POST - Schema-based scraping",
"/scrape-with-llm": "POST - LLM-based scraping",
"/schemas": "GET - List cached schemas",
"/clear-cache": "POST - Clear schema cache",
"/models": "GET - List saved model configurations, POST - Save a new model configuration",
"/models/{model_name}": "DELETE - Delete a model configuration",
"/saved-requests": "GET - List saved API requests"
}
}
@app.post("/scrape", response_model=ScrapeResponse)
async def scrape_website_endpoint(request: ScrapeRequest):
"""
Scrape structured data from any website.
This endpoint:
1. Takes a URL and plain English query
2. Generates a custom scraper using AI
3. Returns structured data
"""
try:
# Save the API request
headers = {"Content-Type": "application/json"}
body = {
"url": str(request.url),
"query": request.query,
"model_name": request.model_name
}
result = await scraper_agent.scrape_data(
url=str(request.url),
query=request.query,
model_name=request.model_name
)
response_data = ScrapeResponse(
success=True,
url=result["url"],
query=result["query"],
extracted_data=result["extracted_data"],
schema_used=result["schema_used"],
timestamp=result["timestamp"]
)
# Save the request with response
save_api_request(
endpoint="/scrape",
method="POST",
headers=headers,
body=body,
response=response_data.dict()
)
return response_data
except Exception as e:
# Save the failed request
headers = {"Content-Type": "application/json"}
body = {
"url": str(request.url),
"query": request.query,
"model_name": request.model_name
}
save_api_request(
endpoint="/scrape",
method="POST",
headers=headers,
body=body,
response={"error": str(e)}
)
raise HTTPException(status_code=500, detail=f"Scraping failed: {str(e)}")
@app.post("/scrape-with-llm", response_model=ScrapeResponse)
async def scrape_website_endpoint_with_llm(request: ScrapeRequest):
"""
Scrape structured data from any website using a custom LLM model.
"""
try:
# Save the API request
headers = {"Content-Type": "application/json"}
body = {
"url": str(request.url),
"query": request.query,
"model_name": request.model_name
}
result = await scraper_agent.scrape_data_with_llm(
url=str(request.url),
query=request.query,
model_name=request.model_name
)
response_data = ScrapeResponse(
success=True,
url=result["url"],
query=result["query"],
extracted_data=result["extracted_data"],
timestamp=result["timestamp"]
)
# Save the request with response
save_api_request(
endpoint="/scrape-with-llm",
method="POST",
headers=headers,
body=body,
response=response_data.dict()
)
return response_data
except Exception as e:
# Save the failed request
headers = {"Content-Type": "application/json"}
body = {
"url": str(request.url),
"query": request.query,
"model_name": request.model_name
}
save_api_request(
endpoint="/scrape-with-llm",
method="POST",
headers=headers,
body=body,
response={"error": str(e)}
)
raise HTTPException(status_code=500, detail=f"Scraping failed: {str(e)}")
@app.get("/saved-requests")
async def list_saved_requests():
"""List all saved API requests."""
try:
requests = get_saved_requests()
return {
"success": True,
"requests": [req.dict() for req in requests],
"count": len(requests)
}
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to list saved requests: {str(e)}")
@app.delete("/saved-requests/{request_id}")
async def delete_saved_request(request_id: str):
"""Delete a saved API request."""
try:
file_path = os.path.join("saved_requests", f"{request_id}.json")
if os.path.exists(file_path):
os.remove(file_path)
return {
"success": True,
"message": f"Saved request '{request_id}' deleted successfully"
}
else:
raise HTTPException(status_code=404, detail=f"Saved request '{request_id}' not found")
except HTTPException:
raise  # re-raise the 404 instead of rewrapping it as a 500
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to delete saved request: {str(e)}")
@app.get("/schemas")
async def list_cached_schemas():
"""List all cached schemas."""
try:
schemas = await scraper_agent.get_cached_schemas()
return {
"success": True,
"cached_schemas": schemas,
"count": len(schemas)
}
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to list schemas: {str(e)}")
@app.post("/clear-cache")
async def clear_schema_cache():
"""Clear all cached schemas."""
try:
scraper_agent.clear_cache()
return {
"success": True,
"message": "Schema cache cleared successfully"
}
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to clear cache: {str(e)}")
@app.get("/models")
async def list_models():
"""List all saved model configurations."""
try:
models = scraper_agent.list_saved_models()
return {
"success": True,
"models": models,
"count": len(models)
}
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to list models: {str(e)}")
@app.post("/models")
async def save_model_config(request: ModelConfigRequest):
"""Save a new model configuration."""
try:
success = scraper_agent.save_model_config(
model_name=request.model_name,
provider=request.provider,
api_token=request.api_token
)
if success:
return {
"success": True,
"message": f"Model configuration '{request.model_name}' saved successfully"
}
else:
raise HTTPException(status_code=500, detail="Failed to save model configuration")
except HTTPException:
raise  # keep the original HTTPException detail
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to save model: {str(e)}")
@app.delete("/models/{model_name}")
async def delete_model_config(model_name: str):
"""Delete a model configuration."""
try:
success = scraper_agent.delete_model_config(model_name)
if success:
return {
"success": True,
"message": f"Model configuration '{model_name}' deleted successfully"
}
else:
raise HTTPException(status_code=404, detail=f"Model configuration '{model_name}' not found")
except HTTPException:
raise  # re-raise the 404 instead of rewrapping it as a 500
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to delete model: {str(e)}")
@app.get("/health")
async def health_check():
"""Health check endpoint."""
return {"status": "healthy", "service": "web-scraper-api"}
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)


@@ -0,0 +1,49 @@
#!/usr/bin/env python3
"""
Startup script for the Web Scraper API with frontend interface.
"""
import os
import sys
import uvicorn
from pathlib import Path
def main():
# Check if static directory exists
static_dir = Path("static")
if not static_dir.exists():
print("❌ Static directory not found!")
print("Please make sure the 'static' directory exists with the frontend files.")
sys.exit(1)
# Check if required frontend files exist
required_files = ["index.html", "styles.css", "script.js"]
missing_files = []
for file in required_files:
if not (static_dir / file).exists():
missing_files.append(file)
if missing_files:
print(f"❌ Missing frontend files: {', '.join(missing_files)}")
print("Please make sure all frontend files are present in the static directory.")
sys.exit(1)
print("🚀 Starting Web Scraper API with Frontend Interface")
print("=" * 50)
print("📁 Static files found and ready to serve")
print("🌐 Frontend will be available at: http://localhost:8000")
print("🔌 API endpoints available at: http://localhost:8000/docs")
print("=" * 50)
# Start the server
uvicorn.run(
"api_server:app",
host="0.0.0.0",
port=8000,
reload=True,
log_level="info"
)
if __name__ == "__main__":
main()

Binary file (image, 5.8 KiB) not shown.


@@ -0,0 +1,5 @@
crawl4ai
fastapi
uvicorn
pydantic
litellm


@@ -0,0 +1,201 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Web2API Example</title>
<link rel="stylesheet" href="/static/styles.css">
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
</head>
<body>
<!-- Header -->
<header class="header">
<div class="header-content">
<div class="logo">
<img src="/assets/crawl4ai_logo.jpg" alt="Crawl4AI Logo" class="logo-image">
<span>Web2API Example</span>
</div>
<nav class="nav-links">
<a href="#" class="nav-link active" data-page="scrape">Scrape</a>
<a href="#" class="nav-link" data-page="models">Models</a>
<a href="#" class="nav-link" data-page="requests">API Requests</a>
</nav>
</div>
</header>
<!-- Main Content -->
<main class="main-content">
<!-- Scrape Page -->
<div id="scrape-page" class="page active">
<div class="hero-section">
<h1 class="hero-title">Turn Any Website Into An API</h1>
<p class="hero-subtitle">This example shows how to turn any website into an API using Crawl4AI.</p>
</div>
<!-- Workflow Demonstration -->
<div class="workflow-demo">
<div class="workflow-step">
<h3 class="step-title">1. Your Request</h3>
<div class="request-box">
<div class="input-group">
<label>URL:</label>
<input type="url" id="url" name="url" placeholder="https://example-bookstore.com/new-releases" required>
</div>
<div class="input-group">
<label>QUERY:</label>
<textarea id="query" name="query" placeholder="Extract all the book titles, their authors, and the biography of the author" required></textarea>
</div>
<div class="form-options">
<div class="option-group">
<label for="scraping-approach">Approach:</label>
<select id="scraping-approach" name="scraping_approach">
<option value="llm">LLM-based (More Flexible)</option>
<option value="schema">Schema-based (Uses LLM once!)</option>
</select>
</div>
<div class="option-group">
<label for="model-select">Model:</label>
<select id="model-select" name="model_name" required>
<option value="">Select a Model</option>
</select>
</div>
</div>
<button type="submit" id="extract-btn" class="extract-btn">
<i class="fas fa-magic"></i>
Extract Data
</button>
</div>
</div>
<div class="workflow-arrow"></div>
<div class="workflow-step">
<h3 class="step-title">2. Your Instant API & Data</h3>
<div class="response-container">
<div class="api-request-box">
<label>API Request (cURL):</label>
<pre id="curl-example">curl -X POST http://localhost:8000/scrape -H "Content-Type: application/json" -d '{"url": "...", "query": "..."}'
# Or for LLM-based approach:
curl -X POST http://localhost:8000/scrape-with-llm -H "Content-Type: application/json" -d '{"url": "...", "query": "..."}'</pre>
</div>
<div class="json-response-box">
<label>JSON Response:</label>
<pre id="json-output">{
"success": true,
"extracted_data": [
{
"title": "Example Book",
"author": "John Doe",
"description": "A great book..."
}
]
}</pre>
</div>
</div>
</div>
</div>
<!-- Results Section -->
<div id="results-section" class="results-section" style="display: none;">
<div class="results-header">
<h2>Extracted Data</h2>
<button id="copy-json" class="copy-btn">
<i class="fas fa-copy"></i>
Copy JSON
</button>
</div>
<div class="results-content">
<div class="result-info">
<div class="info-item">
<span class="label">URL:</span>
<span id="result-url" class="value"></span>
</div>
<div class="info-item">
<span class="label">Query:</span>
<span id="result-query" class="value"></span>
</div>
<div class="info-item">
<span class="label">Model Used:</span>
<span id="result-model" class="value"></span>
</div>
</div>
<div class="json-display">
<pre id="actual-json-output"></pre>
</div>
</div>
</div>
<!-- Loading State -->
<div id="loading" class="loading" style="display: none;">
<div class="spinner"></div>
<p>AI is analyzing the website and extracting data...</p>
</div>
</div>
<!-- Models Page -->
<div id="models-page" class="page">
<div class="models-header">
<h1>Model Configuration</h1>
<p>Configure and manage your AI model configurations</p>
</div>
<div class="models-container">
<!-- Add New Model Form -->
<div class="model-form-section">
<h3>Add New Model</h3>
<form id="model-form" class="model-form">
<div class="form-row">
<div class="input-group">
<label for="model-name">Model Name:</label>
<input type="text" id="model-name" name="model_name" placeholder="my-gemini" required>
</div>
<div class="input-group">
<label for="provider">Provider:</label>
<input type="text" id="provider" name="provider" placeholder="gemini/gemini-2.5-flash" required>
</div>
</div>
<div class="input-group">
<label for="api-token">API Token:</label>
<input type="password" id="api-token" name="api_token" placeholder="Enter your API token" required>
</div>
<button type="submit" class="save-btn">
<i class="fas fa-save"></i>
Save Model
</button>
</form>
</div>
<!-- Saved Models List -->
<div class="saved-models-section">
<h3>Saved Models</h3>
<div id="models-list" class="models-list">
<!-- Models will be loaded here -->
</div>
</div>
</div>
</div>
<!-- API Requests Page -->
<div id="requests-page" class="page">
<div class="requests-header">
<h1>Saved API Requests</h1>
<p>View and manage your previous API requests</p>
</div>
<div class="requests-container">
<div class="requests-list" id="requests-list">
<!-- Saved requests will be loaded here -->
</div>
</div>
</div>
</main>
<!-- Toast Notifications -->
<div id="toast-container" class="toast-container"></div>
<script src="/static/script.js"></script>
</body>
</html>


@@ -0,0 +1,401 @@
// API Configuration
const API_BASE_URL = 'http://localhost:8000';
// DOM Elements
const navLinks = document.querySelectorAll('.nav-link');
const pages = document.querySelectorAll('.page');
const scrapeForm = document.getElementById('scrape-form');
const modelForm = document.getElementById('model-form');
const modelSelect = document.getElementById('model-select');
const modelsList = document.getElementById('models-list');
const resultsSection = document.getElementById('results-section');
const loadingSection = document.getElementById('loading');
const copyJsonBtn = document.getElementById('copy-json');
// Navigation
navLinks.forEach(link => {
link.addEventListener('click', (e) => {
e.preventDefault();
const targetPage = link.dataset.page;
// Update active nav link
navLinks.forEach(l => l.classList.remove('active'));
link.classList.add('active');
// Show target page
pages.forEach(page => page.classList.remove('active'));
document.getElementById(`${targetPage}-page`).classList.add('active');
// Load data for the page
if (targetPage === 'models') {
loadModels();
} else if (targetPage === 'requests') {
loadSavedRequests();
}
});
});
// Extract Button Handler
document.getElementById('extract-btn').addEventListener('click', async (e) => {
e.preventDefault();
// Scroll to results section immediately when button is clicked
document.getElementById('results-section').scrollIntoView({
behavior: 'smooth',
block: 'start'
});
const url = document.getElementById('url').value;
const query = document.getElementById('query').value;
const headless = true; // Always use headless mode
const model_name = document.getElementById('model-select').value || null;
const scraping_approach = document.getElementById('scraping-approach').value;
if (!url || !query) {
showToast('Please fill in both URL and query fields', 'error');
return;
}
if (!model_name) {
showToast('Please select a model from the dropdown or add one from the Models page', 'error');
return;
}
const data = {
url: url,
query: query,
headless: headless,
model_name: model_name
};
// Show loading state
showLoading(true);
hideResults();
try {
// Choose endpoint based on scraping approach
const endpoint = scraping_approach === 'llm' ? '/scrape-with-llm' : '/scrape';
const response = await fetch(`${API_BASE_URL}${endpoint}`, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(data)
});
const result = await response.json();
if (response.ok) {
displayResults(result);
showToast(`Data extracted successfully using ${scraping_approach === 'llm' ? 'LLM-based' : 'Schema-based'} approach!`, 'success');
} else {
throw new Error(result.detail || 'Failed to extract data');
}
} catch (error) {
console.error('Scraping error:', error);
showToast(`Error: ${error.message}`, 'error');
} finally {
showLoading(false);
}
});
// Model Form Handler
modelForm.addEventListener('submit', async (e) => {
e.preventDefault();
const formData = new FormData(modelForm);
const data = {
model_name: formData.get('model_name'),
provider: formData.get('provider'),
api_token: formData.get('api_token')
};
try {
const response = await fetch(`${API_BASE_URL}/models`, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(data)
});
const result = await response.json();
if (response.ok) {
showToast('Model saved successfully!', 'success');
modelForm.reset();
loadModels();
loadModelSelect();
} else {
throw new Error(result.detail || 'Failed to save model');
}
} catch (error) {
console.error('Model save error:', error);
showToast(`Error: ${error.message}`, 'error');
}
});
// Copy JSON Button
copyJsonBtn.addEventListener('click', () => {
const actualJsonOutput = document.getElementById('actual-json-output');
const textToCopy = actualJsonOutput.textContent;
navigator.clipboard.writeText(textToCopy).then(() => {
showToast('JSON copied to clipboard!', 'success');
}).catch(() => {
showToast('Failed to copy JSON', 'error');
});
});
// Load Models
async function loadModels() {
try {
const response = await fetch(`${API_BASE_URL}/models`);
const result = await response.json();
if (response.ok) {
displayModels(result.models);
} else {
throw new Error(result.detail || 'Failed to load models');
}
} catch (error) {
console.error('Load models error:', error);
showToast(`Error: ${error.message}`, 'error');
}
}
// Display Models
function displayModels(models) {
if (models.length === 0) {
modelsList.innerHTML = '<p style="text-align: center; color: #CCCCCC; padding: 2rem;">No models saved yet. Add your first model above!</p>';
return;
}
modelsList.innerHTML = models.map(model => `
<div class="model-card">
<div class="model-info">
<div class="model-name">${model}</div>
<div class="model-provider">Model Configuration</div>
</div>
<div class="model-actions">
<button class="btn btn-danger" onclick="deleteModel('${model}')">
<i class="fas fa-trash"></i>
Delete
</button>
</div>
</div>
`).join('');
}
// Delete Model
async function deleteModel(modelName) {
if (!confirm(`Are you sure you want to delete the model "${modelName}"?`)) {
return;
}
try {
const response = await fetch(`${API_BASE_URL}/models/${modelName}`, {
method: 'DELETE'
});
const result = await response.json();
if (response.ok) {
showToast('Model deleted successfully!', 'success');
loadModels();
loadModelSelect();
} else {
throw new Error(result.detail || 'Failed to delete model');
}
} catch (error) {
console.error('Delete model error:', error);
showToast(`Error: ${error.message}`, 'error');
}
}
// Load Model Select Options
async function loadModelSelect() {
try {
const response = await fetch(`${API_BASE_URL}/models`);
const result = await response.json();
if (response.ok) {
// Clear existing options
modelSelect.innerHTML = '<option value="">Select a Model</option>';
// Add model options
result.models.forEach(model => {
const option = document.createElement('option');
option.value = model;
option.textContent = model;
modelSelect.appendChild(option);
});
}
} catch (error) {
console.error('Load model select error:', error);
}
}
// Display Results
function displayResults(result) {
// Update result info
document.getElementById('result-url').textContent = result.url;
document.getElementById('result-query').textContent = result.query;
document.getElementById('result-model').textContent = result.model_name || 'Default Model';
// Display JSON in the actual results section
const actualJsonOutput = document.getElementById('actual-json-output');
actualJsonOutput.textContent = JSON.stringify(result.extracted_data, null, 2);
// Don't update the sample JSON in the workflow demo - keep it as example
// Update the cURL example based on the approach used
const scraping_approach = document.getElementById('scraping-approach').value;
const endpoint = scraping_approach === 'llm' ? '/scrape-with-llm' : '/scrape';
const curlExample = document.getElementById('curl-example');
curlExample.textContent = `curl -X POST http://localhost:8000${endpoint} -H "Content-Type: application/json" -d '{"url": "${result.url}", "query": "${result.query}", "model_name": "${result.model_name}"}'`;
// Show results section
resultsSection.style.display = 'block';
resultsSection.scrollIntoView({ behavior: 'smooth' });
}
// Show/Hide Loading
function showLoading(show) {
loadingSection.style.display = show ? 'block' : 'none';
}
// Hide Results
function hideResults() {
resultsSection.style.display = 'none';
}
// Toast Notifications
function showToast(message, type = 'info') {
const toastContainer = document.getElementById('toast-container');
const toast = document.createElement('div');
toast.className = `toast ${type}`;
const icon = type === 'success' ? 'fas fa-check-circle' :
type === 'error' ? 'fas fa-exclamation-circle' :
'fas fa-info-circle';
toast.innerHTML = `
<i class="${icon}"></i>
<span>${message}</span>
`;
toastContainer.appendChild(toast);
// Auto remove after 5 seconds
setTimeout(() => {
toast.remove();
}, 5000);
}
// Load Saved Requests
async function loadSavedRequests() {
try {
const response = await fetch(`${API_BASE_URL}/saved-requests`);
const result = await response.json();
if (response.ok) {
displaySavedRequests(result.requests);
} else {
throw new Error(result.detail || 'Failed to load saved requests');
}
} catch (error) {
console.error('Load saved requests error:', error);
showToast(`Error: ${error.message}`, 'error');
}
}
// Display Saved Requests
function displaySavedRequests(requests) {
const requestsList = document.getElementById('requests-list');
if (requests.length === 0) {
requestsList.innerHTML = '<p style="text-align: center; color: #CCCCCC; padding: 2rem;">No saved API requests yet. Make your first request from the Scrape page!</p>';
return;
}
requestsList.innerHTML = requests.map(request => {
const url = request.body.url;
const query = request.body.query;
const model = request.body.model_name || 'Default Model';
const endpoint = request.endpoint;
// Create curl command
const curlCommand = `curl -X POST http://localhost:8000${endpoint} \\
-H "Content-Type: application/json" \\
-d '{
"url": "${url}",
"query": "${query}",
"model_name": "${model}"
}'`;
return `
<div class="request-card">
<div class="request-header">
<div class="request-info">
<div class="request-url">${url}</div>
<div class="request-query">${query}</div>
</div>
<div class="request-actions">
<button class="btn-danger" onclick="deleteSavedRequest('${request.id}')">
<i class="fas fa-trash"></i>
Delete
</button>
</div>
</div>
<div class="request-curl">
<h4>cURL Command:</h4>
<pre>${curlCommand}</pre>
</div>
</div>
`;
}).join('');
}
// Delete Saved Request
async function deleteSavedRequest(requestId) {
if (!confirm('Are you sure you want to delete this saved request?')) {
return;
}
try {
const response = await fetch(`${API_BASE_URL}/saved-requests/${requestId}`, {
method: 'DELETE'
});
const result = await response.json();
if (response.ok) {
showToast('Saved request deleted successfully!', 'success');
loadSavedRequests();
} else {
throw new Error(result.detail || 'Failed to delete saved request');
}
} catch (error) {
console.error('Delete saved request error:', error);
showToast(`Error: ${error.message}`, 'error');
}
}
// Initialize
document.addEventListener('DOMContentLoaded', () => {
loadModelSelect();
// Check if API is available
fetch(`${API_BASE_URL}/health`)
.then(response => {
if (!response.ok) {
showToast('Warning: API server might not be running', 'error');
}
})
.catch(() => {
showToast('Warning: Cannot connect to API server. Make sure it\'s running on localhost:8000', 'error');
});
});
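The whole point of the example is that the scraped site behaves like an API, so the request the frontend assembles can be reproduced from any HTTP client. A minimal Python sketch (the endpoint names and payload shape mirror the `fetch` call above; the example URL, query, and model name are illustrative, and the live call assumes the server is running on localhost:8000):

```python
import json

API_BASE_URL = "http://localhost:8000"

def build_scrape_request(url: str, query: str, model_name: str, approach: str = "schema"):
    """Mirror the frontend: pick the endpoint by approach and build the JSON body."""
    endpoint = "/scrape-with-llm" if approach == "llm" else "/scrape"
    payload = {"url": url, "query": query, "headless": True, "model_name": model_name}
    return API_BASE_URL + endpoint, json.dumps(payload)

if __name__ == "__main__":
    import requests  # only needed for the live call
    target, body = build_scrape_request(
        "https://example.com", "Extract the page title", "my-gemini", approach="llm"
    )
    resp = requests.post(target, data=body, headers={"Content-Type": "application/json"})
    print(resp.json())
```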

View File

@@ -0,0 +1,765 @@
/* Reset and Base Styles */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: #000000;
color: #FFFFFF;
line-height: 1.6;
font-size: 16px;
}
/* Header */
.header {
border-bottom: 1px solid #333;
padding: 1rem 0;
background: #000000;
position: sticky;
top: 0;
z-index: 100;
}
.header-content {
max-width: 1200px;
margin: 0 auto;
padding: 0 2rem;
display: flex;
justify-content: space-between;
align-items: center;
}
.logo {
display: flex;
align-items: center;
gap: 0.5rem;
font-size: 1.5rem;
font-weight: 600;
color: #FFFFFF;
}
.logo-image {
width: 40px;
height: 40px;
border-radius: 4px;
object-fit: contain;
}
.nav-links {
display: flex;
gap: 2rem;
}
.nav-link {
color: #CCCCCC;
text-decoration: none;
font-weight: 500;
transition: color 0.2s ease;
}
.nav-link:hover,
.nav-link.active {
color: #FFFFFF;
}
/* Main Content */
.main-content {
max-width: 1200px;
margin: 0 auto;
padding: 2rem;
}
.page {
display: none;
}
.page.active {
display: block;
}
/* Hero Section */
.hero-section {
text-align: center;
margin-bottom: 4rem;
padding: 2rem 0;
}
.hero-title {
font-size: 3rem;
font-weight: 700;
color: #FFFFFF;
margin-bottom: 1rem;
line-height: 1.2;
}
.hero-subtitle {
font-size: 1.25rem;
color: #CCCCCC;
max-width: 600px;
margin: 0 auto;
}
/* Workflow Demo */
.workflow-demo {
display: grid;
grid-template-columns: 1fr auto 1fr;
gap: 2rem;
align-items: start;
margin-bottom: 4rem;
}
.workflow-step {
display: flex;
flex-direction: column;
gap: 1rem;
}
.step-title {
font-size: 1.25rem;
font-weight: 600;
color: #FFFFFF;
text-align: center;
margin-bottom: 1rem;
}
.workflow-arrow {
font-size: 2rem;
font-weight: 700;
color: #09b5a5;
display: flex;
align-items: center;
justify-content: center;
margin-top: 20rem;
}
/* Request Box */
.request-box {
border: 2px solid #333;
border-radius: 8px;
padding: 2rem;
background: #111111;
}
.input-group {
margin-bottom: 1.5rem;
}
.input-group label {
display: block;
font-family: 'Courier New', monospace;
font-weight: 600;
color: #FFFFFF;
margin-bottom: 0.5rem;
font-size: 0.9rem;
}
.input-group input,
.input-group textarea,
.input-group select {
width: 100%;
padding: 0.75rem;
border: 1px solid #333;
border-radius: 4px;
font-family: 'Courier New', monospace;
font-size: 0.9rem;
background: #1A1A1A;
color: #FFFFFF;
transition: border-color 0.2s ease;
}
.input-group input:focus,
.input-group textarea:focus,
.input-group select:focus {
outline: none;
border-color: #09b5a5;
}
.input-group textarea {
min-height: 80px;
resize: vertical;
}
.form-options {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1rem;
margin-bottom: 1.5rem;
}
.option-group {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.option-group label {
font-family: 'Courier New', monospace;
font-weight: 600;
color: #FFFFFF;
font-size: 0.9rem;
}
.option-group input[type="checkbox"] {
width: auto;
margin-right: 0.5rem;
}
.extract-btn {
width: 100%;
padding: 1rem;
background: #09b5a5;
color: #000000;
border: none;
border-radius: 4px;
font-size: 1rem;
font-weight: 600;
cursor: pointer;
transition: background-color 0.2s ease;
display: flex;
align-items: center;
justify-content: center;
gap: 0.5rem;
}
.extract-btn:hover {
background: #07a295;
}
/* Dropdown specific styling */
select,
.input-group select,
.option-group select {
cursor: pointer !important;
appearance: none !important;
-webkit-appearance: none !important;
-moz-appearance: none !important;
-ms-appearance: none !important;
background-image: url("data:image/svg+xml;charset=UTF-8,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='%23FFFFFF' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3e%3cpolyline points='6,9 12,15 18,9'%3e%3c/polyline%3e%3c/svg%3e") !important;
background-repeat: no-repeat !important;
background-position: right 0.75rem center !important;
background-size: 1rem !important;
padding-right: 2.5rem !important;
border: 1px solid #333 !important;
border-radius: 4px !important;
font-family: 'Courier New', monospace !important;
font-size: 0.9rem !important;
background-color: #1A1A1A !important;
color: #FFFFFF !important;
}
select:hover,
.input-group select:hover,
.option-group select:hover {
border-color: #09b5a5 !important;
}
select:focus,
.input-group select:focus,
.option-group select:focus {
outline: none !important;
border-color: #09b5a5 !important;
}
select option,
.input-group select option,
.option-group select option {
background: #1A1A1A !important;
color: #FFFFFF !important;
padding: 0.5rem !important;
}
/* Response Container */
.response-container {
display: flex;
flex-direction: column;
gap: 1rem;
}
.api-request-box,
.json-response-box {
border: 2px solid #333;
border-radius: 8px;
padding: 1.5rem;
background: #111111;
}
.api-request-box label,
.json-response-box label {
display: block;
font-family: 'Courier New', monospace;
font-weight: 600;
color: #FFFFFF;
margin-bottom: 0.5rem;
font-size: 0.9rem;
}
.api-request-box pre,
.json-response-box pre {
font-family: 'Courier New', monospace;
font-size: 0.85rem;
line-height: 1.5;
color: #FFFFFF;
background: #1A1A1A;
padding: 1rem;
border-radius: 4px;
overflow-x: auto;
white-space: pre-wrap;
word-break: break-all;
}
/* Results Section */
.results-section {
border: 2px solid #333;
border-radius: 8px;
overflow: hidden;
margin-top: 2rem;
background: #111111;
}
.results-header {
background: #1A1A1A;
color: #FFFFFF;
padding: 1rem 1.5rem;
display: flex;
justify-content: space-between;
align-items: center;
border-bottom: 1px solid #333;
}
.results-header h2 {
font-size: 1.25rem;
font-weight: 600;
color: #FFFFFF;
}
.copy-btn {
background: #09b5a5;
color: #000000;
border: none;
padding: 0.5rem 1rem;
border-radius: 4px;
font-size: 0.9rem;
font-weight: 600;
cursor: pointer;
display: flex;
align-items: center;
gap: 0.5rem;
transition: background-color 0.2s ease;
}
.copy-btn:hover {
background: #07a295;
}
.results-content {
padding: 1.5rem;
}
.result-info {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 1rem;
margin-bottom: 1.5rem;
padding: 1rem;
background: #1A1A1A;
border-radius: 4px;
border: 1px solid #333;
}
.info-item {
display: flex;
flex-direction: column;
gap: 0.25rem;
}
.info-item .label {
font-weight: 600;
color: #FFFFFF;
font-size: 0.9rem;
}
.info-item .value {
color: #CCCCCC;
word-break: break-all;
}
.json-display {
background: #1A1A1A;
border-radius: 4px;
overflow: hidden;
border: 1px solid #333;
}
.json-display pre {
color: #FFFFFF;
padding: 1.5rem;
margin: 0;
overflow-x: auto;
font-family: 'Courier New', monospace;
font-size: 0.9rem;
line-height: 1.5;
}
/* Loading State */
.loading {
text-align: center;
padding: 3rem;
}
.spinner {
width: 40px;
height: 40px;
border: 3px solid #333;
border-top: 3px solid #09b5a5;
border-radius: 50%;
animation: spin 1s linear infinite;
margin: 0 auto 1rem;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* Models Page */
.models-header {
text-align: center;
margin-bottom: 3rem;
}
.models-header h1 {
font-size: 2.5rem;
font-weight: 700;
color: #FFFFFF;
margin-bottom: 1rem;
}
.models-header p {
font-size: 1.1rem;
color: #CCCCCC;
}
/* API Requests Page */
.requests-header {
text-align: center;
margin-bottom: 3rem;
}
.requests-header h1 {
font-size: 2.5rem;
font-weight: 700;
color: #FFFFFF;
margin-bottom: 1rem;
}
.requests-header p {
font-size: 1.1rem;
color: #CCCCCC;
}
.requests-container {
max-width: 1200px;
margin: 0 auto;
}
.requests-list {
display: grid;
gap: 1.5rem;
}
.request-card {
border: 2px solid #333;
border-radius: 8px;
padding: 1.5rem;
background: #111111;
transition: border-color 0.2s ease;
}
.request-card:hover {
border-color: #09b5a5;
}
.request-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 1rem;
padding-bottom: 1rem;
border-bottom: 1px solid #333;
}
.request-info {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.request-url {
font-family: 'Courier New', monospace;
font-weight: 600;
color: #09b5a5;
font-size: 1.1rem;
word-break: break-all;
}
.request-query {
color: #CCCCCC;
font-size: 0.9rem;
margin-top: 0.5rem;
word-break: break-all;
}
.request-actions {
display: flex;
gap: 0.5rem;
}
.request-curl {
background: #1A1A1A;
border: 1px solid #333;
border-radius: 4px;
padding: 1rem;
margin-top: 1rem;
}
.request-curl h4 {
color: #FFFFFF;
font-size: 0.9rem;
font-weight: 600;
margin-bottom: 0.5rem;
font-family: 'Courier New', monospace;
}
.request-curl pre {
color: #CCCCCC;
font-size: 0.8rem;
line-height: 1.4;
overflow-x: auto;
white-space: pre-wrap;
word-break: break-all;
background: #111111;
padding: 0.75rem;
border-radius: 4px;
border: 1px solid #333;
}
.models-container {
max-width: 800px;
margin: 0 auto;
}
.model-form-section {
border: 2px solid #333;
border-radius: 8px;
padding: 2rem;
margin-bottom: 2rem;
background: #111111;
}
.model-form-section h3 {
font-size: 1.25rem;
font-weight: 600;
color: #FFFFFF;
margin-bottom: 1.5rem;
}
.model-form {
display: flex;
flex-direction: column;
gap: 1.5rem;
}
.form-row {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1rem;
}
.save-btn {
padding: 1rem;
background: #09b5a5;
color: #000000;
border: none;
border-radius: 4px;
font-size: 1rem;
font-weight: 600;
cursor: pointer;
transition: background-color 0.2s ease;
display: flex;
align-items: center;
justify-content: center;
gap: 0.5rem;
}
.save-btn:hover {
background: #07a295;
}
.saved-models-section h3 {
font-size: 1.25rem;
font-weight: 600;
color: #FFFFFF;
margin-bottom: 1.5rem;
}
.models-list {
display: grid;
gap: 1rem;
}
.model-card {
border: 2px solid #333;
border-radius: 8px;
padding: 1.5rem;
display: flex;
justify-content: space-between;
align-items: center;
transition: border-color 0.2s ease;
background: #111111;
}
.model-card:hover {
border-color: #09b5a5;
}
.model-info {
flex: 1;
}
.model-name {
font-weight: 600;
color: #FFFFFF;
font-size: 1.1rem;
margin-bottom: 0.5rem;
}
.model-provider {
color: #CCCCCC;
font-size: 0.9rem;
}
.model-actions {
display: flex;
gap: 0.5rem;
}
.btn-danger {
background: #FF4444;
color: #FFFFFF;
border: none;
padding: 0.5rem 1rem;
border-radius: 4px;
font-size: 0.9rem;
font-weight: 600;
cursor: pointer;
transition: background-color 0.2s ease;
display: flex;
align-items: center;
gap: 0.5rem;
}
.btn-danger:hover {
background: #CC3333;
}
/* Toast Notifications */
.toast-container {
position: fixed;
top: 20px;
right: 20px;
z-index: 1000;
}
.toast {
background: #111111;
border: 2px solid #333;
border-radius: 4px;
padding: 1rem 1.5rem;
margin-bottom: 0.5rem;
display: flex;
align-items: center;
gap: 0.5rem;
animation: slideIn 0.3s ease;
max-width: 400px;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
color: #FFFFFF;
}
.toast.success {
border-color: #09b5a5;
background: #0A1A1A;
}
.toast.error {
border-color: #FF4444;
background: #1A0A0A;
}
.toast.info {
border-color: #09b5a5;
background: #0A1A1A;
}
@keyframes slideIn {
from {
transform: translateX(100%);
opacity: 0;
}
to {
transform: translateX(0);
opacity: 1;
}
}
/* Responsive Design */
@media (max-width: 768px) {
.header-content {
padding: 0 1rem;
}
.main-content {
padding: 1rem;
}
.hero-title {
font-size: 2rem;
}
.workflow-demo {
grid-template-columns: 1fr;
gap: 1rem;
}
.workflow-arrow {
transform: rotate(90deg);
margin: 1rem 0;
}
.form-options {
grid-template-columns: 1fr;
}
.form-row {
grid-template-columns: 1fr;
}
.result-info {
grid-template-columns: 1fr;
}
.model-card {
flex-direction: column;
gap: 1rem;
text-align: center;
}
.model-actions {
width: 100%;
justify-content: center;
}
}

View File

@@ -0,0 +1,28 @@
import asyncio
from web_scraper_lib import scrape_website
import os
async def test_library():
"""Test the mini library directly."""
print("=== Testing Mini Library ===")
# Test 1: Scrape with a custom model
url = "https://marketplace.mainstreet.co.in/collections/adidas-yeezy/products/adidas-yeezy-boost-350-v2-yecheil-non-reflective"
query = "Extract the following data: Product name, Product price, Product description, Product size. DO NOT EXTRACT ANYTHING ELSE."
model_files = [f for f in os.listdir("models") if f.endswith(".json")] if os.path.exists("models") else []
if not model_files:
raise Exception("No model configurations found in the models directory")
model_name = os.path.splitext(model_files[0])[0]
print(f"Scraping: {url}")
print(f"Query: {query}")
try:
result = await scrape_website(url, query, model_name)
print("✅ Library test successful!")
print(f"Extracted data: {result['extracted_data']}")
except Exception as e:
print(f"❌ Library test failed: {e}")
if __name__ == "__main__":
asyncio.run(test_library())
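The test above depends on a config file being present in `models/`. Its two-field layout follows `ModelConfig.to_dict` in the library file below; a hedged sketch of writing and reading one (the directory, model name, and provider string are illustrative):

```python
import json
import os
import tempfile

def save_model_config(models_dir: str, model_name: str, provider: str, api_token: str) -> str:
    """Write a config file with the same two-field layout ModelConfig serializes."""
    path = os.path.join(models_dir, f"{model_name}.json")
    with open(path, "w") as f:
        json.dump({"provider": provider, "api_token": api_token}, f, indent=2)
    return path

with tempfile.TemporaryDirectory() as d:
    p = save_model_config(d, "my-gemini", "gemini/gemini-2.0-flash", "sk-test")
    with open(p) as f:
        cfg = json.load(f)
    print(cfg["provider"])  # gemini/gemini-2.0-flash
```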

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""
Test script for the new model management functionality.
This script demonstrates how to save and use custom model configurations.
"""
import requests
import json
# API base URL
BASE_URL = "http://localhost:8000"
def test_model_management():
"""Test the model management endpoints."""
print("=== Testing Model Management ===")
# 1. List current models
print("\n1. Listing current models:")
response = requests.get(f"{BASE_URL}/models")
print(f"Status: {response.status_code}")
print(f"Response: {json.dumps(response.json(), indent=2)}")
# 2. Save a model configuration (OpenAI example)
print("\n2. Saving OpenAI model configuration:")
openai_config = {
"model_name": "my-openai",
"provider": "openai",
"api_token": "your-openai-api-key-here"
}
response = requests.post(f"{BASE_URL}/models", json=openai_config)
print(f"Status: {response.status_code}")
print(f"Response: {json.dumps(response.json(), indent=2)}")
# 3. List models again to confirm the new configuration was saved
print("\n3. Listing models after adding the new configuration:")
response = requests.get(f"{BASE_URL}/models")
print(f"Status: {response.status_code}")
print(f"Response: {json.dumps(response.json(), indent=2)}")
# 4. Delete a model configuration
print("\n4. Deleting a model configuration:")
response = requests.delete(f"{BASE_URL}/models/my-openai")
print(f"Status: {response.status_code}")
print(f"Response: {json.dumps(response.json(), indent=2)}")
# 5. Final list of models
print("\n5. Final list of models:")
response = requests.get(f"{BASE_URL}/models")
print(f"Status: {response.status_code}")
print(f"Response: {json.dumps(response.json(), indent=2)}")
if __name__ == "__main__":
print("Model Management Test Script")
print("Make sure the API server is running on http://localhost:8000")
print("=" * 50)
try:
test_model_management()
except requests.exceptions.ConnectionError:
print("Error: Could not connect to the API server.")
print("Make sure the server is running with: python api_server.py")
except Exception as e:
print(f"Error: {e}")
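Duplicate prevention and schema caching both key on the URL-plus-query pair. The md5-of-`"url:query"` scheme below mirrors `_generate_schema_key` in the library file that follows; identical inputs always map to the same cached schema file:

```python
import hashlib

def schema_cache_key(url: str, query: str) -> str:
    """Deterministic cache key: the same URL + query always hits the same schema."""
    return hashlib.md5(f"{url}:{query}".encode()).hexdigest()

k1 = schema_cache_key("https://example.com", "Extract the title")
k2 = schema_cache_key("https://example.com", "Extract the title")
k3 = schema_cache_key("https://example.com", "Extract the price")
print(k1 == k2, k1 == k3)  # True False
```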

View File

@@ -0,0 +1,397 @@
from crawl4ai import (
AsyncWebCrawler,
BrowserConfig,
CacheMode,
CrawlerRunConfig,
LLMConfig,
JsonCssExtractionStrategy,
LLMExtractionStrategy
)
import os
import json
import hashlib
from typing import Dict, Any, Optional, List
from litellm import completion
class ModelConfig:
"""Configuration for LLM models."""
def __init__(self, provider: str, api_token: str):
self.provider = provider
self.api_token = api_token
def to_dict(self) -> Dict[str, Any]:
return {
"provider": self.provider,
"api_token": self.api_token
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'ModelConfig':
return cls(
provider=data["provider"],
api_token=data["api_token"]
)
class WebScraperAgent:
"""
A mini library that converts any website into a structured data API.
Features:
1. Provide a URL and tell AI what data you need in plain English
2. Generate: Agent reverse-engineers the site and deploys custom scraper
3. Integrate: Use private API endpoint to get structured data
4. Support for custom LLM models and API keys
"""
def __init__(self, schemas_dir: str = "schemas", models_dir: str = "models"):
self.schemas_dir = schemas_dir
self.models_dir = models_dir
os.makedirs(self.schemas_dir, exist_ok=True)
os.makedirs(self.models_dir, exist_ok=True)
def _generate_schema_key(self, url: str, query: str) -> str:
"""Generate a unique key for schema caching based on URL and query."""
content = f"{url}:{query}"
return hashlib.md5(content.encode()).hexdigest()
def save_model_config(self, model_name: str, provider: str, api_token: str) -> bool:
"""
Save a model configuration for later use.
Args:
model_name: User-friendly name for the model
provider: LLM provider (e.g., 'gemini', 'openai', 'anthropic')
api_token: API token for the provider
Returns:
True if saved successfully
"""
try:
model_config = ModelConfig(provider, api_token)
config_path = os.path.join(self.models_dir, f"{model_name}.json")
with open(config_path, "w") as f:
json.dump(model_config.to_dict(), f, indent=2)
print(f"Model configuration saved: {model_name}")
return True
except Exception as e:
print(f"Failed to save model configuration: {e}")
return False
def load_model_config(self, model_name: str) -> Optional[ModelConfig]:
"""
Load a saved model configuration.
Args:
model_name: Name of the saved model configuration
Returns:
ModelConfig object or None if not found
"""
try:
config_path = os.path.join(self.models_dir, f"{model_name}.json")
if not os.path.exists(config_path):
return None
with open(config_path, "r") as f:
data = json.load(f)
return ModelConfig.from_dict(data)
except Exception as e:
print(f"Failed to load model configuration: {e}")
return None
def list_saved_models(self) -> List[str]:
"""List all saved model configurations."""
models = []
for filename in os.listdir(self.models_dir):
if filename.endswith('.json'):
models.append(filename[:-5]) # Remove .json extension
return models
def delete_model_config(self, model_name: str) -> bool:
"""
Delete a saved model configuration.
Args:
model_name: Name of the model configuration to delete
Returns:
True if deleted successfully
"""
try:
config_path = os.path.join(self.models_dir, f"{model_name}.json")
if os.path.exists(config_path):
os.remove(config_path)
print(f"Model configuration deleted: {model_name}")
return True
return False
except Exception as e:
print(f"Failed to delete model configuration: {e}")
return False
async def _load_or_generate_schema(self, url: str, query: str, session_id: str = "schema_generator", model_name: Optional[str] = None) -> Dict[str, Any]:
"""
Loads schema from cache if exists, otherwise generates using AI.
This is the "Generate" step - our agent reverse-engineers the site.
Args:
url: URL to scrape
query: Query for data extraction
session_id: Session identifier
model_name: Name of saved model configuration to use
"""
schema_key = self._generate_schema_key(url, query)
schema_path = os.path.join(self.schemas_dir, f"{schema_key}.json")
if os.path.exists(schema_path):
print(f"Schema found in cache for {url}")
with open(schema_path, "r") as f:
return json.load(f)
print(f"Generating new schema for {url}")
print(f"Query: {query}")
query += """
IMPORTANT:
GENERATE THE SCHEMA WITH ONLY THE FIELDS MENTIONED IN THE QUERY. MAKE SURE THE NUMBER OF FIELDS IN THE SCHEMA MATCHES THE NUMBER OF FIELDS IN THE QUERY.
"""
# Step 1: Fetch the page HTML
async with AsyncWebCrawler(config=BrowserConfig(headless=True)) as crawler:
result = await crawler.arun(
url=url,
config=CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
session_id=session_id,
simulate_user=True,
remove_overlay_elements=True,
delay_before_return_html=5,
)
)
html = result.fit_html
# Step 2: Generate schema using AI with custom model if specified
print("AI is analyzing the page structure...")
# Use custom model configuration if provided
if model_name:
model_config = self.load_model_config(model_name)
if model_config:
llm_config = LLMConfig(
provider=model_config.provider,
api_token=model_config.api_token
)
print(f"Using custom model: {model_name}")
else:
raise ValueError(f"Model configuration '{model_name}' not found. Please add it from the Models page.")
else:
# Require a model to be specified
raise ValueError("No model specified. Please select a model from the dropdown or add one from the Models page.")
schema = JsonCssExtractionStrategy.generate_schema(
html=html,
llm_config=llm_config,
query=query
)
# Step 3: Cache the generated schema
print(f"Schema generated and cached: {json.dumps(schema, indent=2)}")
with open(schema_path, "w") as f:
json.dump(schema, f, indent=2)
return schema
def _generate_llm_schema(self, query: str, llm_config: LLMConfig) -> Dict[str, Any]:
"""
Generate a schema for a given query using a custom LLM model.
Args:
query: Plain English description of what data to extract
llm_config: LLM configuration (provider and API token) to use
"""
# Ask the model to produce a JSON schema for the fields named in the query.
prompt = f"""
IDENTIFY THE FIELDS FOR EXTRACTION MENTIONED IN THE QUERY and GENERATE A JSON SCHEMA FOR THE FIELDS.
eg.
{{
"name": "str",
"age": "str",
"email": "str",
"product_name": "str",
"product_price": "str",
"product_description": "str",
"product_image": "str",
"product_url": "str",
"product_rating": "str",
"product_reviews": "str",
}}
Here is the query:
{query}
IMPORTANT:
THE RESULT SHOULD BE A JSON OBJECT.
MAKE SURE THE NUMBER OF FIELDS IN THE RESULT MATCHES THE NUMBER OF FIELDS IN THE QUERY.
"""
response = completion(
model=llm_config.provider,
messages=[{"role": "user", "content": prompt}],
api_key=llm_config.api_token,
response_format={"type": "json_object"}
)
# litellm returns a ModelResponse; the message content is a JSON string
return json.loads(response["choices"][0]["message"]["content"])
async def scrape_data_with_llm(self, url: str, query: str, model_name: Optional[str] = None) -> Dict[str, Any]:
"""
Scrape structured data from any website using a custom LLM model.
Args:
url: The website URL to scrape
query: Plain English description of what data to extract
model_name: Name of saved model configuration to use
"""
if model_name:
model_config = self.load_model_config(model_name)
if model_config:
llm_config = LLMConfig(
provider=model_config.provider,
api_token=model_config.api_token
)
print(f"Using custom model: {model_name}")
else:
raise ValueError(f"Model configuration '{model_name}' not found. Please add it from the Models page.")
else:
# Require a model to be specified
raise ValueError("No model specified. Please select a model from the dropdown or add one from the Models page.")
query += """\n
IMPORTANT:
THE RESULT SHOULD BE A JSON OBJECT CONTAINING ONLY THE FIELDS MENTIONED IN THE QUERY.
MAKE SURE THE NUMBER OF FIELDS IN THE RESULT MATCHES THE NUMBER OF FIELDS IN THE QUERY.
"""
schema = self._generate_llm_schema(query, llm_config)
print(f"Schema: {schema}")
llm_extraction_strategy = LLMExtractionStrategy(
llm_config=llm_config,
instruction=query,
extraction_type="schema",
schema=schema
)
async with AsyncWebCrawler() as crawler:
result = await crawler.arun(
url=url,
config=CrawlerRunConfig(
cache_mode=CacheMode.BYPASS,
simulate_user=True,
extraction_strategy=llm_extraction_strategy,
)
)
extracted_data = result.extracted_content
if isinstance(extracted_data, str):
try:
extracted_data = json.loads(extracted_data)
except json.JSONDecodeError:
# If it's not valid JSON, keep it as string
pass
return {
"url": url,
"query": query,
"extracted_data": extracted_data,
"timestamp": result.timestamp if hasattr(result, 'timestamp') else None
}
    async def scrape_data(self, url: str, query: str, model_name: Optional[str] = None) -> Dict[str, Any]:
        """
        Main method to scrape structured data from any website.

        Args:
            url: The website URL to scrape
            query: Plain English description of what data to extract
            model_name: Name of saved model configuration to use

        Returns:
            Structured data extracted from the website
        """
        # Step 1: Generate or load schema (reverse-engineer the site)
        schema = await self._load_or_generate_schema(url=url, query=query, model_name=model_name)

        # Step 2: Deploy the custom high-speed scraper
        print(f"Deploying custom scraper for {url}")
        browser_config = BrowserConfig(headless=True)
        async with AsyncWebCrawler(config=browser_config) as crawler:
            run_config = CrawlerRunConfig(
                extraction_strategy=JsonCssExtractionStrategy(schema=schema),
            )
            result = await crawler.arun(url=url, config=run_config)

        # Step 3: Return structured data
        # Parse extracted_content if it's a JSON string
        extracted_data = result.extracted_content
        if isinstance(extracted_data, str):
            try:
                extracted_data = json.loads(extracted_data)
            except json.JSONDecodeError:
                # If it's not valid JSON, keep it as a string
                pass
        return {
            "url": url,
            "query": query,
            "extracted_data": extracted_data,
            "schema_used": schema,
            "timestamp": result.timestamp if hasattr(result, 'timestamp') else None
        }
    async def get_cached_schemas(self) -> Dict[str, str]:
        """Get the list of cached schemas."""
        schemas = {}
        for filename in os.listdir(self.schemas_dir):
            if filename.endswith('.json'):
                schema_key = filename[:-5]  # Remove the .json extension
                schemas[schema_key] = filename
        return schemas

    def clear_cache(self):
        """Clear all cached schemas."""
        import shutil
        if os.path.exists(self.schemas_dir):
            shutil.rmtree(self.schemas_dir)
        os.makedirs(self.schemas_dir, exist_ok=True)
        print("Schema cache cleared")
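# The cache-listing logic in get_cached_schemas is easy to verify in
# isolation: every "<key>.json" file in the schema directory maps key ->
# filename. A standalone sketch against a throwaway directory (the filename
# and schema body below are hypothetical examples):

```python
import json
import os
import tempfile

with tempfile.TemporaryDirectory() as schemas_dir:
    # Simulate one cached schema file, as the agent would have written it.
    with open(os.path.join(schemas_dir, "example_com__products.json"), "w") as f:
        json.dump({"baseSelector": "div.product"}, f)
    schemas = {
        filename[:-5]: filename  # strip the ".json" suffix to recover the key
        for filename in os.listdir(schemas_dir)
        if filename.endswith(".json")
    }
```

# Keys are derived purely from filenames, so clear_cache's rmtree + makedirs
# reset leaves an empty mapping with no extra bookkeeping.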
# Convenience function for simple usage
async def scrape_website(url: str, query: str, model_name: Optional[str] = None) -> Dict[str, Any]:
    """
    Simple function to scrape any website with plain English instructions.

    Args:
        url: Website URL
        query: Plain English description of what data to extract
        model_name: Name of saved model configuration to use

    Returns:
        Extracted structured data
    """
    agent = WebScraperAgent()
    return await agent.scrape_data(url, query, model_name)
async def scrape_website_with_llm(url: str, query: str, model_name: Optional[str] = None) -> Dict[str, Any]:
    """
    Scrape structured data from any website using a custom LLM model.

    Args:
        url: The website URL to scrape
        query: Plain English description of what data to extract
        model_name: Name of saved model configuration to use

    Returns:
        Extracted structured data
    """
    agent = WebScraperAgent()
    return await agent.scrape_data_with_llm(url, query, model_name)
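# Both scrape_data and scrape_data_with_llm repeat the same JSON-or-string
# fallback on result.extracted_content. The shared pattern can be sketched as
# a standalone helper; `parse_extracted` is a name introduced here for
# illustration, not part of this module:

```python
import json
from typing import Any


def parse_extracted(content: Any) -> Any:
    # If the crawler returned a JSON string, decode it; otherwise pass it
    # through unchanged (including strings that are not valid JSON).
    if isinstance(content, str):
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            return content
    return content
```

# Both scrape methods could call this helper instead of duplicating the
# try/except block inline.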