This commit adds a complete web scraping API example that demonstrates how to get structured data from any website and use it like an API using the crawl4ai library with a minimalist frontend interface. Core Functionality - AI-powered web scraping with plain English queries - Dual scraping approaches: Schema-based (faster) and LLM-based (flexible) - Intelligent schema caching for improved performance - Custom LLM model support with API key management - Automatic duplicate request prevention Modern Frontend Interface - Minimalist black-and-white design inspired by modern web apps - Responsive layout with smooth animations and transitions - Three main pages: Scrape Data, Models Management, API Request History - Real-time results display with JSON formatting - Copy-to-clipboard functionality for extracted data - Toast notifications for user feedback - Auto-scroll to results when scraping starts Model Management System - Web-based model configuration interface - Support for any LLM provider (OpenAI, Gemini, Anthropic, etc.)
- Simplified configuration requiring only provider and API token - Add, list, and delete model configurations - Secure storage of API keys in local JSON files API Request History - Automatic saving of all API requests and responses - Display of request history with URL, query, and cURL commands - Duplicate prevention (same URL + query combinations) - Request deletion functionality - Clean, simplified display focusing on essential information Technical Implementation Backend (FastAPI) - RESTful API with comprehensive endpoints - Pydantic models for request/response validation - Async web scraping with crawl4ai library - Error handling with detailed error messages - File-based storage for models and request history Frontend (Vanilla JS/CSS/HTML) - No framework dependencies - pure HTML, CSS, JavaScript - Modern CSS Grid and Flexbox layouts - Custom dropdown styling with SVG arrows - Responsive design for mobile and desktop - Smooth scrolling and animations Core Library Integration - WebScraperAgent class for orchestration - ModelConfig class for LLM configuration management - Schema generation and caching system - LLM extraction strategy support - Browser configuration with headless mode
67 lines
2.2 KiB
Python
67 lines
2.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test script for the new model management functionality.
|
|
This script demonstrates how to save and use custom model configurations.
|
|
"""
|
|
|
|
import asyncio
|
|
import requests
|
|
import json
|
|
|
|
# Base URL of the API server; every request in this script is built from it.
|
|
BASE_URL = "http://localhost:8000"
|
|
|
|
# Upper bound (in seconds) on each HTTP call so that a stalled server cannot
# hang the script forever; `requests` applies no timeout by default.
_REQUEST_TIMEOUT_SECONDS = 10


def _print_response(response):
    """Print the status code and body of *response*.

    The body is pretty-printed as JSON when it parses as JSON. Otherwise the
    raw response text is printed, so that an empty or non-JSON reply (for
    example an HTML error page) does not abort the remaining steps.
    """
    print(f"Status: {response.status_code}")
    try:
        body = json.dumps(response.json(), indent=2)
    except ValueError:
        # Every JSON decode error raised by `Response.json()` is a ValueError.
        body = response.text
    print(f"Response: {body}")


def test_model_management():
    """Test the model management endpoints.

    Runs five steps against the server at ``BASE_URL`` and prints the status
    code and body of every response:

    1. list the stored models,
    2. save an example OpenAI configuration,
    3. list the models again,
    4. delete the configuration saved in step 2,
    5. list the models one final time.

    Raises:
        requests.exceptions.RequestException: if the server cannot be
            reached or does not answer within the request timeout.
    """
    models_url = f"{BASE_URL}/models"
    timeout = _REQUEST_TIMEOUT_SECONDS

    print("=== Testing Model Management ===")

    # 1. List current models
    print("\n1. Listing current models:")
    _print_response(requests.get(models_url, timeout=timeout))

    # 2. Save another model configuration (OpenAI example)
    print("\n2. Saving OpenAI model configuration:")
    openai_config = {
        "model_name": "my-openai",
        "provider": "openai",
        "api_token": "your-openai-api-key-here",
    }
    _print_response(
        requests.post(models_url, json=openai_config, timeout=timeout)
    )

    # 3. List models again to see the new ones
    print("\n3. Listing models after adding new ones:")
    _print_response(requests.get(models_url, timeout=timeout))

    # 4. Delete a model configuration
    print("\n4. Deleting a model configuration:")
    # Derive the URL from the config saved in step 2 so both stay in sync.
    delete_url = f"{models_url}/{openai_config['model_name']}"
    _print_response(requests.delete(delete_url, timeout=timeout))

    # 5. Final list of models
    print("\n5. Final list of models:")
    _print_response(requests.get(models_url, timeout=timeout))
|
|
|
|
if __name__ == "__main__":
    # Banner: script title, a reminder about the server, and a separator.
    for banner_line in (
        "Model Management Test Script",
        "Make sure the API server is running on http://localhost:8000",
        "=" * 50,
    ):
        print(banner_line)

    try:
        test_model_management()
    except requests.exceptions.ConnectionError:
        # Connection failure: tell the user how to start the server.
        print("Error: Could not connect to the API server.")
        print("Make sure the server is running with: python api_server.py")
    except Exception as exc:
        # Top-level boundary: report any other failure instead of a traceback.
        print(f"Error: {exc}")