#!/usr/bin/env python3
"""
Quick test script for Table Extraction feature.

Tests the /tables/extract endpoint with sample HTML.

Usage:
    1. Start the server: python deploy/docker/server.py
    2. Run this script: python tests/docker/test_table_extraction_quick.py
"""

import requests
import json
import sys

# Sample HTML with tables.
# NOTE(review): the markup below was reconstructed around the surviving cell
# text (the tags were missing from the literal) -- confirm it matches the
# intended fixture.
SAMPLE_HTML = """
<html>
<head><title>Test Tables</title></head>
<body>
<table>
<tr><th>Name</th><th>Age</th><th>City</th></tr>
<tr><td>Alice</td><td>25</td><td>New York</td></tr>
<tr><td>Bob</td><td>30</td><td>San Francisco</td></tr>
<tr><td>Charlie</td><td>35</td><td>Los Angeles</td></tr>
</table>
<table>
<tr><th>Quarter</th><th>Revenue</th><th>Profit</th></tr>
<tr><td>Q1 2024</td><td>$1,250,000.00</td><td>$400,000.00</td></tr>
<tr><td>Q2 2024</td><td>$1,500,000.00</td><td>$600,000.00</td></tr>
<tr><td>Q3 2024</td><td>$1,750,000.00</td><td>$700,000.00</td></tr>
</table>
</body>
</html>
"""

BASE_URL = "http://localhost:11234"

# Upper bound (seconds) for each extraction request, so a stalled server
# cannot hang the script indefinitely.
REQUEST_TIMEOUT = 30


def _post(path, payload):
    """POST *payload* as JSON to ``BASE_URL + path`` and return the response.

    Raises:
        requests.exceptions.RequestException: if the request cannot be
            completed or exceeds ``REQUEST_TIMEOUT``.
    """
    return requests.post(f"{BASE_URL}{path}", json=payload, timeout=REQUEST_TIMEOUT)


def _run_test(test_func):
    """Run one test function and return its boolean outcome.

    Connection errors, non-JSON bodies and missing response keys are reported
    as a failed test instead of aborting the remaining tests and the summary.
    """
    try:
        return bool(test_func())
    except requests.exceptions.RequestException as e:
        print(f"❌ Request failed: {e}")
    except (KeyError, ValueError) as e:
        print(f"❌ Unexpected response format: {e!r}")
    return False


def test_server_health():
    """Check if server is running.

    Returns:
        True when GET /health answers with HTTP 200, False otherwise
        (including when the server cannot be reached).
    """
    try:
        response = requests.get(f"{BASE_URL}/health", timeout=2)
    except requests.exceptions.RequestException as e:
        print(f"❌ Server not reachable: {e}")
        print("\n💡 Start the server with: python deploy/docker/server.py")
        return False

    if response.status_code == 200:
        print("✅ Server is running")
        return True

    print(f"❌ Server health check failed: {response.status_code}")
    return False


def test_default_strategy():
    """Test default table extraction strategy.

    Returns:
        True when /tables/extract answers with HTTP 200, False otherwise.
    """
    print("\n📊 Testing DEFAULT strategy...")

    response = _post("/tables/extract", {
        "html": SAMPLE_HTML,
        "config": {
            "strategy": "default"
        }
    })

    if response.status_code != 200:
        print(f"❌ Failed: {response.status_code}")
        print(f" Error: {response.text}")
        return False

    data = response.json()
    print("✅ Default strategy works!")
    print(f" - Table count: {data['table_count']}")
    print(f" - Strategy: {data['strategy']}")

    # 'tables' may be empty or null; only list row counts when present.
    for idx, table in enumerate(data['tables'] or [], start=1):
        print(f" - Table {idx}: {len(table.get('rows', []))} rows")

    return True


def test_financial_strategy():
    """Test financial table extraction strategy.

    Returns:
        True when /tables/extract answers with HTTP 200, False otherwise.
    """
    print("\n💰 Testing FINANCIAL strategy...")

    response = _post("/tables/extract", {
        "html": SAMPLE_HTML,
        "config": {
            "strategy": "financial",
            "preserve_formatting": True,
            "extract_metadata": True
        }
    })

    if response.status_code != 200:
        print(f"❌ Failed: {response.status_code}")
        print(f" Error: {response.text}")
        return False

    data = response.json()
    print("✅ Financial strategy works!")
    print(f" - Table count: {data['table_count']}")
    print(f" - Strategy: {data['strategy']}")
    return True


def test_none_strategy():
    """Test none strategy (no extraction).

    Returns:
        True when the server answers with HTTP 200 and reports zero tables,
        False otherwise.
    """
    print("\n🚫 Testing NONE strategy...")

    response = _post("/tables/extract", {
        "html": SAMPLE_HTML,
        "config": {
            "strategy": "none"
        }
    })

    if response.status_code != 200:
        print(f"❌ Failed: {response.status_code}")
        return False

    data = response.json()
    if data['table_count'] == 0:
        print("✅ None strategy works (correctly extracted 0 tables)")
        return True

    print(f"❌ None strategy returned {data['table_count']} tables (expected 0)")
    return False


def test_batch_extraction():
    """Test batch extraction.

    Returns:
        True when /tables/extract/batch answers with HTTP 200, False otherwise.
    """
    print("\n📦 Testing BATCH extraction...")

    response = _post("/tables/extract/batch", {
        "html_list": [
            SAMPLE_HTML,
            # NOTE(review): tags reconstructed around the surviving cell text.
            "<table><tr><th>Col1</th></tr><tr><td>Val1</td></tr></table>"
        ],
        "config": {
            "strategy": "default"
        }
    })

    if response.status_code != 200:
        print(f"❌ Failed: {response.status_code}")
        print(f" Error: {response.text}")
        return False

    summary = response.json()['summary']
    print("✅ Batch extraction works!")
    print(f" - Total processed: {summary['total_processed']}")
    print(f" - Successful: {summary['successful']}")
    print(f" - Total tables: {summary['total_tables_extracted']}")
    return True


def test_error_handling():
    """Test error handling.

    Sends both ``html`` and ``url`` in one request, which the server is
    expected to reject.

    Returns:
        True when the server answers with HTTP 400, False otherwise.
    """
    print("\n⚠️ Testing ERROR handling...")

    # Test with both html and url (should fail)
    response = _post("/tables/extract", {
        # NOTE(review): placeholder markup; the original literal lost its tags.
        "html": "<table></table>",
        "url": "https://example.com",
        "config": {"strategy": "default"}
    })

    if response.status_code == 400:
        print("✅ Error handling works (correctly rejected invalid input)")
        return True

    print(f"❌ Expected 400 error, got: {response.status_code}")
    return False


def main():
    """Run the health check and every test, print a summary, then exit.

    Exits with status 0 when all tests pass and 1 when the server is
    unavailable or any test fails.
    """
    print("=" * 60)
    print("Table Extraction Feature - Quick Test")
    print("=" * 60)

    # Check server
    if not test_server_health():
        sys.exit(1)

    # Run tests
    tests = [
        ("Default Strategy", test_default_strategy),
        ("Financial Strategy", test_financial_strategy),
        ("None Strategy", test_none_strategy),
        ("Batch Extraction", test_batch_extraction),
        ("Error Handling", test_error_handling),
    ]
    results = [(name, _run_test(test_func)) for name, test_func in tests]

    # Summary
    print("\n" + "=" * 60)
    print("Test Summary")
    print("=" * 60)

    passed = sum(1 for _, result in results if result)
    total = len(results)

    for name, result in results:
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{status}: {name}")

    print(f"\nTotal: {passed}/{total} tests passed")

    if passed == total:
        print("\n🎉 All tests passed! Table extraction is working correctly!")
        sys.exit(0)
    else:
        print(f"\n⚠️ {total - passed} test(s) failed")
        sys.exit(1)


if __name__ == "__main__":
    main()