#!/usr/bin/env python3
"""
Quick test script for Table Extraction feature
Tests the /tables/extract endpoint with sample HTML
Usage:
1. Start the server: python deploy/docker/server.py
2. Run this script: python tests/docker/test_table_extraction_quick.py
"""
import requests
import json
import sys
# Sample HTML document posted to the extraction endpoints. It contains two
# tables: a simple people table and a financial table whose cells use
# currency formatting ("$1,250,000.00"), so both the "default" and the
# "financial" strategies have something to parse.
SAMPLE_HTML = """
<html>
<body>
    <h1>Test Tables</h1>
    <table>
        <thead>
            <tr><th>Name</th><th>Age</th><th>City</th></tr>
        </thead>
        <tbody>
            <tr><td>Alice</td><td>25</td><td>New York</td></tr>
            <tr><td>Bob</td><td>30</td><td>San Francisco</td></tr>
            <tr><td>Charlie</td><td>35</td><td>Los Angeles</td></tr>
        </tbody>
    </table>
    <table>
        <thead>
            <tr><th>Quarter</th><th>Revenue</th><th>Profit</th></tr>
        </thead>
        <tbody>
            <tr><td>Q1 2024</td><td>$1,250,000.00</td><td>$400,000.00</td></tr>
            <tr><td>Q2 2024</td><td>$1,500,000.00</td><td>$600,000.00</td></tr>
            <tr><td>Q3 2024</td><td>$1,750,000.00</td><td>$700,000.00</td></tr>
        </tbody>
    </table>
</body>
</html>
"""

# Base URL of the server under test; every request in this script is built
# from it.
BASE_URL = "http://localhost:11234"
def test_server_health():
    """Check whether the server answers its health endpoint.

    Sends ``GET {BASE_URL}/health`` with a 2-second timeout and prints the
    outcome.

    Returns:
        bool: True if the server responded with HTTP 200; False if it
        responded with any other status or the request raised a
        ``requests`` exception (in which case a start-up hint is printed).
    """
    try:
        response = requests.get(f"{BASE_URL}/health", timeout=2)
    except requests.exceptions.RequestException as e:
        print(f"❌ Server not reachable: {e}")
        print("\n💡 Start the server with: python deploy/docker/server.py")
        return False

    if response.status_code == 200:
        print("✅ Server is running")
        return True

    print(f"❌ Server health check failed: {response.status_code}")
    return False
def test_default_strategy():
    """Test the ``default`` table extraction strategy.

    Posts ``SAMPLE_HTML`` to ``/tables/extract`` with
    ``{"strategy": "default"}`` and prints the reported table count,
    strategy name, and the number of rows in each returned table.

    Returns:
        bool: True if the endpoint answered with HTTP 200; False on any other
        status or if the request itself failed (connection error, timeout).
    """
    print("\n📊 Testing DEFAULT strategy...")
    try:
        response = requests.post(
            f"{BASE_URL}/tables/extract",
            json={
                "html": SAMPLE_HTML,
                "config": {
                    "strategy": "default",
                },
            },
            # Without a timeout a stalled server would hang the script forever.
            timeout=30,
        )
    except requests.exceptions.RequestException as exc:
        # Report the failure instead of aborting so the remaining tests
        # and the summary still run.
        print(f"❌ Request failed: {exc}")
        return False

    if response.status_code != 200:
        print(f"❌ Failed: {response.status_code}")
        print(f" Error: {response.text}")
        return False

    data = response.json()
    print("✅ Default strategy works!")
    print(f" - Table count: {data['table_count']}")
    print(f" - Strategy: {data['strategy']}")
    # ``or []`` keeps the loop a no-op when the server returns null/empty.
    for number, table in enumerate(data['tables'] or [], start=1):
        print(f" - Table {number}: {len(table.get('rows', []))} rows")
    return True
def test_financial_strategy():
    """Test the ``financial`` table extraction strategy.

    Posts ``SAMPLE_HTML`` to ``/tables/extract`` with the ``financial``
    strategy and the ``preserve_formatting`` and ``extract_metadata`` options
    enabled, then prints the reported table count and strategy name.

    Returns:
        bool: True if the endpoint answered with HTTP 200; False on any other
        status or if the request itself failed (connection error, timeout).
    """
    print("\n💰 Testing FINANCIAL strategy...")
    try:
        response = requests.post(
            f"{BASE_URL}/tables/extract",
            json={
                "html": SAMPLE_HTML,
                "config": {
                    "strategy": "financial",
                    "preserve_formatting": True,
                    "extract_metadata": True,
                },
            },
            # Without a timeout a stalled server would hang the script forever.
            timeout=30,
        )
    except requests.exceptions.RequestException as exc:
        # Report the failure instead of aborting so the remaining tests
        # and the summary still run.
        print(f"❌ Request failed: {exc}")
        return False

    if response.status_code != 200:
        print(f"❌ Failed: {response.status_code}")
        print(f" Error: {response.text}")
        return False

    data = response.json()
    print("✅ Financial strategy works!")
    print(f" - Table count: {data['table_count']}")
    print(f" - Strategy: {data['strategy']}")
    return True
def test_none_strategy():
    """Test the ``none`` strategy, which is expected to extract no tables.

    Posts ``SAMPLE_HTML`` to ``/tables/extract`` with
    ``{"strategy": "none"}`` and checks that the response reports a
    ``table_count`` of 0.

    Returns:
        bool: True if the endpoint answered with HTTP 200 and
        ``table_count == 0``; False if tables were returned, the status was
        not 200, or the request itself failed (connection error, timeout).
    """
    print("\n🚫 Testing NONE strategy...")
    try:
        response = requests.post(
            f"{BASE_URL}/tables/extract",
            json={
                "html": SAMPLE_HTML,
                "config": {
                    "strategy": "none",
                },
            },
            # Without a timeout a stalled server would hang the script forever.
            timeout=30,
        )
    except requests.exceptions.RequestException as exc:
        # Report the failure instead of aborting so the remaining tests
        # and the summary still run.
        print(f"❌ Request failed: {exc}")
        return False

    if response.status_code != 200:
        print(f"❌ Failed: {response.status_code}")
        # Show the response body too, as the other strategy tests do.
        print(f" Error: {response.text}")
        return False

    data = response.json()
    if data['table_count'] == 0:
        print("✅ None strategy works (correctly extracted 0 tables)")
        return True

    print(f"❌ None strategy returned {data['table_count']} tables (expected 0)")
    return False
def test_batch_extraction():
    """Test batch extraction over several HTML documents in one request.

    Posts two documents to ``/tables/extract/batch`` using the ``default``
    strategy and prints the ``summary`` counters from the response
    (``total_processed``, ``successful``, ``total_tables_extracted``).

    Returns:
        bool: True if the endpoint answered with HTTP 200; False on any other
        status or if the request itself failed (connection error, timeout).
    """
    print("\n📦 Testing BATCH extraction...")
    try:
        response = requests.post(
            f"{BASE_URL}/tables/extract/batch",
            json={
                "html_list": [
                    SAMPLE_HTML,
                    # Second document is a minimal one-table page so the
                    # batch contains two non-empty inputs.
                    "<table><tr><th>Col1</th></tr><tr><td>Val1</td></tr></table>",
                ],
                "config": {
                    "strategy": "default",
                },
            },
            # Without a timeout a stalled server would hang the script forever.
            timeout=30,
        )
    except requests.exceptions.RequestException as exc:
        # Report the failure instead of aborting so the remaining tests
        # and the summary still run.
        print(f"❌ Request failed: {exc}")
        return False

    if response.status_code != 200:
        print(f"❌ Failed: {response.status_code}")
        print(f" Error: {response.text}")
        return False

    summary = response.json()['summary']
    print("✅ Batch extraction works!")
    print(f" - Total processed: {summary['total_processed']}")
    print(f" - Successful: {summary['successful']}")
    print(f" - Total tables: {summary['total_tables_extracted']}")
    return True
def test_error_handling():
    """Test that the endpoint rejects a request carrying both html and url.

    Posts a payload with both ``html`` and ``url`` set to ``/tables/extract``
    and expects the server to answer with HTTP 400.

    Returns:
        bool: True if the response status is 400; False for any other status
        or if the request itself failed (connection error, timeout).
    """
    print("\n⚠️ Testing ERROR handling...")
    try:
        # Test with both html and url (should fail). The html value must be
        # non-empty: an empty string could be treated as "not provided" and
        # the conflict would never be exercised.
        response = requests.post(
            f"{BASE_URL}/tables/extract",
            json={
                "html": "<table></table>",
                "url": "https://example.com",
                "config": {"strategy": "default"},
            },
            # Without a timeout a stalled server would hang the script forever.
            timeout=30,
        )
    except requests.exceptions.RequestException as exc:
        # Report the failure instead of aborting so the summary still runs.
        print(f"❌ Request failed: {exc}")
        return False

    if response.status_code == 400:
        print("✅ Error handling works (correctly rejected invalid input)")
        return True

    print(f"❌ Expected 400 error, got: {response.status_code}")
    return False
def main():
    """Run the health check and all quick tests, then exit with a status code.

    Exits with status 1 immediately if ``test_server_health()`` fails.
    Otherwise runs every test in a fixed order, prints a PASS/FAIL line per
    test plus a total, and exits with status 0 when all tests passed or 1
    when at least one failed.
    """
    separator = "=" * 60
    print(separator)
    print("Table Extraction Feature - Quick Test")
    print(separator)

    # Check server: there is no point running the tests if it is not up.
    if not test_server_health():
        sys.exit(1)

    # Run tests. Each one is called unconditionally so that a failure does
    # not prevent the remaining tests from running.
    test_cases = [
        ("Default Strategy", test_default_strategy),
        ("Financial Strategy", test_financial_strategy),
        ("None Strategy", test_none_strategy),
        ("Batch Extraction", test_batch_extraction),
        ("Error Handling", test_error_handling),
    ]
    results = [(name, run_test()) for name, run_test in test_cases]

    # Summary
    print("\n" + separator)
    print("Test Summary")
    print(separator)

    passed = sum(1 for _, result in results if result)
    total = len(results)
    for name, result in results:
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{status}: {name}")
    print(f"\nTotal: {passed}/{total} tests passed")

    if passed == total:
        print("\n🎉 All tests passed! Table extraction is working correctly!")
        sys.exit(0)

    print(f"\n⚠️ {total - passed} test(s) failed")
    sys.exit(1)


# Only run the suite when executed as a script, not when imported.
if __name__ == "__main__":
    main()