Add link analysis tests and integration tests for /links/analyze endpoint

- Implemented `test_link_analysis` in `test_docker.py` to validate link analysis functionality. - Created `test_link_analysis.py` with comprehensive tests for link analysis, including basic functionality, configuration options, error handling, performance, and edge cases. - Added integration tests in `test_link_analysis_integration.py` to verify the /links/analyze endpoint, including health checks, authentication, and error handling.
2025-10-14 19:24:16 +08:00
parent 8cca9704eb
commit aebf5a3694
7 changed files with 1926 additions and 0 deletions
--- a/tests/test_docker.py
+++ b/tests/test_docker.py
@@ -70,6 +70,7 @@ def test_docker_deployment(version="basic"):
    # test_llm_extraction(tester)
    # test_llm_with_ollama(tester)
    # test_screenshot(tester)
+    test_link_analysis(tester)


 def test_basic_crawl(tester: Crawl4AiTester):
@@ -293,6 +294,77 @@ def test_screenshot(tester: Crawl4AiTester):
    assert result["result"]["success"]


+def test_link_analysis(tester: Crawl4AiTester):
+    print("\n=== Testing Link Analysis ===")
+
+    # Get auth token first
+    try:
+        token_response = requests.post(f"{tester.base_url}/token", json={"email": "test@example.com"})
+        token = token_response.json()["access_token"]
+        headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+    except Exception as e:
+        print(f"Could not get auth token: {e}")
+        headers = {"Content-Type": "application/json"}
+
+    # Test basic link analysis
+    request_data = {
+        "url": "https://www.nbcnews.com/business"
+    }
+
+    response = requests.post(
+        f"{tester.base_url}/links/analyze",
+        headers=headers,
+        json=request_data,
+        timeout=60
+    )
+
+    if response.status_code == 200:
+        result = response.json()
+        total_links = sum(len(links) for links in result.values())
+        print(f"Link analysis successful: found {total_links} links")
+
+        # Check for expected categories
+        categories_found = []
+        for category in ['internal', 'external', 'social', 'download', 'email', 'phone']:
+            if category in result and result[category]:
+                categories_found.append(category)
+
+        print(f"Link categories found: {categories_found}")
+
+        # Verify we have some links
+        assert total_links > 0, "Should find at least one link"
+        assert len(categories_found) > 0, "Should find at least one link category"
+
+        # Test with configuration
+        request_data_with_config = {
+            "url": "https://www.nbcnews.com/business",
+            "config": {
+                "simulate_user": True,
+                "override_navigator": True,
+                "word_count_threshold": 1
+            }
+        }
+
+        response_with_config = requests.post(
+            f"{tester.base_url}/links/analyze",
+            headers=headers,
+            json=request_data_with_config,
+            timeout=60
+        )
+
+        if response_with_config.status_code == 200:
+            result_with_config = response_with_config.json()
+            total_links_config = sum(len(links) for links in result_with_config.values())
+            print(f"Link analysis with config: found {total_links_config} links")
+            assert total_links_config > 0, "Should find links even with config"
+
+        print("✅ Link analysis tests passed")
+    else:
+        print(f"❌ Link analysis failed: {response.status_code} - {response.text}")
+        # Don't fail the entire test suite for this endpoint
+        print("⚠️  Link analysis test failed, but continuing with other tests")
+
+
 if __name__ == "__main__":
    version = sys.argv[1] if len(sys.argv) > 1 else "basic"
    # version = "full"
--- a/tests/test_link_analysis.py
+++ b/tests/test_link_analysis.py
@@ -0,0 +1,759 @@
+import requests
+import json
+import time
+import sys
+import os
+from typing import Dict, Any, List
+
+
+class LinkAnalysisTester:
+    def __init__(self, base_url: str = "http://localhost:11234"):
+        self.base_url = base_url
+        self.token = self.get_test_token()
+
+    def get_test_token(self) -> str:
+        """Get authentication token for testing"""
+        try:
+            # Try to get token using test email
+            response = requests.post(
+                f"{self.base_url}/token",
+                json={"email": "test@example.com"},
+                timeout=10
+            )
+            if response.status_code == 200:
+                return response.json()["access_token"]
+        except Exception:
+            pass
+
+        # Fallback: try with common test token or skip auth for local testing
+        return "test-token"
+
+    def analyze_links(
+        self,
+        url: str,
+        config: Dict[str, Any] = None,
+        timeout: int = 60
+    ) -> Dict[str, Any]:
+        """Analyze links on a webpage"""
+        headers = {
+            "Content-Type": "application/json"
+        }
+
+        # Add auth if token is available
+        if self.token and self.token != "test-token":
+            headers["Authorization"] = f"Bearer {self.token}"
+
+        request_data = {"url": url}
+        if config:
+            request_data["config"] = config
+
+        response = requests.post(
+            f"{self.base_url}/links/analyze",
+            headers=headers,
+            json=request_data,
+            timeout=timeout
+        )
+
+        if response.status_code != 200:
+            raise Exception(f"Link analysis failed: {response.status_code} - {response.text}")
+
+        return response.json()
+
+
+def test_link_analysis_basic():
+    """Test basic link analysis functionality"""
+    print("\n=== Testing Basic Link Analysis ===")
+
+    tester = LinkAnalysisTester()
+
+    # Test with a simple page
+    test_url = "https://httpbin.org/links/10"
+
+    try:
+        result = tester.analyze_links(test_url)
+        print(f"✅ Successfully analyzed links on {test_url}")
+
+        # Check response structure
+        expected_categories = ['internal', 'external', 'social', 'download', 'email', 'phone']
+        found_categories = [cat for cat in expected_categories if cat in result]
+
+        print(f"📊 Found link categories: {found_categories}")
+
+        # Count total links
+        total_links = sum(len(links) for links in result.values())
+        print(f"🔗 Total links found: {total_links}")
+
+        # Verify link objects have expected fields
+        for category, links in result.items():
+            if links and len(links) > 0:
+                sample_link = links[0]
+                expected_fields = ['href', 'text']
+                optional_fields = ['title', 'base_domain', 'intrinsic_score', 'contextual_score', 'total_score']
+
+                missing_required = [field for field in expected_fields if field not in sample_link]
+                found_optional = [field for field in optional_fields if field in sample_link]
+
+                if missing_required:
+                    print(f"⚠️  Missing required fields in {category}: {missing_required}")
+                else:
+                    print(f"✅ {category} links have proper structure (has {len(found_optional)} optional fields: {found_optional})")
+
+        assert total_links > 0, "Should find at least one link"
+        print("✅ Basic link analysis test passed")
+
+    except Exception as e:
+        print(f"❌ Basic link analysis test failed: {str(e)}")
+        raise
+
+
+def test_link_analysis_with_config():
+    """Test link analysis with custom configuration"""
+    print("\n=== Testing Link Analysis with Config ===")
+
+    tester = LinkAnalysisTester()
+
+    # Test with valid LinkPreviewConfig options
+    config = {
+        "include_internal": True,
+        "include_external": True,
+        "max_links": 50,
+        "score_threshold": 0.3,
+        "verbose": True
+    }
+
+    test_url = "https://httpbin.org/links/10"
+
+    try:
+        result = tester.analyze_links(test_url, config)
+        print(f"✅ Successfully analyzed links with custom config")
+
+        # Verify configuration was applied
+        total_links = sum(len(links) for links in result.values())
+        print(f"🔗 Links found with config: {total_links}")
+
+        assert total_links > 0, "Should find links even with config"
+        print("✅ Config test passed")
+
+    except Exception as e:
+        print(f"❌ Config test failed: {str(e)}")
+        raise
+
+
+def test_link_analysis_complex_page():
+    """Test link analysis on a more complex page"""
+    print("\n=== Testing Link Analysis on Complex Page ===")
+
+    tester = LinkAnalysisTester()
+
+    # Test with a real-world page
+    test_url = "https://www.python.org"
+
+    try:
+        result = tester.analyze_links(test_url)
+        print(f"✅ Successfully analyzed links on {test_url}")
+
+        # Analyze link distribution
+        category_counts = {}
+        for category, links in result.items():
+            if links:
+                category_counts[category] = len(links)
+                print(f"📂 {category}: {len(links)} links")
+
+        # Find top-scoring links
+        all_links = []
+        for category, links in result.items():
+            if links:
+                for link in links:
+                    link['category'] = category
+                    all_links.append(link)
+
+        if all_links:
+            # Use intrinsic_score or total_score if available, fallback to 0
+            top_links = sorted(all_links, key=lambda x: x.get('total_score', x.get('intrinsic_score', 0)), reverse=True)[:5]
+            print("\n🏆 Top 5 links by score:")
+            for i, link in enumerate(top_links, 1):
+                score = link.get('total_score', link.get('intrinsic_score', 0))
+                print(f"  {i}. {link.get('text', 'N/A')} ({score:.2f}) - {link.get('category', 'unknown')}")
+
+        # Verify we found different types of links
+        assert len(category_counts) > 0, "Should find at least one link category"
+        print("✅ Complex page analysis test passed")
+
+    except Exception as e:
+        print(f"❌ Complex page analysis test failed: {str(e)}")
+        # Don't fail the test suite for network issues
+        print("⚠️  This test may fail due to network connectivity issues")
+
+
+def test_link_analysis_scoring():
+    """Test link scoring functionality"""
+    print("\n=== Testing Link Scoring ===")
+
+    tester = LinkAnalysisTester()
+
+    test_url = "https://httpbin.org/links/10"
+
+    try:
+        result = tester.analyze_links(test_url)
+
+        # Analyze score distribution
+        all_scores = []
+        for category, links in result.items():
+            if links:
+                for link in links:
+                    # Use total_score or intrinsic_score if available
+                    score = link.get('total_score', link.get('intrinsic_score', 0))
+                    if score is not None:  # Only include links that have scores
+                        all_scores.append(score)
+
+        if all_scores:
+            avg_score = sum(all_scores) / len(all_scores)
+            max_score = max(all_scores)
+            min_score = min(all_scores)
+
+            print(f"📊 Score statistics:")
+            print(f"   Average: {avg_score:.3f}")
+            print(f"   Maximum: {max_score:.3f}")
+            print(f"   Minimum: {min_score:.3f}")
+            print(f"   Total links scored: {len(all_scores)}")
+
+            # Verify scores are in expected range
+            assert all(0 <= score <= 1 for score in all_scores), "Scores should be between 0 and 1"
+            print("✅ All scores are in valid range")
+
+        print("✅ Link scoring test passed")
+
+    except Exception as e:
+        print(f"❌ Link scoring test failed: {str(e)}")
+        raise
+
+
+def test_link_analysis_error_handling():
+    """Test error handling for invalid requests"""
+    print("\n=== Testing Error Handling ===")
+
+    tester = LinkAnalysisTester()
+
+    # Test with invalid URL
+    try:
+        tester.analyze_links("not-a-valid-url")
+        print("⚠️  Expected error for invalid URL, but got success")
+    except Exception as e:
+        print(f"✅ Correctly handled invalid URL: {str(e)}")
+
+    # Test with non-existent URL
+    try:
+        result = tester.analyze_links("https://this-domain-does-not-exist-12345.com")
+        print("⚠️  This should have failed for non-existent domain")
+    except Exception as e:
+        print(f"✅ Correctly handled non-existent domain: {str(e)}")
+
+    print("✅ Error handling test passed")
+
+
+def test_link_analysis_performance():
+    """Test performance of link analysis"""
+    print("\n=== Testing Performance ===")
+
+    tester = LinkAnalysisTester()
+
+    test_url = "https://httpbin.org/links/50"
+
+    try:
+        start_time = time.time()
+        result = tester.analyze_links(test_url)
+        end_time = time.time()
+
+        duration = end_time - start_time
+        total_links = sum(len(links) for links in result.values())
+
+        print(f"⏱️  Analysis completed in {duration:.2f} seconds")
+        print(f"🔗 Found {total_links} links")
+        print(f"📈 Rate: {total_links/duration:.1f} links/second")
+
+        # Performance should be reasonable
+        assert duration < 60, f"Analysis took too long: {duration:.2f}s"
+        print("✅ Performance test passed")
+
+    except Exception as e:
+        print(f"❌ Performance test failed: {str(e)}")
+        raise
+
+
+def test_link_analysis_categorization():
+    """Test link categorization functionality"""
+    print("\n=== Testing Link Categorization ===")
+
+    tester = LinkAnalysisTester()
+
+    test_url = "https://www.python.org"
+
+    try:
+        result = tester.analyze_links(test_url)
+
+        # Check categorization
+        categories_found = []
+        for category, links in result.items():
+            if links:
+                categories_found.append(category)
+                print(f"📂 {category}: {len(links)} links")
+
+                # Analyze a sample link from each category
+                sample_link = links[0]
+                url = sample_link.get('href', '')
+                text = sample_link.get('text', '')
+                score = sample_link.get('total_score', sample_link.get('intrinsic_score', 0))
+
+                print(f"   Sample: {text[:50]}... ({url[:50]}...) - score: {score:.2f}")
+
+        print(f"✅ Found {len(categories_found)} link categories")
+        print("✅ Categorization test passed")
+
+    except Exception as e:
+        print(f"❌ Categorization test failed: {str(e)}")
+        # Don't fail for network issues
+        print("⚠️  This test may fail due to network connectivity issues")
+
+
+def test_link_analysis_all_config_options():
+    """Test all available LinkPreviewConfig options"""
+    print("\n=== Testing All Configuration Options ===")
+
+    tester = LinkAnalysisTester()
+    test_url = "https://httpbin.org/links/10"
+
+    # Test 1: include_internal and include_external
+    print("\n🔍 Testing include_internal/include_external options...")
+
+    configs = [
+        {
+            "name": "Internal only",
+            "config": {"include_internal": True, "include_external": False}
+        },
+        {
+            "name": "External only",
+            "config": {"include_internal": False, "include_external": True}
+        },
+        {
+            "name": "Both internal and external",
+            "config": {"include_internal": True, "include_external": True}
+        }
+    ]
+
+    for test_case in configs:
+        try:
+            result = tester.analyze_links(test_url, test_case["config"])
+            internal_count = len(result.get('internal', []))
+            external_count = len(result.get('external', []))
+
+            print(f"   {test_case['name']}: {internal_count} internal, {external_count} external links")
+
+            # Verify configuration behavior
+            if test_case["config"]["include_internal"] and not test_case["config"]["include_external"]:
+                assert internal_count >= 0, "Should have internal links"
+            elif not test_case["config"]["include_internal"] and test_case["config"]["include_external"]:
+                assert external_count >= 0, "Should have external links"
+
+        except Exception as e:
+            print(f"   ❌ {test_case['name']} failed: {e}")
+
+    # Test 2: include_patterns and exclude_patterns
+    print("\n🔍 Testing include/exclude patterns...")
+
+    pattern_configs = [
+        {
+            "name": "Include specific patterns",
+            "config": {
+                "include_patterns": ["*/links/*", "*/test*"],
+                "include_internal": True,
+                "include_external": True
+            }
+        },
+        {
+            "name": "Exclude specific patterns",
+            "config": {
+                "exclude_patterns": ["*/admin*", "*/login*"],
+                "include_internal": True,
+                "include_external": True
+            }
+        },
+        {
+            "name": "Both include and exclude patterns",
+            "config": {
+                "include_patterns": ["*"],
+                "exclude_patterns": ["*/exclude*"],
+                "include_internal": True,
+                "include_external": True
+            }
+        }
+    ]
+
+    for test_case in pattern_configs:
+        try:
+            result = tester.analyze_links(test_url, test_case["config"])
+            total_links = sum(len(links) for links in result.values())
+            print(f"   {test_case['name']}: {total_links} links found")
+
+        except Exception as e:
+            print(f"   ❌ {test_case['name']} failed: {e}")
+
+    # Test 3: Performance options (concurrency, timeout, max_links)
+    print("\n🔍 Testing performance options...")
+
+    perf_configs = [
+        {
+            "name": "Low concurrency",
+            "config": {
+                "concurrency": 1,
+                "timeout": 10,
+                "max_links": 50,
+                "include_internal": True,
+                "include_external": True
+            }
+        },
+        {
+            "name": "High concurrency",
+            "config": {
+                "concurrency": 5,
+                "timeout": 15,
+                "max_links": 200,
+                "include_internal": True,
+                "include_external": True
+            }
+        },
+        {
+            "name": "Very limited",
+            "config": {
+                "concurrency": 1,
+                "timeout": 2,
+                "max_links": 5,
+                "include_internal": True,
+                "include_external": True
+            }
+        }
+    ]
+
+    for test_case in perf_configs:
+        try:
+            start_time = time.time()
+            result = tester.analyze_links(test_url, test_case["config"])
+            end_time = time.time()
+
+            total_links = sum(len(links) for links in result.values())
+            duration = end_time - start_time
+
+            print(f"   {test_case['name']}: {total_links} links in {duration:.2f}s")
+
+            # Verify max_links constraint
+            if total_links > test_case["config"]["max_links"]:
+                print(f"   ⚠️  Found {total_links} links, expected max {test_case['config']['max_links']}")
+
+        except Exception as e:
+            print(f"   ❌ {test_case['name']} failed: {e}")
+
+    # Test 4: Scoring and filtering options
+    print("\n🔍 Testing scoring and filtering options...")
+
+    scoring_configs = [
+        {
+            "name": "No score threshold",
+            "config": {
+                "score_threshold": None,
+                "include_internal": True,
+                "include_external": True
+            }
+        },
+        {
+            "name": "Low score threshold",
+            "config": {
+                "score_threshold": 0.1,
+                "include_internal": True,
+                "include_external": True
+            }
+        },
+        {
+            "name": "High score threshold",
+            "config": {
+                "score_threshold": 0.8,
+                "include_internal": True,
+                "include_external": True
+            }
+        },
+        {
+            "name": "With query for contextual scoring",
+            "config": {
+                "query": "test links",
+                "score_threshold": 0.3,
+                "include_internal": True,
+                "include_external": True
+            }
+        }
+    ]
+
+    for test_case in scoring_configs:
+        try:
+            result = tester.analyze_links(test_url, test_case["config"])
+            total_links = sum(len(links) for links in result.values())
+
+            # Check score threshold
+            if test_case["config"]["score_threshold"] is not None:
+                min_score = test_case["config"]["score_threshold"]
+                low_score_links = 0
+
+                for links in result.values():
+                    for link in links:
+                        score = link.get('total_score', link.get('intrinsic_score', 0))
+                        if score is not None and score < min_score:
+                            low_score_links += 1
+
+                if low_score_links > 0:
+                    print(f"   ⚠️  Found {low_score_links} links below threshold {min_score}")
+                else:
+                    print(f"   ✅ All links meet threshold {min_score}")
+
+            print(f"   {test_case['name']}: {total_links} links")
+
+        except Exception as e:
+            print(f"   ❌ {test_case['name']} failed: {e}")
+
+    # Test 5: Verbose mode
+    print("\n🔍 Testing verbose mode...")
+
+    try:
+        result = tester.analyze_links(test_url, {
+            "verbose": True,
+            "include_internal": True,
+            "include_external": True
+        })
+        total_links = sum(len(links) for links in result.values())
+        print(f"   Verbose mode: {total_links} links")
+
+    except Exception as e:
+        print(f"   ❌ Verbose mode failed: {e}")
+
+    print("✅ All configuration options test passed")
+
+
+def test_link_analysis_edge_cases():
+    """Test edge cases and error scenarios for configuration options"""
+    print("\n=== Testing Edge Cases ===")
+
+    tester = LinkAnalysisTester()
+    test_url = "https://httpbin.org/links/10"
+
+    # Test 1: Invalid configuration values
+    print("\n🔍 Testing invalid configuration values...")
+
+    invalid_configs = [
+        {
+            "name": "Negative concurrency",
+            "config": {"concurrency": -1}
+        },
+        {
+            "name": "Zero timeout",
+            "config": {"timeout": 0}
+        },
+        {
+            "name": "Negative max_links",
+            "config": {"max_links": -5}
+        },
+        {
+            "name": "Invalid score threshold (too high)",
+            "config": {"score_threshold": 1.5}
+        },
+        {
+            "name": "Invalid score threshold (too low)",
+            "config": {"score_threshold": -0.1}
+        },
+        {
+            "name": "Both include flags false",
+            "config": {"include_internal": False, "include_external": False}
+        }
+    ]
+
+    for test_case in invalid_configs:
+        try:
+            result = tester.analyze_links(test_url, test_case["config"])
+            print(f"   ⚠️  {test_case['name']}: Expected to fail but succeeded")
+
+        except Exception as e:
+            print(f"   ✅ {test_case['name']}: Correctly failed - {str(e)}")
+
+    # Test 2: Extreme but valid values
+    print("\n🔍 Testing extreme valid values...")
+
+    extreme_configs = [
+        {
+            "name": "Very high concurrency",
+            "config": {
+                "concurrency": 50,
+                "timeout": 30,
+                "max_links": 1000,
+                "include_internal": True,
+                "include_external": True
+            }
+        },
+        {
+            "name": "Very low score threshold",
+            "config": {
+                "score_threshold": 0.0,
+                "include_internal": True,
+                "include_external": True
+            }
+        },
+        {
+            "name": "Very high score threshold",
+            "config": {
+                "score_threshold": 1.0,
+                "include_internal": True,
+                "include_external": True
+            }
+        }
+    ]
+
+    for test_case in extreme_configs:
+        try:
+            result = tester.analyze_links(test_url, test_case["config"])
+            total_links = sum(len(links) for links in result.values())
+            print(f"   ✅ {test_case['name']}: {total_links} links")
+
+        except Exception as e:
+            print(f"   ❌ {test_case['name']} failed: {e}")
+
+    # Test 3: Complex pattern matching
+    print("\n🔍 Testing complex pattern matching...")
+
+    pattern_configs = [
+        {
+            "name": "Multiple include patterns",
+            "config": {
+                "include_patterns": ["*/links/*", "*/test*", "*/httpbin*"],
+                "include_internal": True,
+                "include_external": True
+            }
+        },
+        {
+            "name": "Multiple exclude patterns",
+            "config": {
+                "exclude_patterns": ["*/admin*", "*/login*", "*/logout*", "*/private*"],
+                "include_internal": True,
+                "include_external": True
+            }
+        },
+        {
+            "name": "Overlapping include/exclude patterns",
+            "config": {
+                "include_patterns": ["*"],
+                "exclude_patterns": ["*/admin*", "*/private*"],
+                "include_internal": True,
+                "include_external": True
+            }
+        }
+    ]
+
+    for test_case in pattern_configs:
+        try:
+            result = tester.analyze_links(test_url, test_case["config"])
+            total_links = sum(len(links) for links in result.values())
+            print(f"   {test_case['name']}: {total_links} links")
+
+        except Exception as e:
+            print(f"   ❌ {test_case['name']} failed: {e}")
+
+    print("✅ Edge cases test passed")
+
+
+def test_link_analysis_batch():
+    """Test batch link analysis"""
+    print("\n=== Testing Batch Analysis ===")
+
+    tester = LinkAnalysisTester()
+
+    test_urls = [
+        "https://httpbin.org/links/10",
+        "https://httpbin.org/links/5",
+        "https://httpbin.org/links/2"
+    ]
+
+    try:
+        results = {}
+        for url in test_urls:
+            print(f"🔍 Analyzing: {url}")
+            result = tester.analyze_links(url)
+            results[url] = result
+
+            # Small delay to be respectful
+            time.sleep(0.5)
+
+        print(f"✅ Successfully analyzed {len(results)} URLs")
+
+        for url, result in results.items():
+            total_links = sum(len(links) for links in result.values())
+            print(f"   {url}: {total_links} links")
+
+        print("✅ Batch analysis test passed")
+
+    except Exception as e:
+        print(f"❌ Batch analysis test failed: {str(e)}")
+        raise
+
+
+def run_all_link_analysis_tests():
+    """Run all link analysis tests"""
+    print("🚀 Starting Link Analysis Test Suite")
+    print("=" * 50)
+
+    tests = [
+        test_link_analysis_basic,
+        test_link_analysis_with_config,
+        test_link_analysis_complex_page,
+        test_link_analysis_scoring,
+        test_link_analysis_error_handling,
+        test_link_analysis_performance,
+        test_link_analysis_categorization,
+        test_link_analysis_batch
+    ]
+
+    passed = 0
+    failed = 0
+
+    for test_func in tests:
+        try:
+            test_func()
+            passed += 1
+            print(f"✅ {test_func.__name__} PASSED")
+        except Exception as e:
+            failed += 1
+            print(f"❌ {test_func.__name__} FAILED: {str(e)}")
+
+        print("-" * 50)
+
+    print(f"\n📊 Test Results: {passed} passed, {failed} failed")
+
+    if failed > 0:
+        print("⚠️  Some tests failed, but this may be due to network or server issues")
+        return False
+
+    print("🎉 All tests passed!")
+    return True
+
+
+if __name__ == "__main__":
+    # Check if server is running
+    import socket
+
+    def check_server(host="localhost", port=11234):
+        try:
+            socket.create_connection((host, port), timeout=5)
+            return True
+        except:
+            return False
+
+    if not check_server():
+        print("❌ Server is not running on localhost:11234")
+        print("Please start the Crawl4AI server first:")
+        print("  cd deploy/docker && python server.py")
+        sys.exit(1)
+
+    success = run_all_link_analysis_tests()
+    sys.exit(0 if success else 1)
--- a/tests/test_link_analysis_integration.py
+++ b/tests/test_link_analysis_integration.py
@@ -0,0 +1,169 @@
+import requests
+import json
+import time
+import sys
+
+
+def test_links_analyze_endpoint():
+    """Integration test for the /links/analyze endpoint"""
+
+    base_url = "http://localhost:11234"
+
+    # Health check
+    try:
+        health_response = requests.get(f"{base_url}/health", timeout=5)
+        if health_response.status_code != 200:
+            print("❌ Server health check failed")
+            return False
+        print("✅ Server health check passed")
+    except Exception as e:
+        print(f"❌ Cannot connect to server: {e}")
+        return False
+
+    # Get auth token
+    token = None
+    try:
+        token_response = requests.post(
+            f"{base_url}/token",
+            json={"email": "test@example.com"},
+            timeout=5
+        )
+        if token_response.status_code == 200:
+            token = token_response.json()["access_token"]
+            print("✅ Authentication token obtained")
+    except Exception as e:
+        print(f"⚠️  Could not get auth token: {e}")
+
+    # Test the links/analyze endpoint
+    headers = {"Content-Type": "application/json"}
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+
+    # Test 1: Basic request
+    print("\n🔍 Testing basic link analysis...")
+    test_data = {
+        "url": "https://httpbin.org/links/10",
+        "config": {
+            "include_internal": True,
+            "include_external": True,
+            "max_links": 50,
+            "verbose": True
+        }
+    }
+
+    try:
+        response = requests.post(
+            f"{base_url}/links/analyze",
+            headers=headers,
+            json=test_data,
+            timeout=30
+        )
+
+        if response.status_code == 200:
+            result = response.json()
+            print("✅ Basic link analysis successful")
+            print(f"📄 Response structure: {list(result.keys())}")
+
+            # Verify response structure
+            total_links = sum(len(links) for links in result.values())
+            print(f"📊 Found {total_links} total links")
+
+            # Debug: Show what was actually returned
+            if total_links == 0:
+                print("⚠️  No links found - showing full response:")
+                print(json.dumps(result, indent=2))
+
+            # Check for expected categories
+            found_categories = []
+            for category in ['internal', 'external', 'social', 'download', 'email', 'phone']:
+                if category in result and result[category]:
+                    found_categories.append(category)
+
+            print(f"📂 Found categories: {found_categories}")
+
+            # Verify link objects have required fields
+            if total_links > 0:
+                sample_found = False
+                for category, links in result.items():
+                    if links:
+                        sample_link = links[0]
+                        if 'href' in sample_link and 'total_score' in sample_link:
+                            sample_found = True
+                            break
+
+                if sample_found:
+                    print("✅ Link objects have required fields")
+                else:
+                    print("⚠️  Link objects missing required fields")
+
+        else:
+            print(f"❌ Basic link analysis failed: {response.status_code}")
+            print(f"Response: {response.text}")
+            return False
+
+    except Exception as e:
+        print(f"❌ Basic link analysis error: {e}")
+        return False
+
+    # Test 2: With configuration
+    print("\n🔍 Testing link analysis with configuration...")
+    test_data_with_config = {
+        "url": "https://httpbin.org/links/10",
+        "config": {
+            "include_internal": True,
+            "include_external": True,
+            "max_links": 50,
+            "timeout": 10,
+            "verbose": True
+        }
+    }
+
+    try:
+        response = requests.post(
+            f"{base_url}/links/analyze",
+            headers=headers,
+            json=test_data_with_config,
+            timeout=30
+        )
+
+        if response.status_code == 200:
+            result = response.json()
+            total_links = sum(len(links) for links in result.values())
+            print(f"✅ Link analysis with config successful ({total_links} links)")
+        else:
+            print(f"❌ Link analysis with config failed: {response.status_code}")
+            return False
+
+    except Exception as e:
+        print(f"❌ Link analysis with config error: {e}")
+        return False
+
+    # Test 3: Error handling
+    print("\n🔍 Testing error handling...")
+    invalid_data = {
+        "url": "not-a-valid-url"
+    }
+
+    try:
+        response = requests.post(
+            f"{base_url}/links/analyze",
+            headers=headers,
+            json=invalid_data,
+            timeout=30
+        )
+
+        if response.status_code >= 400:
+            print("✅ Error handling works correctly")
+        else:
+            print("⚠️  Expected error for invalid URL, but got success")
+
+    except Exception as e:
+        print(f"✅ Error handling caught exception: {e}")
+
+    print("\n🎉 All integration tests passed!")
+    return True
+
+
+if __name__ == "__main__":
+    success = test_links_analyze_endpoint()
+    sys.exit(0 if success else 1)