"""Unit tests for head fingerprinting."""
import pytest
from crawl4ai.utils import compute_head_fingerprint
class TestHeadFingerprint:
    """Tests for the compute_head_fingerprint function.

    Every fixture is a small ``<head>`` snippet. Tests that assert a
    difference use two snippets that differ in exactly one tag's content,
    so a failure points at that specific signal.
    """

    def test_same_content_same_fingerprint(self):
        """Identical <head> content produces same fingerprint."""
        head = "<head><title>Test Page</title></head>"
        fp1 = compute_head_fingerprint(head)
        fp2 = compute_head_fingerprint(head)
        assert fp1 == fp2
        # Guard against the trivial case where both calls return "".
        assert fp1 != ""

    def test_different_title_different_fingerprint(self):
        """Different title produces different fingerprint."""
        head1 = "<head><title>Title A</title></head>"
        head2 = "<head><title>Title B</title></head>"
        assert compute_head_fingerprint(head1) != compute_head_fingerprint(head2)

    def test_empty_head_returns_empty_string(self):
        """Empty or None head should return empty fingerprint."""
        assert compute_head_fingerprint("") == ""
        assert compute_head_fingerprint(None) == ""

    def test_head_without_signals_returns_empty(self):
        """Head without title or key meta tags returns empty."""
        # Non-empty head on purpose: the empty-string case is covered by
        # test_empty_head_returns_empty_string.
        head = "<head><link rel='stylesheet' href='style.css'></head>"
        assert compute_head_fingerprint(head) == ""

    def test_extracts_title(self):
        """Title is extracted and included in fingerprint."""
        head1 = "<head><title>My Title</title></head>"
        # NOTE(review): assumes a <link> tag is not a fingerprint signal,
        # consistent with test_head_without_signals_returns_empty.
        head2 = "<head><title>My Title</title><link href='x'></head>"
        # Same title should produce same fingerprint
        assert compute_head_fingerprint(head1) == compute_head_fingerprint(head2)

    def test_extracts_meta_description(self):
        """Meta description is extracted."""
        head1 = '<head><meta name="description" content="Test description"></head>'
        head2 = '<head><meta name="description" content="Different description"></head>'
        assert compute_head_fingerprint(head1) != compute_head_fingerprint(head2)

    def test_extracts_og_tags(self):
        """Open Graph tags are extracted."""
        head1 = '<head><meta property="og:title" content="OG Title A"></head>'
        head2 = '<head><meta property="og:title" content="OG Title B"></head>'
        assert compute_head_fingerprint(head1) != compute_head_fingerprint(head2)

    def test_extracts_og_image(self):
        """og:image is extracted and affects fingerprint."""
        head1 = (
            '<head><meta property="og:image" '
            'content="https://example.com/img1.jpg"></head>'
        )
        head2 = (
            '<head><meta property="og:image" '
            'content="https://example.com/img2.jpg"></head>'
        )
        assert compute_head_fingerprint(head1) != compute_head_fingerprint(head2)

    def test_extracts_article_modified_time(self):
        """article:modified_time is extracted."""
        head1 = (
            '<head><meta property="article:modified_time" '
            'content="2024-01-01T00:00:00Z"></head>'
        )
        head2 = (
            '<head><meta property="article:modified_time" '
            'content="2024-12-01T00:00:00Z"></head>'
        )
        assert compute_head_fingerprint(head1) != compute_head_fingerprint(head2)

    def test_case_insensitive(self):
        """Fingerprinting is case-insensitive for tags."""
        head1 = "<head><TITLE>Test</TITLE></head>"
        head2 = "<head><title>test</title></head>"
        # Both should extract title (case insensitive)
        fp1 = compute_head_fingerprint(head1)
        fp2 = compute_head_fingerprint(head2)
        # Only extraction is asserted; equality of fp1 and fp2 is
        # deliberately left unchecked.
        assert fp1 != ""
        assert fp2 != ""

    def test_handles_attribute_order(self):
        """Handles different attribute orders in meta tags."""
        head1 = '<head><meta name="description" content="Test"></head>'
        head2 = '<head><meta content="Test" name="description"></head>'
        assert compute_head_fingerprint(head1) == compute_head_fingerprint(head2)

    def test_real_world_head(self):
        """Test with a realistic head section."""
        head = '''
        <head>
            <meta charset="utf-8">
            <meta name="viewport" content="width=device-width, initial-scale=1">
            <title>Python Documentation</title>
            <meta name="description" content="Official Python documentation">
            <meta property="og:title" content="Python Docs">
            <meta property="og:description" content="Learn Python">
            <meta property="og:image" content="https://python.org/logo.png">
            <meta property="article:modified_time" content="2024-01-15T10:00:00Z">
            <link rel="stylesheet" href="styles.css">
            <script src="app.js"></script>
        </head>
        '''
        fp = compute_head_fingerprint(head)
        assert fp != ""
        # Should be deterministic
        assert fp == compute_head_fingerprint(head)