"""Unit tests for head fingerprinting.""" import pytest from crawl4ai.utils import compute_head_fingerprint class TestHeadFingerprint: """Tests for the compute_head_fingerprint function.""" def test_same_content_same_fingerprint(self): """Identical content produces same fingerprint.""" head = "Test Page" fp1 = compute_head_fingerprint(head) fp2 = compute_head_fingerprint(head) assert fp1 == fp2 assert fp1 != "" def test_different_title_different_fingerprint(self): """Different title produces different fingerprint.""" head1 = "Title A" head2 = "Title B" assert compute_head_fingerprint(head1) != compute_head_fingerprint(head2) def test_empty_head_returns_empty_string(self): """Empty or None head should return empty fingerprint.""" assert compute_head_fingerprint("") == "" assert compute_head_fingerprint(None) == "" def test_head_without_signals_returns_empty(self): """Head without title or key meta tags returns empty.""" head = "" assert compute_head_fingerprint(head) == "" def test_extracts_title(self): """Title is extracted and included in fingerprint.""" head1 = "My Title" head2 = "My Title" # Same title should produce same fingerprint assert compute_head_fingerprint(head1) == compute_head_fingerprint(head2) def test_extracts_meta_description(self): """Meta description is extracted.""" head1 = '' head2 = '' assert compute_head_fingerprint(head1) != compute_head_fingerprint(head2) def test_extracts_og_tags(self): """Open Graph tags are extracted.""" head1 = '' head2 = '' assert compute_head_fingerprint(head1) != compute_head_fingerprint(head2) def test_extracts_og_image(self): """og:image is extracted and affects fingerprint.""" head1 = '' head2 = '' assert compute_head_fingerprint(head1) != compute_head_fingerprint(head2) def test_extracts_article_modified_time(self): """article:modified_time is extracted.""" head1 = '' head2 = '' assert compute_head_fingerprint(head1) != compute_head_fingerprint(head2) def test_case_insensitive(self): """Fingerprinting is case-insensitive for tags.""" head1 = "Test" head2 = "test" # Both should extract title (case insensitive) fp1 = compute_head_fingerprint(head1) fp2 = compute_head_fingerprint(head2) assert fp1 != "" assert fp2 != "" def test_handles_attribute_order(self): """Handles different attribute orders in meta tags.""" head1 = '' head2 = '' assert compute_head_fingerprint(head1) == compute_head_fingerprint(head2) def test_real_world_head(self): """Test with a realistic head section.""" head = ''' Python Documentation ''' fp = compute_head_fingerprint(head) assert fp != "" # Should be deterministic assert fp == compute_head_fingerprint(head)