From ee717dc019f2ef592df5bd1f67f4718628ad2cfe Mon Sep 17 00:00:00 2001 From: unclecode Date: Sun, 1 Feb 2026 03:10:32 +0000 Subject: [PATCH] Add contributor for PR #1746 and fix test pytest marker - Add ChiragBellara to CONTRIBUTORS.md for sitemap seeding fix - Add missing @pytest.mark.asyncio decorator to seeder test --- CONTRIBUTORS.md | 1 + tests/general/test_url_seeder_for_only_sitemap.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 7206d715..82ddba39 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -24,6 +24,7 @@ We would like to thank the following people for their contributions to Crawl4AI: - [NanmiCoder](https://github.com/NanmiCoder) - fix: crawler strategy exception handling and fixes [#271](https://github.com/unclecode/crawl4ai/pull/271) - [paulokuong](https://github.com/paulokuong) - fix: RAWL4_AI_BASE_DIRECTORY should be Path object instead of string [#298](https://github.com/unclecode/crawl4ai/pull/298) - [TheRedRad](https://github.com/theredrad) - feat: add force viewport screenshot option [#1694](https://github.com/unclecode/crawl4ai/pull/1694) +- [ChiragBellara](https://github.com/ChiragBellara) - fix: avoid Common Crawl calls for sitemap-only URL seeding [#1746](https://github.com/unclecode/crawl4ai/pull/1746) #### Feb-Alpha-1 - [sufianuddin](https://github.com/sufianuddin) - fix: [Documentation for JsonCssExtractionStrategy](https://github.com/unclecode/crawl4ai/issues/651) diff --git a/tests/general/test_url_seeder_for_only_sitemap.py b/tests/general/test_url_seeder_for_only_sitemap.py index 892f3af7..63bb52df 100644 --- a/tests/general/test_url_seeder_for_only_sitemap.py +++ b/tests/general/test_url_seeder_for_only_sitemap.py @@ -1,9 +1,11 @@ import asyncio +import pytest from crawl4ai import AsyncLogger, AsyncUrlSeeder, SeedingConfig from pathlib import Path import httpx +@pytest.mark.asyncio async def test_sitemap_source_does_not_hit_commoncrawl(): config = SeedingConfig( source="sitemap",