diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 7206d715..82ddba39 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -24,6 +24,7 @@ We would like to thank the following people for their contributions to Crawl4AI: - [NanmiCoder](https://github.com/NanmiCoder) - fix: crawler strategy exception handling and fixes [#271](https://github.com/unclecode/crawl4ai/pull/271) - [paulokuong](https://github.com/paulokuong) - fix: RAWL4_AI_BASE_DIRECTORY should be Path object instead of string [#298](https://github.com/unclecode/crawl4ai/pull/298) - [TheRedRad](https://github.com/theredrad) - feat: add force viewport screenshot option [#1694](https://github.com/unclecode/crawl4ai/pull/1694) +- [ChiragBellara](https://github.com/ChiragBellara) - fix: avoid Common Crawl calls for sitemap-only URL seeding [#1746](https://github.com/unclecode/crawl4ai/pull/1746) #### Feb-Alpha-1 - [sufianuddin](https://github.com/sufianuddin) - fix: [Documentation for JsonCssExtractionStrategy](https://github.com/unclecode/crawl4ai/issues/651) diff --git a/tests/general/test_url_seeder_for_only_sitemap.py b/tests/general/test_url_seeder_for_only_sitemap.py index 892f3af7..63bb52df 100644 --- a/tests/general/test_url_seeder_for_only_sitemap.py +++ b/tests/general/test_url_seeder_for_only_sitemap.py @@ -1,9 +1,11 @@ import asyncio +import pytest from crawl4ai import AsyncLogger, AsyncUrlSeeder, SeedingConfig from pathlib import Path import httpx +@pytest.mark.asyncio async def test_sitemap_source_does_not_hit_commoncrawl(): config = SeedingConfig( source="sitemap",