Apply Ruff Corrections

2025-01-13 19:19:58 +08:00
parent c3370ec5da
commit 8ec12d7d68
84 changed files with 6861 additions and 5076 deletions
--- a/tests/async/test_chunking_and_extraction_strategies.py
+++ b/tests/async/test_chunking_and_extraction_strategies.py
@@ -1,7 +1,6 @@
 import os
 import sys
 import pytest
-import asyncio
 import json

 # Add the parent directory to the Python path
@@ -9,8 +8,9 @@ parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(parent_dir)

 from crawl4ai.async_webcrawler import AsyncWebCrawler
-from crawl4ai.chunking_strategy import RegexChunking, NlpSentenceChunking
-from crawl4ai.extraction_strategy import CosineStrategy, LLMExtractionStrategy
+from crawl4ai.chunking_strategy import RegexChunking
+from crawl4ai.extraction_strategy import LLMExtractionStrategy
+

 @pytest.mark.asyncio
 async def test_regex_chunking():
@@ -18,15 +18,14 @@ async def test_regex_chunking():
        url = "https://www.nbcnews.com/business"
        chunking_strategy = RegexChunking(patterns=["\n\n"])
        result = await crawler.arun(
-            url=url,
-            chunking_strategy=chunking_strategy,
-            bypass_cache=True
+            url=url, chunking_strategy=chunking_strategy, bypass_cache=True
        )
        assert result.success
        assert result.extracted_content
        chunks = json.loads(result.extracted_content)
        assert len(chunks) > 1  # Ensure multiple chunks were created

+
 # @pytest.mark.asyncio
 # async def test_cosine_strategy():
 #     async with AsyncWebCrawler(verbose=True) as crawler:
@@ -43,25 +42,25 @@ async def test_regex_chunking():
 #         assert len(extracted_data) > 0
 #         assert all('tags' in item for item in extracted_data)

+
 @pytest.mark.asyncio
 async def test_llm_extraction_strategy():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.nbcnews.com/business"
        extraction_strategy = LLMExtractionStrategy(
            provider="openai/gpt-4o-mini",
-            api_token=os.getenv('OPENAI_API_KEY'),
-            instruction="Extract only content related to technology"
+            api_token=os.getenv("OPENAI_API_KEY"),
+            instruction="Extract only content related to technology",
        )
        result = await crawler.arun(
-            url=url,
-            extraction_strategy=extraction_strategy,
-            bypass_cache=True
+            url=url, extraction_strategy=extraction_strategy, bypass_cache=True
        )
        assert result.success
        assert result.extracted_content
        extracted_data = json.loads(result.extracted_content)
        assert len(extracted_data) > 0
-        assert all('content' in item for item in extracted_data)
+        assert all("content" in item for item in extracted_data)
+

 # @pytest.mark.asyncio
 # async def test_combined_chunking_and_extraction():
@@ -84,4 +83,4 @@ async def test_llm_extraction_strategy():

 # Entry point for debugging
 if __name__ == "__main__":
-    pytest.main([__file__, "-v"])
+    pytest.main([__file__, "-v"])