Add Async Version, JsonCss Extractor
This commit is contained in:
82
tests/async/test_database_operations.py
Normal file
82
tests/async/test_database_operations.py
Normal file
@@ -0,0 +1,82 @@
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
# Make the in-repo ``crawl4ai`` package importable when it is not installed.
# This file lives in <repo>/tests/async/, so two levels up from the file is
# only <repo>/tests; the repository root is three levels up.
# NOTE(review): assumes the ``crawl4ai`` package directory sits at the
# repository root - confirm against the project layout.
parent_dir = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
# Appended (not inserted first) so an installed ``crawl4ai`` still wins.
sys.path.append(parent_dir)
|
||||
|
||||
from crawl4ai.async_webcrawler import AsyncWebCrawler
|
||||
|
||||
@pytest.mark.asyncio
async def test_cache_url():
    """Crawl one URL twice and check the second, cache-enabled read matches.

    The first call passes ``bypass_cache=True`` and the second passes
    ``bypass_cache=False``; both must report success and return equal HTML.
    """
    target = "https://www.example.com"
    async with AsyncWebCrawler(verbose=True) as crawler:
        # Initial crawl; intended to leave an entry for the URL in the cache.
        fresh = await crawler.arun(url=target, bypass_cache=True)
        assert fresh.success

        # Same URL again, this time allowing the cache to be used.
        cached = await crawler.arun(url=target, bypass_cache=False)
        assert cached.success
        assert cached.html == fresh.html
|
||||
|
||||
@pytest.mark.asyncio
async def test_bypass_cache():
    """Check that two consecutive cache-bypassing crawls of one URL succeed.

    Both calls pass ``bypass_cache=True``. The HTML of the two results is
    deliberately not compared with each other: a page may or may not change
    between two fetches, so neither equality nor inequality is a reliable
    expectation and asserting either makes the test flaky.
    """
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.python.org"

        # First run, bypassing the cache.
        result1 = await crawler.arun(url=url, bypass_cache=True)
        assert result1.success

        # Second run, bypassing the cache again.
        result2 = await crawler.arun(url=url, bypass_cache=True)
        assert result2.success
        # Only require that the second run produced some content; two fresh
        # fetches of the same page can legitimately be byte-identical.
        assert result2.html
|
||||
|
||||
@pytest.mark.asyncio
async def test_cache_size():
    """Crawl one URL and expect the reported cache size to grow by one.

    The size is read through ``aget_cache_size`` before and after a single
    ``arun`` call made with ``bypass_cache=True``.
    """
    target = "https://www.nbcnews.com/business"
    async with AsyncWebCrawler(verbose=True) as crawler:
        size_before = await crawler.aget_cache_size()

        await crawler.arun(url=target, bypass_cache=True)

        size_after = await crawler.aget_cache_size()
        # NOTE(review): presumably this only holds when the URL is not
        # already in the cache from an earlier run - confirm how entries
        # are keyed.
        assert size_after == size_before + 1
|
||||
|
||||
@pytest.mark.asyncio
async def test_clear_cache():
    """Check that ``aclear_cache`` brings the reported cache size to zero.

    A URL is crawled first so the size is positive before clearing.
    """
    target = "https://www.example.org"
    async with AsyncWebCrawler(verbose=True) as crawler:
        await crawler.arun(url=target, bypass_cache=True)

        # Precondition: there is something to clear.
        size_before = await crawler.aget_cache_size()
        assert size_before > 0

        await crawler.aclear_cache()

        size_after = await crawler.aget_cache_size()
        assert size_after == 0
|
||||
|
||||
@pytest.mark.asyncio
async def test_flush_cache():
    """Check that ``aflush_cache`` empties the cache and crawling still works.

    A URL is crawled, the cache is flushed, the reported size must be zero,
    and a later cache-enabled crawl of the same URL must still succeed.
    """
    target = "https://www.example.net"
    async with AsyncWebCrawler(verbose=True) as crawler:
        await crawler.arun(url=target, bypass_cache=True)

        # Precondition: the cache holds at least one entry.
        size_before = await crawler.aget_cache_size()
        assert size_before > 0

        await crawler.aflush_cache()

        size_after = await crawler.aget_cache_size()
        assert size_after == 0

        # Request the previously crawled URL again with the cache enabled;
        # the crawl is expected to succeed even though the cache was flushed.
        refetched = await crawler.arun(url=target, bypass_cache=False)
        assert refetched.success
|
||||
|
||||
# Entry point for debugging: run this file's tests directly with pytest.
if __name__ == "__main__":
    # pytest.main returns the session's exit code; propagate it so that a
    # failing run does not make the script exit with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||
Reference in New Issue
Block a user