Merge pull request #1384 from unclecode/fix/update_docker_examples
docs: remove CRAWL4AI_API_TOKEN references and use correct endpoints in Docker example scripts (#1015)
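In both example scripts, the tester class drops its API-token plumbing (including the hard-coded "test_api_code" fallback) and moves to the current endpoints: job submission goes to POST /crawl/job instead of POST /crawl with a Bearer header, polling goes to GET /crawl/job/{task_id} instead of GET /task/{task_id}, the synchronous path goes to POST /crawl instead of POST /crawl_sync, and the /crawl_direct helper is removed. Request bodies change shape as well: "urls" becomes a list, the flat priority/session_id/extraction_config keys give way to nested browser_config and crawler_config objects, and results are read from a per-URL "results" list rather than a single result dict.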
@@ -8,26 +8,20 @@ from typing import Dict, Any


 class Crawl4AiTester:
-    def __init__(self, base_url: str = "http://localhost:11235", api_token: str = None):
+    def __init__(self, base_url: str = "http://localhost:11235"):
         self.base_url = base_url
-        self.api_token = (
-            api_token or os.getenv("CRAWL4AI_API_TOKEN") or "test_api_code"
-        ) # Check environment variable as fallback
-        self.headers = (
-            {"Authorization": f"Bearer {self.api_token}"} if self.api_token else {}
-        )

     def submit_and_wait(
         self, request_data: Dict[str, Any], timeout: int = 300
     ) -> Dict[str, Any]:
-        # Submit crawl job
+        # Submit crawl job using async endpoint
         response = requests.post(
-            f"{self.base_url}/crawl", json=request_data, headers=self.headers
+            f"{self.base_url}/crawl/job", json=request_data
         )
-        if response.status_code == 403:
-            raise Exception("API token is invalid or missing")
-        task_id = response.json()["task_id"]
-        print(f"Task ID: {task_id}")
+        response.raise_for_status()
+        job_response = response.json()
+        task_id = job_response["task_id"]
+        print(f"Submitted job with task_id: {task_id}")

         # Poll for result
         start_time = time.time()
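Condensed outside the class, the new submission flow amounts to the following minimal sketch; the payload keys follow the updated test requests later in this diff, and example.com is a placeholder:

import requests

base_url = "http://localhost:11235"  # default port used throughout the examples
payload = {
    "urls": ["https://example.com"],  # placeholder URL
    "browser_config": {},
    "crawler_config": {},
}

# No Authorization header anymore; HTTP errors surface via raise_for_status()
response = requests.post(f"{base_url}/crawl/job", json=payload)
response.raise_for_status()
task_id = response.json()["task_id"]  # the job endpoint returns a task_id to poll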
@@ -38,8 +32,9 @@ class Crawl4AiTester:
             )

             result = requests.get(
-                f"{self.base_url}/task/{task_id}", headers=self.headers
+                f"{self.base_url}/crawl/job/{task_id}"
             )
+            result.raise_for_status()
             status = result.json()

             if status["status"] == "failed":
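The polling side only renames the endpoint and adds raise_for_status(). Spelled out as a standalone helper, it looks roughly like this sketch; the success-status handling sits in context the diff does not show, so the "completed" branch is an assumption:

import time
import requests

def wait_for_job(base_url: str, task_id: str, timeout: int = 300) -> dict:
    start_time = time.time()
    while time.time() - start_time < timeout:
        result = requests.get(f"{base_url}/crawl/job/{task_id}")
        result.raise_for_status()
        status = result.json()
        if status["status"] == "failed":
            raise Exception(f"Task failed: {status}")
        if status["status"] == "completed":  # assumed status value; not shown in this hunk
            return status
        time.sleep(2)  # poll interval used by the script
    raise TimeoutError(f"Task {task_id} did not finish in {timeout}s")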
@@ -52,10 +47,10 @@ class Crawl4AiTester:
             time.sleep(2)

     def submit_sync(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
+        # Use synchronous crawl endpoint
         response = requests.post(
-            f"{self.base_url}/crawl_sync",
+            f"{self.base_url}/crawl",
             json=request_data,
-            headers=self.headers,
             timeout=60,
         )
         if response.status_code == 408:
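For the synchronous path, POST /crawl now blocks until the crawl finishes and returns results inline. Per the updated sync assertions further down, a minimal call might look like this (example.com is a placeholder):

import requests

payload = {"urls": ["https://example.com"], "browser_config": {}, "crawler_config": {}}
resp = requests.post("http://localhost:11235/crawl", json=payload, timeout=60)
resp.raise_for_status()
data = resp.json()
assert data["success"]
print(data["results"][0]["markdown"][:200])  # first 200 chars of the first result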
@@ -63,20 +58,9 @@ class Crawl4AiTester:
         response.raise_for_status()
         return response.json()

-    def crawl_direct(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
-        """Directly crawl without using task queue"""
-        response = requests.post(
-            f"{self.base_url}/crawl_direct", json=request_data, headers=self.headers
-        )
-        response.raise_for_status()
-        return response.json()
-
-
 def test_docker_deployment(version="basic"):
     tester = Crawl4AiTester(
         base_url="http://localhost:11235",
-        # base_url="https://api.crawl4ai.com" # just for example
-        # api_token="test" # just for example
     )
     print(f"Testing Crawl4AI Docker {version} version")

@@ -95,11 +79,8 @@ def test_docker_deployment(version="basic"):
     time.sleep(5)

     # Test cases based on version
-    test_basic_crawl_direct(tester)
-    test_basic_crawl(tester)
     test_basic_crawl(tester)
     test_basic_crawl_sync(tester)

     if version in ["full", "transformer"]:
         test_cosine_extraction(tester)

@@ -112,115 +93,129 @@ def test_docker_deployment(version="basic"):


 def test_basic_crawl(tester: Crawl4AiTester):
-    print("\n=== Testing Basic Crawl ===")
+    print("\n=== Testing Basic Crawl (Async) ===")
     request = {
-        "urls": "https://www.nbcnews.com/business",
-        "priority": 10,
-        "session_id": "test",
+        "urls": ["https://www.nbcnews.com/business"],
+        "browser_config": {},
+        "crawler_config": {}
     }

     result = tester.submit_and_wait(request)
-    print(f"Basic crawl result length: {len(result['result']['markdown'])}")
+    print(f"Basic crawl result count: {len(result['result']['results'])}")
     assert result["result"]["success"]
-    assert len(result["result"]["markdown"]) > 0
+    assert len(result["result"]["results"]) > 0
+    assert len(result["result"]["results"][0]["markdown"]) > 0


 def test_basic_crawl_sync(tester: Crawl4AiTester):
     print("\n=== Testing Basic Crawl (Sync) ===")
     request = {
-        "urls": "https://www.nbcnews.com/business",
-        "priority": 10,
-        "session_id": "test",
+        "urls": ["https://www.nbcnews.com/business"],
+        "browser_config": {},
+        "crawler_config": {}
     }

     result = tester.submit_sync(request)
-    print(f"Basic crawl result length: {len(result['result']['markdown'])}")
-    assert result["status"] == "completed"
-    assert result["result"]["success"]
-    assert len(result["result"]["markdown"]) > 0
+    print(f"Basic crawl result count: {len(result['results'])}")
+    assert result["success"]
+    assert len(result["results"]) > 0
+    assert len(result["results"][0]["markdown"]) > 0


-def test_basic_crawl_direct(tester: Crawl4AiTester):
-    print("\n=== Testing Basic Crawl (Direct) ===")
-    request = {
-        "urls": "https://www.nbcnews.com/business",
-        "priority": 10,
-        # "session_id": "test"
-        "cache_mode": "bypass", # or "enabled", "disabled", "read_only", "write_only"
-    }
-
-    result = tester.crawl_direct(request)
-    print(f"Basic crawl result length: {len(result['result']['markdown'])}")
-    assert result["result"]["success"]
-    assert len(result["result"]["markdown"]) > 0
-
-
 def test_js_execution(tester: Crawl4AiTester):
     print("\n=== Testing JS Execution ===")
     request = {
-        "urls": "https://www.nbcnews.com/business",
-        "priority": 8,
-        "js_code": [
-            "const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();"
-        ],
-        "wait_for": "article.tease-card:nth-child(10)",
-        "crawler_params": {"headless": True},
+        "urls": ["https://www.nbcnews.com/business"],
+        "browser_config": {"headless": True},
+        "crawler_config": {
+            "js_code": [
+                "const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); if(loadMoreButton) loadMoreButton.click();"
+            ],
+            "wait_for": "wide-tease-item__wrapper df flex-column flex-row-m flex-nowrap-m enable-new-sports-feed-mobile-design(10)"
+        }
     }

     result = tester.submit_and_wait(request)
-    print(f"JS execution result length: {len(result['result']['markdown'])}")
+    print(f"JS execution result count: {len(result['result']['results'])}")
     assert result["result"]["success"]


 def test_css_selector(tester: Crawl4AiTester):
     print("\n=== Testing CSS Selector ===")
     request = {
-        "urls": "https://www.nbcnews.com/business",
-        "priority": 7,
-        "css_selector": ".wide-tease-item__description",
-        "crawler_params": {"headless": True},
-        "extra": {"word_count_threshold": 10},
+        "urls": ["https://www.nbcnews.com/business"],
+        "browser_config": {"headless": True},
+        "crawler_config": {
+            "css_selector": ".wide-tease-item__description",
+            "word_count_threshold": 10
+        }
     }

     result = tester.submit_and_wait(request)
-    print(f"CSS selector result length: {len(result['result']['markdown'])}")
+    print(f"CSS selector result count: {len(result['result']['results'])}")
     assert result["result"]["success"]


 def test_structured_extraction(tester: Crawl4AiTester):
     print("\n=== Testing Structured Extraction ===")
     schema = {
-        "name": "Coinbase Crypto Prices",
-        "baseSelector": ".cds-tableRow-t45thuk",
+        "name": "Cryptocurrency Prices",
+        "baseSelector": "table[data-testid=\"prices-table\"] tbody tr",
         "fields": [
             {
-                "name": "crypto",
-                "selector": "td:nth-child(1) h2",
-                "type": "text",
+                "name": "asset_name",
+                "selector": "td:nth-child(2) p.cds-headline-h4steop",
+                "type": "text"
             },
             {
-                "name": "symbol",
-                "selector": "td:nth-child(1) p",
-                "type": "text",
+                "name": "asset_symbol",
+                "selector": "td:nth-child(2) p.cds-label2-l1sm09ec",
+                "type": "text"
+            },
+            {
+                "name": "asset_image_url",
+                "selector": "td:nth-child(2) img[alt=\"Asset Symbol\"]",
+                "type": "attribute",
+                "attribute": "src"
+            },
+            {
+                "name": "asset_url",
+                "selector": "td:nth-child(2) a[aria-label^=\"Asset page for\"]",
+                "type": "attribute",
+                "attribute": "href"
             },
             {
                 "name": "price",
-                "selector": "td:nth-child(2)",
-                "type": "text",
+                "selector": "td:nth-child(3) div.cds-typographyResets-t6muwls.cds-body-bwup3gq",
+                "type": "text"
             },
-        ],
+            {
+                "name": "change",
+                "selector": "td:nth-child(7) p.cds-body-bwup3gq",
+                "type": "text"
+            }
+        ]
     }

     request = {
-        "urls": "https://www.coinbase.com/explore",
-        "priority": 9,
-        "extraction_config": {"type": "json_css", "params": {"schema": schema}},
+        "urls": ["https://www.coinbase.com/explore"],
+        "browser_config": {},
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
+            "params": {
+                "extraction_strategy": {
+                    "type": "JsonCssExtractionStrategy",
+                    "params": {"schema": schema}
+                }
+            }
+        }
     }

     result = tester.submit_and_wait(request)
-    extracted = json.loads(result["result"]["extracted_content"])
+    extracted = json.loads(result["result"]["results"][0]["extracted_content"])
     print(f"Extracted {len(extracted)} items")
-    print("Sample item:", json.dumps(extracted[0], indent=2))
+    if extracted:
+        print("Sample item:", json.dumps(extracted[0], indent=2))
     assert result["result"]["success"]
     assert len(extracted) > 0

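The before/after request and response shapes, condensed from this hunk (values are the ones used in the tests):

old_request = {
    "urls": "https://www.nbcnews.com/business",   # single string
    "priority": 10,
    "session_id": "test",
}
new_request = {
    "urls": ["https://www.nbcnews.com/business"],  # always a list now
    "browser_config": {},
    "crawler_config": {},
}

# Async responses now carry a list of per-URL results:
# old: result["result"]["markdown"]
# new: result["result"]["results"][0]["markdown"]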
@@ -230,43 +225,54 @@ def test_llm_extraction(tester: Crawl4AiTester):
     schema = {
         "type": "object",
         "properties": {
-            "model_name": {
+            "asset_name": {
                 "type": "string",
-                "description": "Name of the OpenAI model.",
+                "description": "Name of the asset.",
             },
-            "input_fee": {
+            "price": {
                 "type": "string",
-                "description": "Fee for input token for the OpenAI model.",
+                "description": "Price of the asset.",
             },
-            "output_fee": {
+            "change": {
                 "type": "string",
-                "description": "Fee for output token for the OpenAI model.",
+                "description": "Change in price of the asset.",
             },
         },
-        "required": ["model_name", "input_fee", "output_fee"],
+        "required": ["asset_name", "price", "change"],
     }

     request = {
-        "urls": "https://openai.com/api/pricing",
-        "priority": 8,
-        "extraction_config": {
-            "type": "llm",
+        "urls": ["https://www.coinbase.com/en-in/explore"],
+        "browser_config": {},
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
             "params": {
-                "provider": "openai/gpt-4o-mini",
-                "api_token": os.getenv("OPENAI_API_KEY"),
-                "schema": schema,
-                "extraction_type": "schema",
-                "instruction": """From the crawled content, extract all mentioned model names along with their fees for input and output tokens.""",
-            },
-        },
-        "crawler_params": {"word_count_threshold": 1},
+                "extraction_strategy": {
+                    "type": "LLMExtractionStrategy",
+                    "params": {
+                        "llm_config": {
+                            "type": "LLMConfig",
+                            "params": {
+                                "provider": "gemini/gemini-2.0-flash-exp",
+                                "api_token": os.getenv("GEMINI_API_KEY")
+                            }
+                        },
+                        "schema": schema,
+                        "extraction_type": "schema",
+                        "instruction": "From the crawled content, extract asset names along with their prices and change in price.",
+                    }
+                },
+                "word_count_threshold": 1
+            }
+        }
     }

     try:
         result = tester.submit_and_wait(request)
-        extracted = json.loads(result["result"]["extracted_content"])
-        print(f"Extracted {len(extracted)} model pricing entries")
-        print("Sample entry:", json.dumps(extracted[0], indent=2))
+        extracted = json.loads(result["result"]["results"][0]["extracted_content"])
+        print(f"Extracted {len(extracted)} asset pricing entries")
+        if extracted:
+            print("Sample entry:", json.dumps(extracted[0], indent=2))
         assert result["result"]["success"]
     except Exception as e:
         print(f"LLM extraction test failed (might be due to missing API key): {str(e)}")
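Every strategy and config object in the new payloads uses the same {"type": ..., "params": {...}} envelope, nested as deeply as needed. Stripped to its skeleton (the schema here is a placeholder):

schema = {"name": "Example", "baseSelector": "tr", "fields": []}  # placeholder schema

crawler_config = {
    "type": "CrawlerRunConfig",
    "params": {
        "extraction_strategy": {
            "type": "JsonCssExtractionStrategy",  # LLMExtractionStrategy and CosineStrategy use the same shape
            "params": {"schema": schema},
        },
        # LLM strategies additionally nest a {"type": "LLMConfig", "params": {...}} block
    },
}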
@@ -274,6 +280,16 @@ def test_llm_extraction(tester: Crawl4AiTester):

 def test_llm_with_ollama(tester: Crawl4AiTester):
     print("\n=== Testing LLM with Ollama ===")

+    # Check if Ollama is accessible first
+    try:
+        ollama_response = requests.get("http://localhost:11434/api/tags", timeout=5)
+        ollama_response.raise_for_status()
+        print("Ollama is accessible")
+    except:
+        print("Ollama is not accessible, skipping test")
+        return
+
     schema = {
         "type": "object",
         "properties": {
@@ -294,24 +310,33 @@ def test_llm_with_ollama(tester: Crawl4AiTester):
     }

     request = {
-        "urls": "https://www.nbcnews.com/business",
-        "priority": 8,
-        "extraction_config": {
-            "type": "llm",
+        "urls": ["https://www.nbcnews.com/business"],
+        "browser_config": {"verbose": True},
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
             "params": {
-                "provider": "ollama/llama2",
-                "schema": schema,
-                "extraction_type": "schema",
-                "instruction": "Extract the main article information including title, summary, and main topics.",
-            },
-        },
-        "extra": {"word_count_threshold": 1},
-        "crawler_params": {"verbose": True},
+                "extraction_strategy": {
+                    "type": "LLMExtractionStrategy",
+                    "params": {
+                        "llm_config": {
+                            "type": "LLMConfig",
+                            "params": {
+                                "provider": "ollama/llama3.2:latest",
+                            }
+                        },
+                        "schema": schema,
+                        "extraction_type": "schema",
+                        "instruction": "Extract the main article information including title, summary, and main topics.",
+                    }
+                },
+                "word_count_threshold": 1
+            }
+        }
     }

     try:
         result = tester.submit_and_wait(request)
-        extracted = json.loads(result["result"]["extracted_content"])
+        extracted = json.loads(result["result"]["results"][0]["extracted_content"])
         print("Extracted content:", json.dumps(extracted, indent=2))
         assert result["result"]["success"]
     except Exception as e:
@@ -321,24 +346,30 @@ def test_llm_with_ollama(tester: Crawl4AiTester):
 def test_cosine_extraction(tester: Crawl4AiTester):
     print("\n=== Testing Cosine Extraction ===")
     request = {
-        "urls": "https://www.nbcnews.com/business",
-        "priority": 8,
-        "extraction_config": {
-            "type": "cosine",
+        "urls": ["https://www.nbcnews.com/business"],
+        "browser_config": {},
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
             "params": {
-                "semantic_filter": "business finance economy",
-                "word_count_threshold": 10,
-                "max_dist": 0.2,
-                "top_k": 3,
-            },
-        },
+                "extraction_strategy": {
+                    "type": "CosineStrategy",
+                    "params": {
+                        "semantic_filter": "business finance economy",
+                        "word_count_threshold": 10,
+                        "max_dist": 0.2,
+                        "top_k": 3,
+                    }
+                }
+            }
+        }
     }

     try:
         result = tester.submit_and_wait(request)
-        extracted = json.loads(result["result"]["extracted_content"])
+        extracted = json.loads(result["result"]["results"][0]["extracted_content"])
         print(f"Extracted {len(extracted)} text clusters")
-        print("First cluster tags:", extracted[0]["tags"])
+        if extracted:
+            print("First cluster tags:", extracted[0]["tags"])
         assert result["result"]["success"]
     except Exception as e:
         print(f"Cosine extraction test failed: {str(e)}")
@@ -347,20 +378,25 @@ def test_cosine_extraction(tester: Crawl4AiTester):
 def test_screenshot(tester: Crawl4AiTester):
     print("\n=== Testing Screenshot ===")
     request = {
-        "urls": "https://www.nbcnews.com/business",
-        "priority": 5,
-        "screenshot": True,
-        "crawler_params": {"headless": True},
+        "urls": ["https://www.nbcnews.com/business"],
+        "browser_config": {"headless": True},
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
+            "params": {
+                "screenshot": True
+            }
+        }
     }

     result = tester.submit_and_wait(request)
-    print("Screenshot captured:", bool(result["result"]["screenshot"]))
+    screenshot_data = result["result"]["results"][0]["screenshot"]
+    print("Screenshot captured:", bool(screenshot_data))

-    if result["result"]["screenshot"]:
+    if screenshot_data:
         # Save screenshot
-        screenshot_data = base64.b64decode(result["result"]["screenshot"])
+        screenshot_bytes = base64.b64decode(screenshot_data)
         with open("test_screenshot.jpg", "wb") as f:
-            f.write(screenshot_data)
+            f.write(screenshot_bytes)
         print("Screenshot saved as test_screenshot.jpg")

     assert result["result"]["success"]
@@ -368,5 +404,4 @@ def test_screenshot(tester: Crawl4AiTester):

 if __name__ == "__main__":
     version = sys.argv[1] if len(sys.argv) > 1 else "basic"
-    # version = "full"
     test_docker_deployment(version)
The second Docker example script receives the same rewrite; its diff follows.
@@ -6,28 +6,22 @@ import base64
 import os
 from typing import Dict, Any


 class Crawl4AiTester:
-    def __init__(self, base_url: str = "http://localhost:11235", api_token: str = None):
+    def __init__(self, base_url: str = "http://localhost:11235"):
         self.base_url = base_url
-        self.api_token = api_token or os.getenv(
-            "CRAWL4AI_API_TOKEN"
-        ) # Check environment variable as fallback
-        self.headers = (
-            {"Authorization": f"Bearer {self.api_token}"} if self.api_token else {}
-        )

     def submit_and_wait(
         self, request_data: Dict[str, Any], timeout: int = 300
     ) -> Dict[str, Any]:
-        # Submit crawl job
+        # Submit crawl job using async endpoint
         response = requests.post(
-            f"{self.base_url}/crawl", json=request_data, headers=self.headers
+            f"{self.base_url}/crawl/job", json=request_data
         )
-        if response.status_code == 403:
-            raise Exception("API token is invalid or missing")
-        task_id = response.json()["task_id"]
-        print(f"Task ID: {task_id}")
+        response.raise_for_status()
+        job_response = response.json()
+        task_id = job_response["task_id"]
+        print(f"Submitted job with task_id: {task_id}")

         # Poll for result
         start_time = time.time()
@@ -38,8 +32,9 @@ class Crawl4AiTester:
             )

             result = requests.get(
-                f"{self.base_url}/task/{task_id}", headers=self.headers
+                f"{self.base_url}/crawl/job/{task_id}"
             )
+            result.raise_for_status()
             status = result.json()

             if status["status"] == "failed":
@@ -52,10 +47,10 @@ class Crawl4AiTester:
             time.sleep(2)

     def submit_sync(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
+        # Use synchronous crawl endpoint
         response = requests.post(
-            f"{self.base_url}/crawl_sync",
+            f"{self.base_url}/crawl",
             json=request_data,
-            headers=self.headers,
             timeout=60,
         )
         if response.status_code == 408:
@@ -66,9 +61,8 @@ class Crawl4AiTester:

 def test_docker_deployment(version="basic"):
     tester = Crawl4AiTester(
-        # base_url="http://localhost:11235" ,
-        base_url="https://crawl4ai-sby74.ondigitalocean.app",
-        api_token="test",
+        base_url="http://localhost:11235",
+        #base_url="https://crawl4ai-sby74.ondigitalocean.app",
     )
     print(f"Testing Crawl4AI Docker {version} version")

@@ -88,63 +82,60 @@ def test_docker_deployment(version="basic"):

     # Test cases based on version
     test_basic_crawl(tester)
-    test_basic_crawl(tester)
     test_basic_crawl_sync(tester)

-    # if version in ["full", "transformer"]:
-    #     test_cosine_extraction(tester)
+    if version in ["full", "transformer"]:
+        test_cosine_extraction(tester)

-    # test_js_execution(tester)
-    # test_css_selector(tester)
-    # test_structured_extraction(tester)
-    # test_llm_extraction(tester)
-    # test_llm_with_ollama(tester)
-    # test_screenshot(tester)
+    test_js_execution(tester)
+    test_css_selector(tester)
+    test_structured_extraction(tester)
+    test_llm_extraction(tester)
+    test_llm_with_ollama(tester)
+    test_screenshot(tester)


 def test_basic_crawl(tester: Crawl4AiTester):
-    print("\n=== Testing Basic Crawl ===")
+    print("\n=== Testing Basic Crawl (Async) ===")
     request = {
         "urls": ["https://www.nbcnews.com/business"],
-        "priority": 10,
-        "session_id": "test",
     }

     result = tester.submit_and_wait(request)
-    print(f"Basic crawl result length: {len(result['result']['markdown'])}")
+    print(f"Basic crawl result count: {len(result['result']['results'])}")
     assert result["result"]["success"]
-    assert len(result["result"]["markdown"]) > 0
+    assert len(result["result"]["results"]) > 0
+    assert len(result["result"]["results"][0]["markdown"]) > 0


 def test_basic_crawl_sync(tester: Crawl4AiTester):
     print("\n=== Testing Basic Crawl (Sync) ===")
     request = {
         "urls": ["https://www.nbcnews.com/business"],
-        "priority": 10,
-        "session_id": "test",
     }

     result = tester.submit_sync(request)
-    print(f"Basic crawl result length: {len(result['result']['markdown'])}")
-    assert result["status"] == "completed"
-    assert result["result"]["success"]
-    assert len(result["result"]["markdown"]) > 0
+    print(f"Basic crawl result count: {len(result['results'])}")
+    assert result["success"]
+    assert len(result["results"]) > 0
+    assert len(result["results"][0]["markdown"]) > 0


 def test_js_execution(tester: Crawl4AiTester):
     print("\n=== Testing JS Execution ===")
     request = {
         "urls": ["https://www.nbcnews.com/business"],
-        "priority": 8,
-        "js_code": [
-            "const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();"
-        ],
-        "wait_for": "article.tease-card:nth-child(10)",
-        "crawler_params": {"headless": True},
+        "browser_config": {"headless": True},
+        "crawler_config": {
+            "js_code": [
+                "const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); if(loadMoreButton) loadMoreButton.click();"
+            ],
+            "wait_for": "wide-tease-item__wrapper df flex-column flex-row-m flex-nowrap-m enable-new-sports-feed-mobile-design(10)"
+        }
     }

     result = tester.submit_and_wait(request)
-    print(f"JS execution result length: {len(result['result']['markdown'])}")
+    print(f"JS execution result count: {len(result['result']['results'])}")
     assert result["result"]["success"]

@@ -152,51 +143,78 @@ def test_css_selector(tester: Crawl4AiTester):
     print("\n=== Testing CSS Selector ===")
     request = {
         "urls": ["https://www.nbcnews.com/business"],
-        "priority": 7,
-        "css_selector": ".wide-tease-item__description",
-        "crawler_params": {"headless": True},
-        "extra": {"word_count_threshold": 10},
+        "browser_config": {"headless": True},
+        "crawler_config": {
+            "css_selector": ".wide-tease-item__description",
+            "word_count_threshold": 10
+        }
     }

     result = tester.submit_and_wait(request)
-    print(f"CSS selector result length: {len(result['result']['markdown'])}")
+    print(f"CSS selector result count: {len(result['result']['results'])}")
     assert result["result"]["success"]


 def test_structured_extraction(tester: Crawl4AiTester):
     print("\n=== Testing Structured Extraction ===")
     schema = {
-        "name": "Coinbase Crypto Prices",
-        "baseSelector": ".cds-tableRow-t45thuk",
+        "name": "Cryptocurrency Prices",
+        "baseSelector": "table[data-testid=\"prices-table\"] tbody tr",
         "fields": [
             {
-                "name": "crypto",
-                "selector": "td:nth-child(1) h2",
-                "type": "text",
+                "name": "asset_name",
+                "selector": "td:nth-child(2) p.cds-headline-h4steop",
+                "type": "text"
             },
             {
-                "name": "symbol",
-                "selector": "td:nth-child(1) p",
-                "type": "text",
+                "name": "asset_symbol",
+                "selector": "td:nth-child(2) p.cds-label2-l1sm09ec",
+                "type": "text"
             },
             {
-                "name": "price",
-                "selector": "td:nth-child(2)",
-                "type": "text",
-            },
-        ],
+                "name": "asset_image_url",
+                "selector": "td:nth-child(2) img[alt=\"Asset Symbol\"]",
+                "type": "attribute",
+                "attribute": "src"
+            },
+            {
+                "name": "asset_url",
+                "selector": "td:nth-child(2) a[aria-label^=\"Asset page for\"]",
+                "type": "attribute",
+                "attribute": "href"
+            },
+            {
+                "name": "price",
+                "selector": "td:nth-child(3) div.cds-typographyResets-t6muwls.cds-body-bwup3gq",
+                "type": "text"
+            },
+            {
+                "name": "change",
+                "selector": "td:nth-child(7) p.cds-body-bwup3gq",
+                "type": "text"
+            }
+        ]
     }

     request = {
         "urls": ["https://www.coinbase.com/explore"],
-        "priority": 9,
-        "extraction_config": {"type": "json_css", "params": {"schema": schema}},
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
+            "params": {
+                "extraction_strategy": {
+                    "type": "JsonCssExtractionStrategy",
+                    "params": {"schema": schema}
+                }
+            }
+        }
     }

     result = tester.submit_and_wait(request)
-    extracted = json.loads(result["result"]["extracted_content"])
+    extracted = json.loads(result["result"]["results"][0]["extracted_content"])
     print(f"Extracted {len(extracted)} items")
-    print("Sample item:", json.dumps(extracted[0], indent=2))
+    if extracted:
+        print("Sample item:", json.dumps(extracted[0], indent=2))
     assert result["result"]["success"]
     assert len(extracted) > 0

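For illustration, one extracted item under the new schema would be shaped like this; the field names come from the schema above, but the values are invented:

sample_item = {
    "asset_name": "Bitcoin",          # invented value
    "asset_symbol": "BTC",            # invented value
    "asset_image_url": "https://example.com/btc.png",  # invented value
    "asset_url": "https://www.coinbase.com/price/bitcoin",  # invented value
    "price": "$64,000.00",            # invented value
    "change": "+1.2%",                # invented value
}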
@@ -206,43 +224,54 @@ def test_llm_extraction(tester: Crawl4AiTester):
     schema = {
         "type": "object",
         "properties": {
-            "model_name": {
+            "asset_name": {
                 "type": "string",
-                "description": "Name of the OpenAI model.",
+                "description": "Name of the asset.",
             },
-            "input_fee": {
+            "price": {
                 "type": "string",
-                "description": "Fee for input token for the OpenAI model.",
+                "description": "Price of the asset.",
            },
-            "output_fee": {
+            "change": {
                 "type": "string",
-                "description": "Fee for output token for the OpenAI model.",
+                "description": "Change in price of the asset.",
             },
         },
-        "required": ["model_name", "input_fee", "output_fee"],
+        "required": ["asset_name", "price", "change"],
     }

     request = {
-        "urls": ["https://openai.com/api/pricing"],
-        "priority": 8,
-        "extraction_config": {
-            "type": "llm",
+        "urls": ["https://www.coinbase.com/en-in/explore"],
+        "browser_config": {},
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
             "params": {
-                "provider": "openai/gpt-4o-mini",
-                "api_token": os.getenv("OPENAI_API_KEY"),
-                "schema": schema,
-                "extraction_type": "schema",
-                "instruction": """From the crawled content, extract all mentioned model names along with their fees for input and output tokens.""",
-            },
-        },
-        "crawler_params": {"word_count_threshold": 1},
+                "extraction_strategy": {
+                    "type": "LLMExtractionStrategy",
+                    "params": {
+                        "llm_config": {
+                            "type": "LLMConfig",
+                            "params": {
+                                "provider": "gemini/gemini-2.5-flash",
+                                "api_token": os.getenv("GEMINI_API_KEY")
+                            }
+                        },
+                        "schema": schema,
+                        "extraction_type": "schema",
+                        "instruction": "From the crawled content, extract all mentioned asset names along with their prices and change in price.",
+                    }
+                },
+                "word_count_threshold": 1
+            }
+        }
     }

     try:
         result = tester.submit_and_wait(request)
-        extracted = json.loads(result["result"]["extracted_content"])
+        extracted = json.loads(result["result"]["results"][0]["extracted_content"])
         print(f"Extracted {len(extracted)} model pricing entries")
-        print("Sample entry:", json.dumps(extracted[0], indent=2))
+        if extracted:
+            print("Sample entry:", json.dumps(extracted[0], indent=2))
         assert result["result"]["success"]
     except Exception as e:
         print(f"LLM extraction test failed (might be due to missing API key): {str(e)}")
@@ -271,23 +300,32 @@ def test_llm_with_ollama(tester: Crawl4AiTester):

     request = {
         "urls": ["https://www.nbcnews.com/business"],
-        "priority": 8,
-        "extraction_config": {
-            "type": "llm",
+        "browser_config": {"verbose": True},
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
             "params": {
-                "provider": "ollama/llama2",
-                "schema": schema,
-                "extraction_type": "schema",
-                "instruction": "Extract the main article information including title, summary, and main topics.",
-            },
-        },
-        "extra": {"word_count_threshold": 1},
-        "crawler_params": {"verbose": True},
+                "extraction_strategy": {
+                    "type": "LLMExtractionStrategy",
+                    "params": {
+                        "llm_config": {
+                            "type": "LLMConfig",
+                            "params": {
+                                "provider": "ollama/llama3.2:latest",
+                            }
+                        },
+                        "schema": schema,
+                        "extraction_type": "schema",
+                        "instruction": "Extract the main article information including title, summary, and main topics.",
+                    }
+                },
+                "word_count_threshold": 1
+            }
+        }
     }

     try:
         result = tester.submit_and_wait(request)
-        extracted = json.loads(result["result"]["extracted_content"])
+        extracted = json.loads(result["result"]["results"][0]["extracted_content"])
         print("Extracted content:", json.dumps(extracted, indent=2))
         assert result["result"]["success"]
     except Exception as e:
@@ -298,23 +336,29 @@ def test_cosine_extraction(tester: Crawl4AiTester):
     print("\n=== Testing Cosine Extraction ===")
     request = {
         "urls": ["https://www.nbcnews.com/business"],
-        "priority": 8,
-        "extraction_config": {
-            "type": "cosine",
+        "browser_config": {},
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
             "params": {
-                "semantic_filter": "business finance economy",
-                "word_count_threshold": 10,
-                "max_dist": 0.2,
-                "top_k": 3,
-            },
-        },
+                "extraction_strategy": {
+                    "type": "CosineStrategy",
+                    "params": {
+                        "semantic_filter": "business finance economy",
+                        "word_count_threshold": 10,
+                        "max_dist": 0.2,
+                        "top_k": 3,
+                    }
+                }
+            }
+        }
     }

     try:
         result = tester.submit_and_wait(request)
-        extracted = json.loads(result["result"]["extracted_content"])
+        extracted = json.loads(result["result"]["results"][0]["extracted_content"])
         print(f"Extracted {len(extracted)} text clusters")
-        print("First cluster tags:", extracted[0]["tags"])
+        if extracted:
+            print("First cluster tags:", extracted[0]["tags"])
         assert result["result"]["success"]
     except Exception as e:
         print(f"Cosine extraction test failed: {str(e)}")
@@ -324,19 +368,24 @@ def test_screenshot(tester: Crawl4AiTester):
     print("\n=== Testing Screenshot ===")
     request = {
         "urls": ["https://www.nbcnews.com/business"],
-        "priority": 5,
-        "screenshot": True,
-        "crawler_params": {"headless": True},
+        "browser_config": {"headless": True},
+        "crawler_config": {
+            "type": "CrawlerRunConfig",
+            "params": {
+                "screenshot": True
+            }
+        }
     }

     result = tester.submit_and_wait(request)
-    print("Screenshot captured:", bool(result["result"]["screenshot"]))
+    screenshot_data = result["result"]["results"][0]["screenshot"]
+    print("Screenshot captured:", bool(screenshot_data))

-    if result["result"]["screenshot"]:
+    if screenshot_data:
         # Save screenshot
-        screenshot_data = base64.b64decode(result["result"]["screenshot"])
+        screenshot_bytes = base64.b64decode(screenshot_data)
         with open("test_screenshot.jpg", "wb") as f:
-            f.write(screenshot_data)
+            f.write(screenshot_bytes)
         print("Screenshot saved as test_screenshot.jpg")

     assert result["result"]["success"]
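Both scripts keep the same __main__ entry point: the first CLI argument selects the test set and defaults to "basic", with "full" or "transformer" additionally enabling the cosine-extraction test.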