feat: cleanup unused code and enhance documentation for v0.7.1

- Remove unused StealthConfig from browser_manager.py
- Update LinkPreviewConfig import path in __init__.py and examples
- Fix infinity handling in content_scraping_strategy.py (use 0 instead of float('inf'))
- Remove sanitize_json_data functions from API endpoints
- Add comprehensive C4A Script documentation to release notes
- Update v0.7.0 release notes with improved code examples
- Create v0.7.1 release notes focusing on cleanup and documentation improvements
- Update demo files with corrected import paths and examples
- Fix virtual scroll and adaptive crawling examples across documentation

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
ntohidi
2025-07-17 11:35:16 +02:00
parent ccbe3c105c
commit cf8badfe27
13 changed files with 241 additions and 343 deletions

View File

@@ -54,27 +54,6 @@ def _get_memory_mb():
logger.warning(f"Could not get memory info: {e}")
return None
# --- Helper to sanitize JSON data ---
def sanitize_json_data(data):
"""
Recursively sanitize data to handle infinity and NaN values that are not JSON compliant.
"""
import math
if isinstance(data, dict):
return {k: sanitize_json_data(v) for k, v in data.items()}
elif isinstance(data, list):
return [sanitize_json_data(item) for item in data]
elif isinstance(data, float):
if math.isinf(data):
return "Infinity" if data > 0 else "-Infinity"
elif math.isnan(data):
return "NaN"
else:
return data
else:
return data
async def handle_llm_qa(
url: str,
@@ -392,10 +371,8 @@ async def stream_results(crawler: AsyncWebCrawler, results_gen: AsyncGenerator)
server_memory_mb = _get_memory_mb()
result_dict = result.model_dump()
result_dict['server_memory_mb'] = server_memory_mb
# Sanitize data to handle infinity values
sanitized_dict = sanitize_json_data(result_dict)
logger.info(f"Streaming result for {sanitized_dict.get('url', 'unknown')}")
data = json.dumps(sanitized_dict, default=datetime_handler) + "\n"
logger.info(f"Streaming result for {result_dict.get('url', 'unknown')}")
data = json.dumps(result_dict, default=datetime_handler) + "\n"
yield data.encode('utf-8')
except Exception as e:
logger.error(f"Serialization error: {e}")
@@ -469,7 +446,7 @@ async def handle_crawl_request(
return {
"success": True,
"results": [sanitize_json_data(result.model_dump()) for result in results],
"results": [result.model_dump() for result in results],
"server_processing_time_s": end_time - start_time,
"server_memory_delta_mb": mem_delta_mb,
"server_peak_memory_mb": peak_mem_mb