fix: make remove_overlay_elements work by calling the injected JS function. ref: #1396
- Fix a critical bug where the overlay-removal JS function was injected but never called.
- Change remove_overlay_elements() to properly execute the injected async function.
- Wrap the JS execution in an async function so the asynchronous overlay-removal logic can be awaited.
- Add a test_remove_overlay_elements() test case to verify that the functionality works.
- Ensure overlay elements (cookie banners, popups, modals) are actually removed.

The remove_overlay_elements feature now works as intended:
- Before: the function definition was injected but never executed (silent failure).
- After: the function is injected and called, successfully removing overlay elements.
This commit is contained in:
@@ -1383,9 +1383,10 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
try:
|
||||
await self.adapter.evaluate(page,
|
||||
f"""
|
||||
(() => {{
|
||||
(async () => {{
|
||||
try {{
|
||||
{remove_overlays_js}
|
||||
const removeOverlays = {remove_overlays_js};
|
||||
await removeOverlays();
|
||||
return {{ success: true }};
|
||||
}} catch (error) {{
|
||||
return {{
|
||||
|
||||
@@ -364,5 +364,19 @@ async def test_network_error_handling():
|
||||
async with AsyncPlaywrightCrawlerStrategy() as strategy:
|
||||
await strategy.crawl("https://invalid.example.com", config)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_remove_overlay_elements(crawler_strategy):
|
||||
config = CrawlerRunConfig(
|
||||
remove_overlay_elements=True,
|
||||
delay_before_return_html=5,
|
||||
)
|
||||
|
||||
response = await crawler_strategy.crawl(
|
||||
"https://www2.hm.com/en_us/index.html",
|
||||
config
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert "Accept all cookies" not in response.html
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
Reference in New Issue
Block a user