chore: Add custom headers to LocalSeleniumCrawlerStrategy
This commit is contained in:
@@ -139,6 +139,13 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy):
|
|||||||
self.driver = webdriver.Chrome(service=self.service, options=self.options)
|
self.driver = webdriver.Chrome(service=self.service, options=self.options)
|
||||||
self.driver = self.execute_hook('on_driver_created', self.driver)
|
self.driver = self.execute_hook('on_driver_created', self.driver)
|
||||||
|
|
||||||
|
def set_custom_headers(self, headers: dict):
    """Attach extra HTTP headers to the requests made by the driver.

    Enables the Chrome DevTools Protocol ``Network`` domain on
    ``self.driver`` and then registers ``headers`` through
    ``Network.setExtraHTTPHeaders``.

    Args:
        headers: Mapping of header name to header value. Names and values
            are converted with ``str()`` before being sent, so non-string
            values such as ints are accepted.
    """
    # CDP expects every header name/value to be a string; normalising here
    # avoids a protocol error for inputs like {'X-Retry': 3}.
    normalized_headers = {str(name): str(value) for name, value in headers.items()}

    # Enable Network domain for sending headers
    self.driver.execute_cdp_cmd('Network.enable', {})
    # Set extra HTTP headers
    self.driver.execute_cdp_cmd('Network.setExtraHTTPHeaders', {'headers': normalized_headers})
|
||||||
|
|
||||||
|
|
||||||
def crawl(self, url: str) -> str:
|
def crawl(self, url: str) -> str:
|
||||||
# Create md5 hash of the URL
|
# Create md5 hash of the URL
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|||||||
@@ -198,12 +198,11 @@ def using_crawler_hooks(crawler):
|
|||||||
print("[HOOK] on_driver_created")
|
print("[HOOK] on_driver_created")
|
||||||
# Example customization: maximize the window
|
# Example customization: maximize the window
|
||||||
driver.maximize_window()
|
driver.maximize_window()
|
||||||
return driver
|
|
||||||
|
|
||||||
def before_get_url(driver):
|
|
||||||
print("[HOOK] before_get_url")
|
|
||||||
# Example customization: logging in to a hypothetical website
|
# Example customization: logging in to a hypothetical website
|
||||||
driver.get('https://example.com/login')
|
driver.get('https://example.com/login')
|
||||||
|
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
WebDriverWait(driver, 10).until(
|
WebDriverWait(driver, 10).until(
|
||||||
EC.presence_of_element_located((By.NAME, 'username'))
|
EC.presence_of_element_located((By.NAME, 'username'))
|
||||||
)
|
)
|
||||||
@@ -215,8 +214,18 @@ def using_crawler_hooks(crawler):
|
|||||||
)
|
)
|
||||||
# Add a custom cookie
|
# Add a custom cookie
|
||||||
driver.add_cookie({'name': 'test_cookie', 'value': 'cookie_value'})
|
driver.add_cookie({'name': 'test_cookie', 'value': 'cookie_value'})
|
||||||
return driver
|
return driver
|
||||||
|
|
||||||
|
|
||||||
|
def before_get_url(driver):
    """Example hook: register a custom HTTP header on the driver.

    Prints a marker line, enables the CDP ``Network`` domain, sets the
    ``X-Test-Header`` extra header, and hands the same driver back.
    """
    print("[HOOK] before_get_url")

    # Example customization: add a custom header.
    # The Network domain is enabled first, then the header is registered.
    extra_headers = {'X-Test-Header': 'test'}
    cdp_commands = (
        ('Network.enable', {}),
        ('Network.setExtraHTTPHeaders', {'headers': extra_headers}),
    )
    for command, params in cdp_commands:
        driver.execute_cdp_cmd(command, params)

    return driver
|
||||||
|
|
||||||
def after_get_url(driver):
|
def after_get_url(driver):
|
||||||
print("[HOOK] after_get_url")
|
print("[HOOK] after_get_url")
|
||||||
# Example customization: log the URL
|
# Example customization: log the URL
|
||||||
@@ -225,9 +234,9 @@ def using_crawler_hooks(crawler):
|
|||||||
|
|
||||||
def before_return_html(driver, html):
|
def before_return_html(driver, html):
|
||||||
print("[HOOK] before_return_html")
|
print("[HOOK] before_return_html")
|
||||||
# Example customization: modify the HTML (for demonstration purposes)
|
# Example customization: log the HTML
|
||||||
modified_html = html.replace('Example Domain', 'Test Domain')
|
print(len(html))
|
||||||
return driver, modified_html
|
return driver
|
||||||
|
|
||||||
cprint("\n🔗 [bold cyan]Using Crawler Hooks: Let's see how we can customize the crawler using hooks![/bold cyan]", True)
|
cprint("\n🔗 [bold cyan]Using Crawler Hooks: Let's see how we can customize the crawler using hooks![/bold cyan]", True)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user