feat: Bump version to v0.2.73 and update documentation

This commit updates the version number to v0.2.73 and makes corresponding changes in the README.md and Dockerfile.

Docker file install the default mode, this resolve many of installation issues.

Additionally, the installation instructions are updated to include support for different modes. Setup.py doesn't have anymore dependancy on Spacy.

The change log is also updated to reflect these changes.

Supporting websites need with-head browser.
This commit is contained in:
unclecode
2024-07-03 15:19:22 +08:00
parent 88d8cd8650
commit 9926eb9f95
8 changed files with 34 additions and 25 deletions

View File

@@ -11,6 +11,8 @@ from .crawler_strategy import *
from typing import List
from concurrent.futures import ThreadPoolExecutor
from .config import *
import warnings
warnings.filterwarnings("ignore", message='Field "model_name" has conflict with protected namespace "model_".')
class WebCrawler:
@@ -164,7 +166,7 @@ class WebCrawler:
if user_agent:
self.crawler_strategy.update_user_agent(user_agent)
t1 = time.time()
html = self.crawler_strategy.crawl(url)
html = self.crawler_strategy.crawl(url, **kwargs)
t2 = time.time()
if verbose:
print(f"[LOG] 🚀 Crawling done for {url}, success: {bool(html)}, time taken: {t2 - t1} seconds")