From 509844208617673ee4cd066a4386a6c76fdadf91 Mon Sep 17 00:00:00 2001 From: UncleCode Date: Sat, 16 Nov 2024 15:30:24 +0800 Subject: [PATCH] refactor: migrate versioning to __version__.py and remove deprecated _version.py --- crawl4ai/__init__.py | 2 +- crawl4ai/{_version.py => __version__.py} | 0 crawl4ai/async_webcrawler.py | 2 +- crawl4ai/content_filter_strategy.py | 5 ----- middlewares.py | 0 requirements-dev.txt | 5 ----- requirements.txt | 2 ++ setup.py | 2 +- 8 files changed, 5 insertions(+), 13 deletions(-) rename crawl4ai/{_version.py => __version__.py} (100%) delete mode 100644 middlewares.py delete mode 100644 requirements-dev.txt diff --git a/crawl4ai/__init__.py b/crawl4ai/__init__.py index 1bcc491c..e55aaf73 100644 --- a/crawl4ai/__init__.py +++ b/crawl4ai/__init__.py @@ -2,7 +2,7 @@ from .async_webcrawler import AsyncWebCrawler from .models import CrawlResult -from ._version import __version__ +from .__version__ import __version__ # __version__ = "0.3.73" __all__ = [ diff --git a/crawl4ai/_version.py b/crawl4ai/__version__.py similarity index 100% rename from crawl4ai/_version.py rename to crawl4ai/__version__.py diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index febc01d3..03e7a393 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -16,7 +16,7 @@ from .utils import ( InvalidCSSSelectorError, format_html ) -from ._version import __version__ as crawl4ai_version +from .__version__ import __version__ as crawl4ai_version class AsyncWebCrawler: def __init__( diff --git a/crawl4ai/content_filter_strategy.py b/crawl4ai/content_filter_strategy.py index 850ebf11..88375da9 100644 --- a/crawl4ai/content_filter_strategy.py +++ b/crawl4ai/content_filter_strategy.py @@ -1,12 +1,7 @@ -import os import re -import time from bs4 import BeautifulSoup, Tag from typing import List, Tuple, Dict from rank_bm25 import BM25Okapi -import nltk -from time import perf_counter -from html5lib import parse, treebuilders from time import perf_counter from collections import deque from bs4 import BeautifulSoup, NavigableString, Tag diff --git a/middlewares.py b/middlewares.py deleted file mode 100644 index e69de29b..00000000 diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 7bc121a4..00000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,5 +0,0 @@ --r requirements.txt -pytest -pytest-asyncio -selenium -setuptools diff --git a/requirements.txt b/requirements.txt index 94f741ca..74e8b3d6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,5 @@ requests~=2.26 beautifulsoup4~=4.12 tf-playwright-stealth~=1.0 xxhash~=3.4 +rank-bm25~=0.2 +aiofiles~=24.0 \ No newline at end of file diff --git a/setup.py b/setup.py index d3145ac1..d8ad2cd3 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file with open(os.path.join(__location__, "requirements.txt")) as f: requirements = f.read().splitlines() -with open("crawl4ai/_version.py") as f: +with open("crawl4ai/__version__.py") as f: for line in f: if line.startswith("__version__"): version = line.split("=")[1].strip().strip('"')