CRAWL4_AI_BASE_DIRECTORY should be Path object instead of string (#298)

Thank you so much for pointing this out. Yes, that's correct. I have accepted your pull request and will add your name to the contributors list. Thank you again.
This commit is contained in:
Paulo Kuong
2024-11-28 06:46:36 -05:00
committed by GitHub
parent 48d43c14b1
commit cf35cbe59e

View File

@@ -9,10 +9,16 @@ import asyncio
# Create the .crawl4ai folder in the user's home directory if it doesn't exist # Create the .crawl4ai folder in the user's home directory if it doesn't exist
# If the folder already exists, remove the cache folder # If the folder already exists, remove the cache folder
crawl4ai_folder = os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home()) / ".crawl4ai" crawl4ai_folder = Path(os.getenv("CRAWL4_AI_BASE_DIRECTORY")) or Path.home()
crawl4ai_folder = crawl4ai_folder / ".crawl4ai"
cache_folder = crawl4ai_folder / "cache" cache_folder = crawl4ai_folder / "cache"
content_folders = ['html_content', 'cleaned_html', 'markdown_content', content_folders = [
'extracted_content', 'screenshots'] "html_content",
"cleaned_html",
"markdown_content",
"extracted_content",
"screenshots",
]
# Clean up old cache if exists # Clean up old cache if exists
if cache_folder.exists(): if cache_folder.exists():
@@ -28,7 +34,7 @@ for folder in content_folders:
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
with open(os.path.join(__location__, "requirements.txt")) as f: with open(os.path.join(__location__, "requirements.txt")) as f:
requirements = f.read().splitlines() requirements = f.read().splitlines()
with open("crawl4ai/__version__.py") as f: with open("crawl4ai/__version__.py") as f:
for line in f: for line in f:
if line.startswith("__version__"): if line.startswith("__version__"):
@@ -37,11 +43,12 @@ with open("crawl4ai/__version__.py") as f:
# Define requirements # Define requirements
default_requirements = requirements default_requirements = requirements
torch_requirements = ["torch", "nltk", "scikit-learn"] torch_requirements = ["torch", "nltk", "scikit-learn"]
transformer_requirements = ["transformers", "tokenizers"] transformer_requirements = ["transformers", "tokenizers"]
cosine_similarity_requirements = ["torch", "transformers", "nltk" ] cosine_similarity_requirements = ["torch", "transformers", "nltk"]
sync_requirements = ["selenium"] sync_requirements = ["selenium"]
def install_playwright(): def install_playwright():
print("Installing Playwright browsers...") print("Installing Playwright browsers...")
try: try:
@@ -49,16 +56,22 @@ def install_playwright():
print("Playwright installation completed successfully.") print("Playwright installation completed successfully.")
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
print(f"Error during Playwright installation: {e}") print(f"Error during Playwright installation: {e}")
print("Please run 'python -m playwright install' manually after the installation.") print(
"Please run 'python -m playwright install' manually after the installation."
)
except Exception as e: except Exception as e:
print(f"Unexpected error during Playwright installation: {e}") print(f"Unexpected error during Playwright installation: {e}")
print("Please run 'python -m playwright install' manually after the installation.") print(
"Please run 'python -m playwright install' manually after the installation."
)
def run_migration(): def run_migration():
"""Initialize database during installation""" """Initialize database during installation"""
try: try:
print("Starting database initialization...") print("Starting database initialization...")
from crawl4ai.async_database import async_db_manager from crawl4ai.async_database import async_db_manager
asyncio.run(async_db_manager.initialize()) asyncio.run(async_db_manager.initialize())
print("Database initialization completed successfully.") print("Database initialization completed successfully.")
except ImportError: except ImportError:
@@ -67,12 +80,14 @@ def run_migration():
print(f"Warning: Database initialization failed: {e}") print(f"Warning: Database initialization failed: {e}")
print("Database will be initialized on first use") print("Database will be initialized on first use")
class PostInstallCommand(install): class PostInstallCommand(install):
def run(self): def run(self):
install.run(self) install.run(self)
install_playwright() install_playwright()
# run_migration() # run_migration()
setup( setup(
name="Crawl4AI", name="Crawl4AI",
version=version, version=version,
@@ -84,18 +99,23 @@ setup(
author_email="unclecode@kidocode.com", author_email="unclecode@kidocode.com",
license="MIT", license="MIT",
packages=find_packages(), packages=find_packages(),
install_requires=default_requirements + ["playwright", "aiofiles"], # Added aiofiles install_requires=default_requirements
+ ["playwright", "aiofiles"], # Added aiofiles
extras_require={ extras_require={
"torch": torch_requirements, "torch": torch_requirements,
"transformer": transformer_requirements, "transformer": transformer_requirements,
"cosine": cosine_similarity_requirements, "cosine": cosine_similarity_requirements,
"sync": sync_requirements, "sync": sync_requirements,
"all": default_requirements + torch_requirements + transformer_requirements + cosine_similarity_requirements + sync_requirements, "all": default_requirements
+ torch_requirements
+ transformer_requirements
+ cosine_similarity_requirements
+ sync_requirements,
}, },
entry_points={ entry_points={
'console_scripts': [ "console_scripts": [
'crawl4ai-download-models=crawl4ai.model_loader:main', "crawl4ai-download-models=crawl4ai.model_loader:main",
'crawl4ai-migrate=crawl4ai.migrations:main', # Added migration command "crawl4ai-migrate=crawl4ai.migrations:main", # Added migration command
], ],
}, },
classifiers=[ classifiers=[
@@ -110,6 +130,6 @@ setup(
], ],
python_requires=">=3.7", python_requires=">=3.7",
cmdclass={ cmdclass={
'install': PostInstallCommand, "install": PostInstallCommand,
}, },
) )