Migrating from the classic setup.py to a PyProject-based approach.
This commit is contained in:
48
build_hooks.py
Normal file
48
build_hooks.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
import sys
|
||||
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
|
||||
# Name of the build-hook class defined in this module.
# NOTE(review): nothing in this file reads PLUGIN; presumably it is consumed by
# external build tooling -- confirm it is actually needed.
PLUGIN = "CustomBuildHook"
|
||||
|
||||
class CustomBuildHook(BuildHookInterface):
    """Build hook that prepares the local crawl4ai runtime environment.

    When the hook is initialized it:

    1. (re)creates the ``.crawl4ai`` folder tree, wiping any existing cache,
    2. tries to install the Playwright browsers, and
    3. tries to initialize the crawl4ai database.

    Steps 2 and 3 are best-effort: a failure prints a warning and the build
    continues.
    """

    # Sub-folders created directly under the ``.crawl4ai`` folder.
    _CONTENT_FOLDERS = (
        "html_content",
        "cleaned_html",
        "markdown_content",
        "extracted_content",
        "screenshots",
    )

    def initialize(self, version, build_data):
        """Run the environment setup steps.

        Args:
            version: Supplied by the build backend; not used by this hook.
            build_data: Supplied by the build backend; not used or modified
                by this hook.

        Raises:
            OSError: If the folder structure cannot be removed or created.
                Failures of the Playwright and database steps are reported
                as warnings instead of being raised.
        """
        self._prepare_directories()
        self._install_playwright_browsers()
        self._initialize_database()

    @classmethod
    def _prepare_directories(cls):
        """Drop any stale cache and create the ``.crawl4ai`` folder tree."""
        # The base location can be overridden through the environment;
        # otherwise the user's home directory is used.
        base_dir = os.getenv("CRAWL4_AI_BASE_DIRECTORY")
        root = Path(base_dir) if base_dir else Path.home()
        crawl4ai_folder = root / ".crawl4ai"
        cache_folder = crawl4ai_folder / "cache"

        # Clean up old cache if it exists.
        if cache_folder.exists():
            shutil.rmtree(cache_folder)

        # parents=True: a custom base directory may not exist yet, and a
        # plain mkdir() would raise FileNotFoundError in that case.
        crawl4ai_folder.mkdir(parents=True, exist_ok=True)
        cache_folder.mkdir(exist_ok=True)
        for folder in cls._CONTENT_FOLDERS:
            (crawl4ai_folder / folder).mkdir(exist_ok=True)

    @staticmethod
    def _install_playwright_browsers():
        """Install the Playwright browsers, warning instead of failing."""
        try:
            subprocess.check_call([sys.executable, "-m", "playwright", "install"])
        except Exception as e:
            # Deliberately broad: a missing or failing Playwright must not
            # abort the build; the user can install the browsers later.
            print(f"Warning: Playwright installation failed: {e}")
            print("Please run 'python -m playwright install' manually after installation")

    @staticmethod
    def _initialize_database():
        """Initialize the crawl4ai database, warning instead of failing."""
        try:
            # Imported lazily so that an unavailable crawl4ai package (or
            # any of its dependencies) only triggers the warning below.
            from crawl4ai.async_database import async_db_manager
            import asyncio

            asyncio.run(async_db_manager.initialize())
        except Exception as e:
            # Deliberately broad: covers import errors as well as runtime
            # failures during initialization.
            print(f"Warning: Database initialization failed: {e}")
            print("Database will be initialized on first use")
|
||||
Reference in New Issue
Block a user