Enhance installation and migration processes

- Added a post-installation setup script for initialization.
  - Updated README with installation notes for Playwright setup.
  - Enhanced migration logging for better error visibility.
  - Added 'pydantic' to requirements.
  - Bumped version to 0.3.746.
This commit is contained in:
UncleCode
2024-11-29 18:48:44 +08:00
parent 12e73d4898
commit d202f3539b
8 changed files with 90 additions and 102 deletions

View File

@@ -4,7 +4,6 @@ from .async_webcrawler import AsyncWebCrawler, CacheMode
from .models import CrawlResult
from .__version__ import __version__
# __version__ = "0.3.73"
__all__ = [
"AsyncWebCrawler",

View File

@@ -1,2 +1,2 @@
# crawl4ai/_version.py
__version__ = "0.3.745"
__version__ = "0.3.746"

44
crawl4ai/install.py Normal file
View File

@@ -0,0 +1,44 @@
import subprocess
import sys
import asyncio
from .async_logger import AsyncLogger, LogLevel
# Initialize logger
logger = AsyncLogger(log_level=LogLevel.DEBUG, verbose=True)
def post_install():
"""Run all post-installation tasks"""
logger.info("Running post-installation setup...", tag="INIT")
install_playwright()
run_migration()
logger.success("Post-installation setup completed!", tag="COMPLETE")
def install_playwright():
logger.info("Installing Playwright browsers...", tag="INIT")
try:
subprocess.check_call([sys.executable, "-m", "playwright", "install"])
logger.success("Playwright installation completed successfully.", tag="COMPLETE")
except subprocess.CalledProcessError as e:
logger.error(f"Error during Playwright installation: {e}", tag="ERROR")
logger.warning(
"Please run 'python -m playwright install' manually after the installation."
)
except Exception as e:
logger.error(f"Unexpected error during Playwright installation: {e}", tag="ERROR")
logger.warning(
"Please run 'python -m playwright install' manually after the installation."
)
def run_migration():
"""Initialize database during installation"""
try:
logger.info("Starting database initialization...", tag="INIT")
from crawl4ai.async_database import async_db_manager
asyncio.run(async_db_manager.initialize())
logger.success("Database initialization completed successfully.", tag="COMPLETE")
except ImportError:
logger.warning("Database module not found. Will initialize on first use.")
except Exception as e:
logger.warning(f"Database initialization failed: {e}")
logger.warning("Database will be initialized on first use")

View File

@@ -9,9 +9,13 @@ import aiofiles
import shutil
import time
from datetime import datetime
from .async_logger import AsyncLogger, LogLevel
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Initialize logger
logger = AsyncLogger(log_level=LogLevel.DEBUG, verbose=True)
# logging.basicConfig(level=logging.INFO)
# logger = logging.getLogger(__name__)
class DatabaseMigration:
def __init__(self, db_path: str):
@@ -55,7 +59,8 @@ class DatabaseMigration:
async def migrate_database(self):
"""Migrate existing database to file-based storage"""
logger.info("Starting database migration...")
# logger.info("Starting database migration...")
logger.info("Starting database migration...", tag="INIT")
try:
async with aiosqlite.connect(self.db_path) as db:
@@ -91,19 +96,25 @@ class DatabaseMigration:
migrated_count += 1
if migrated_count % 100 == 0:
logger.info(f"Migrated {migrated_count} records...")
logger.info(f"Migrated {migrated_count} records...", tag="INIT")
await db.commit()
logger.info(f"Migration completed. {migrated_count} records processed.")
logger.success(f"Migration completed. {migrated_count} records processed.", tag="COMPLETE")
except Exception as e:
logger.error(f"Migration failed: {e}")
raise
# logger.error(f"Migration failed: {e}")
logger.error(
message="Migration failed: {error}",
tag="ERROR",
params={"error": str(e)}
)
raise e
async def backup_database(db_path: str) -> str:
"""Create backup of existing database"""
if not os.path.exists(db_path):
logger.info("No existing database found. Skipping backup.")
logger.info("No existing database found. Skipping backup.", tag="INIT")
return None
# Create backup with timestamp
@@ -116,11 +127,16 @@ async def backup_database(db_path: str) -> str:
# Create backup
shutil.copy2(db_path, backup_path)
logger.info(f"Database backup created at: {backup_path}")
logger.info(f"Database backup created at: {backup_path}", tag="COMPLETE")
return backup_path
except Exception as e:
logger.error(f"Backup failed: {e}")
raise
# logger.error(f"Backup failed: {e}")
logger.error(
message="Migration failed: {error}",
tag="ERROR",
params={"error": str(e)}
)
raise e
async def run_migration(db_path: Optional[str] = None):
"""Run database migration"""
@@ -128,7 +144,7 @@ async def run_migration(db_path: Optional[str] = None):
db_path = os.path.join(Path.home(), ".crawl4ai", "crawl4ai.db")
if not os.path.exists(db_path):
logger.info("No existing database found. Skipping migration.")
logger.info("No existing database found. Skipping migration.", tag="INIT")
return
# Create backup first