#!/usr/bin/env python3
"""
Crawl4ai AWS Lambda Deployment Script

This script automates the deployment of the Crawl4ai web crawler as an AWS Lambda function,
providing an interactive step-by-step process with rich terminal UI.
"""

import os
import shutil
import sys
import json
import time
import subprocess
from typing import Optional, Dict, List, Any, Tuple, Union

import typer
from rich.console import Console
from rich.panel import Panel
from rich.prompt import Prompt, Confirm, IntPrompt
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich.syntax import Syntax
from rich.table import Table
from rich import print as rprint

# Initialize typer app and console
app = typer.Typer(help="Deploy Crawl4ai to AWS Lambda")
console = Console()

# Default configuration
DEFAULT_CONFIG = {
    "aws_region": "us-east-1",
    "ecr_repository_name": "crawl4ai-lambda",
    "lambda_function_name": "crawl4ai-function",
    "api_gateway_name": "crawl4ai-api",
    "memory_size": 4096,
    "timeout": 300,
    "enable_provisioned_concurrency": False,
    "provisioned_concurrency_count": 2,
    "ephemeral_storage_size": 10240,
}

# Names shared by the deploy and cleanup paths; both must agree on them.
IAM_ROLE_NAME = "lambda-execution-role"
LAMBDA_ALIAS = "prod"
API_STAGE = "prod"
API_RESOURCE_PATH = "crawl"
API_GATEWAY_STATEMENT_ID = "apigateway"

# Value passed to `--environment` when creating or updating the function.
LAMBDA_ENVIRONMENT = (
    "Variables={"
    "CRAWL4_AI_BASE_DIRECTORY=/tmp/.crawl4ai,"
    "HOME=/tmp,"
    "PLAYWRIGHT_BROWSERS_PATH=/function/pw-browsers"
    "}"
)


def run_command(command: List[str], capture_output: bool = False) -> Tuple[int, str, str]:
    """Run a command and return its exit code, stdout, and stderr.

    The command line is always echoed to the console. Output is always
    collected; ``capture_output`` only controls whether it is also echoed.

    Args:
        command: Program and arguments, passed to ``subprocess.run`` as a list
            (no shell is involved).
        capture_output: When False, stdout is printed, and stderr is printed
            if the command failed. When True, nothing but the command line is
            printed and the caller is expected to inspect the return values.

    Returns:
        ``(returncode, stdout, stderr)`` with surrounding whitespace stripped.
        If the program cannot be started at all, the tuple is
        ``(127, "", <message>)`` instead of an exception being raised.
    """
    console.print(f"[dim]$ {' '.join(command)}[/dim]")

    try:
        result = subprocess.run(command, capture_output=True, text=True, check=False)
    except OSError as exc:
        # Missing or non-executable binary: report it like any other failure
        # so callers only ever have to look at the exit code.
        message = f"Unable to execute '{command[0]}': {exc}"
        if not capture_output:
            console.print(f"[bold red]Error:[/bold red] {message}")
        return 127, "", message

    stdout = result.stdout.strip()
    stderr = result.stderr.strip()

    if not capture_output:
        if stdout:
            console.print(stdout)
        if stderr and result.returncode != 0:
            console.print(f"[bold red]Error:[/bold red] {stderr}")

    return result.returncode, stdout, stderr


def show_step_header(step_number: Union[int, str], step_title: str) -> None:
    """Display a step header with a step number (or label) and title."""
    console.print(f"\n[bold blue]Step {step_number}:[/bold blue] [bold]{step_title}[/bold]")
    console.print("=" * 80)


def wait_for_confirmation(message: str = "Press Enter to continue...") -> None:
    """Block until the user presses Enter at the given prompt."""
    console.print()
    Prompt.ask(f"[yellow]{message}[/yellow]")


def check_aws_credentials() -> bool:
    """Check that the AWS CLI can authenticate.

    Runs ``aws sts get-caller-identity`` and prints the caller ARN.

    Returns:
        True when the call succeeds and its output parses as JSON,
        False otherwise (an explanatory message is printed).
    """
    code, stdout, _ = run_command(["aws", "sts", "get-caller-identity"], capture_output=True)

    if code != 0:
        console.print(Panel(
            "[bold red]AWS credentials not found or invalid![/bold red]\n\n"
            "Please configure your AWS credentials by running:\n"
            "  aws configure\n\n"
            "You'll need to provide your AWS Access Key ID, Secret Access Key, and default region.",
            title="AWS Authentication Error",
            expand=False
        ))
        return False

    try:
        identity = json.loads(stdout)
    except json.JSONDecodeError:
        console.print("[bold red]Error parsing AWS identity information[/bold red]")
        return False

    console.print(f"[green]Authenticated as:[/green] [bold]{identity.get('Arn')}[/bold]")
    return True


def check_prerequisites() -> bool:
    """Check that the AWS CLI and Docker executables are on PATH.

    Returns:
        True when every required tool is found, False otherwise.
    """
    prerequisites = {
        "aws": "AWS CLI",
        "docker": "Docker"
    }

    all_installed = True

    with console.status("[bold blue]Checking prerequisites...[/bold blue]"):
        for cmd, name in prerequisites.items():
            # shutil.which works on every platform, unlike the `which` binary.
            if shutil.which(cmd) is not None:
                console.print(f"[green]✓[/green] {name} is installed")
            else:
                console.print(f"[red]✗[/red] {name} is [bold red]not installed[/bold red]")
                all_installed = False

    if not all_installed:
        console.print(Panel(
            "Please install the missing prerequisites and try again.",
            title="Missing Prerequisites",
            expand=False
        ))

    return all_installed


def verify_iam_role(config: Dict[str, Any]) -> Optional[str]:
    """Return the ARN of the Lambda execution role, creating the role if needed.

    Args:
        config: Deployment configuration (currently unused; kept for a uniform
            step signature).

    Returns:
        The role ARN, or None when the role neither exists nor could be created.
    """
    role_name = IAM_ROLE_NAME

    with console.status(f"[bold blue]Verifying IAM role '{role_name}'...[/bold blue]"):
        code, stdout, _ = run_command(
            ["aws", "iam", "get-role", "--role-name", role_name,
             "--query", "Role.Arn", "--output", "text"],
            capture_output=True
        )

    if code == 0:
        console.print(f"[green]Found IAM role:[/green] {stdout.strip()}")
        return stdout.strip()

    console.print(f"[bold yellow]IAM role '{role_name}' not found. Creating it...[/bold yellow]")

    # Trust policy that lets the Lambda service assume the role.
    policy_document = json.dumps({
        "Version": "2012-10-17",
        "Statement": [{
            "Effect": "Allow",
            "Principal": {"Service": "lambda.amazonaws.com"},
            "Action": "sts:AssumeRole"
        }]
    })

    code, create_stdout, create_stderr = run_command([
        "aws", "iam", "create-role",
        "--role-name", role_name,
        "--assume-role-policy-document", policy_document,
        "--query", "Role.Arn",
        "--output", "text"
    ], capture_output=True)

    if code != 0:
        console.print(f"[bold red]Failed to create IAM role:[/bold red] {create_stderr}")
        return None

    # Attach basic Lambda execution policy (CloudWatch Logs access).
    attach_code, _, _ = run_command([
        "aws", "iam", "attach-role-policy",
        "--role-name", role_name,
        "--policy-arn", "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
    ])
    if attach_code != 0:
        # The role itself exists, so deployment can continue; the function
        # just may not be able to write logs until the policy is attached.
        console.print(
            "[bold yellow]Warning:[/bold yellow] could not attach "
            "AWSLambdaBasicExecutionRole to the new role"
        )

    console.print(f"[green]Created IAM role:[/green] {create_stdout.strip()}")
    return create_stdout.strip()


def build_docker_image(config: Dict[str, Any]) -> bool:
    """Build the Docker image for the Lambda function.

    Requires ``Dockerfile`` and ``lambda_function.py`` in the current
    directory. The image is tagged with ``config["ecr_repository_name"]``.

    Returns:
        True when ``docker build`` succeeds, False otherwise.
    """
    show_step_header(1, "Building Docker Image")

    console.print("This step will build the Docker image that contains Crawl4ai and its dependencies.")

    if not os.path.exists("Dockerfile"):
        console.print("[bold red]Error:[/bold red] Dockerfile not found in the current directory.")
        return False

    if not os.path.exists("lambda_function.py"):
        console.print("[bold red]Error:[/bold red] lambda_function.py not found in the current directory.")
        return False

    wait_for_confirmation()

    with Progress(
        SpinnerColumn(),
        TextColumn("[bold blue]Building Docker image...[/bold blue]"),
        console=console
    ) as progress:
        progress.add_task("build", total=None)
        code, _, _ = run_command([
            "docker", "build", "-t", config["ecr_repository_name"], "."
        ])

    if code != 0:
        console.print("[bold red]Docker build failed![/bold red]")
        return False

    console.print("[bold green]Docker image built successfully![/bold green]")
    return True


def setup_ecr_repository(config: Dict[str, Any]) -> Optional[str]:
    """Create the ECR repository if it doesn't exist and return its URI.

    Returns:
        The repository URI, or None when the repository could not be created.
    """
    show_step_header(2, "Setting Up Amazon ECR Repository")

    console.print("This step will create an Amazon ECR repository to store the Docker image.")

    wait_for_confirmation()

    # Check if repository exists
    code, stdout, _ = run_command([
        "aws", "ecr", "describe-repositories",
        "--repository-names", config["ecr_repository_name"],
        "--region", config["aws_region"],
        "--query", "repositories[0].repositoryUri",
        "--output", "text"
    ], capture_output=True)

    if code == 0:
        repository_uri = stdout.strip()
        console.print(f"[green]Found existing ECR repository:[/green] {repository_uri}")
        return repository_uri

    console.print(f"[yellow]Creating new ECR repository: {config['ecr_repository_name']}[/yellow]")
    code, create_stdout, create_stderr = run_command([
        "aws", "ecr", "create-repository",
        "--repository-name", config["ecr_repository_name"],
        "--region", config["aws_region"],
        "--query", "repository.repositoryUri",
        "--output", "text"
    ], capture_output=True)

    if code != 0:
        console.print(f"[bold red]Failed to create ECR repository:[/bold red] {create_stderr}")
        return None

    return create_stdout.strip()


def push_image_to_ecr(config: Dict[str, Any], repository_uri: str) -> bool:
    """Log in to ECR, tag the local image, and push it as ``:latest``.

    Args:
        config: Deployment configuration (region and local image name).
        repository_uri: Target repository URI (``<registry-host>/<repo>``).

    Returns:
        True when the push succeeds, False on any failure.
    """
    show_step_header(3, "Pushing Docker Image to ECR")

    console.print("This step will push the Docker image to Amazon ECR.")

    wait_for_confirmation()

    # The registry host is the part of the repository URI before the first
    # "/"; using it avoids a second STS call and a hard-coded domain suffix.
    registry = repository_uri.split("/", 1)[0]

    # Get ECR login password
    console.print("[blue]Logging in to Amazon ECR...[/blue]")
    code, password, _ = run_command([
        "aws", "ecr", "get-login-password",
        "--region", config["aws_region"]
    ], capture_output=True)

    if code != 0:
        console.print("[bold red]Failed to get ECR login password[/bold red]")
        return False

    # Log in to ECR. The password goes through stdin so it never appears on
    # a command line or in the echoed command.
    try:
        login = subprocess.run(
            ["docker", "login", "--username", "AWS", "--password-stdin", registry],
            input=password,
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError as exc:
        console.print(f"[bold red]Failed to log in to ECR:[/bold red] {exc}")
        return False

    if login.returncode != 0:
        console.print(f"[bold red]Failed to log in to ECR:[/bold red] {login.stderr}")
        return False

    console.print("[green]Successfully logged in to ECR[/green]")

    # Tag and push image
    console.print(f"[blue]Tagging image as {repository_uri}:latest[/blue]")
    code, _, _ = run_command([
        "docker", "tag",
        f"{config['ecr_repository_name']}:latest",
        f"{repository_uri}:latest"
    ])

    if code != 0:
        console.print("[bold red]Failed to tag Docker image[/bold red]")
        return False

    with Progress(
        SpinnerColumn(),
        TextColumn("[bold blue]Pushing image to ECR...[/bold blue]"),
        console=console
    ) as progress:
        progress.add_task("push", total=None)
        code, _, _ = run_command([
            "docker", "push", f"{repository_uri}:latest"
        ])

    if code != 0:
        console.print("[bold red]Failed to push image to ECR[/bold red]")
        return False

    console.print("[bold green]Successfully pushed image to ECR![/bold green]")
    return True


def _wait_for_lambda(config: Dict[str, Any], waiter: str) -> bool:
    """Block on ``aws lambda wait <waiter>`` for the configured function."""
    code, _, _ = run_command([
        "aws", "lambda", "wait", waiter,
        "--function-name", config["lambda_function_name"],
        "--region", config["aws_region"]
    ])
    return code == 0


def _lambda_integration_uri(config: Dict[str, Any], lambda_arn: str) -> str:
    """Build the API Gateway integration URI that invokes ``lambda_arn``."""
    return (
        f"arn:aws:apigateway:{config['aws_region']}:lambda:path/2015-03-31/"
        f"functions/{lambda_arn}/invocations"
    )


def _grant_api_gateway_invoke(
    config: Dict[str, Any],
    api_id: str,
    account_id: str,
    qualifier: Optional[str] = None,
) -> bool:
    """Allow the API's POST /crawl method to invoke the function (or an alias).

    Any existing statement with the same id is removed first so the call is
    safe to repeat and always reflects the current ``api_id``.
    """
    target = ["--function-name", config["lambda_function_name"]]
    if qualifier:
        target += ["--qualifier", qualifier]
    region_args = ["--region", config["aws_region"]]

    # Best effort: this fails harmlessly when no such statement exists yet.
    run_command([
        "aws", "lambda", "remove-permission", *target,
        "--statement-id", API_GATEWAY_STATEMENT_ID,
        *region_args
    ], capture_output=True)

    code, _, _ = run_command([
        "aws", "lambda", "add-permission", *target,
        "--statement-id", API_GATEWAY_STATEMENT_ID,
        "--action", "lambda:InvokeFunction",
        "--principal", "apigateway.amazonaws.com",
        "--source-arn",
        f"arn:aws:execute-api:{config['aws_region']}:{account_id}:{api_id}/*/POST/{API_RESOURCE_PATH}",
        *region_args
    ])
    return code == 0


def deploy_lambda_function(config: Dict[str, Any], repository_uri: str, role_arn: str) -> bool:
    """Create the Lambda function, or update its code and configuration.

    Args:
        config: Deployment configuration.
        repository_uri: ECR repository URI; the ``:latest`` tag is deployed.
        role_arn: Execution role ARN (used only when creating the function).

    Returns:
        True when the function is deployed and has left its pending/updating
        state, False otherwise.
    """
    show_step_header(4, "Deploying Lambda Function")

    console.print("This step will create or update the AWS Lambda function.")

    wait_for_confirmation()

    function_name = config["lambda_function_name"]
    region = config["aws_region"]

    # Check if Lambda function exists
    code, _, _ = run_command([
        "aws", "lambda", "get-function",
        "--function-name", function_name,
        "--region", region,
        "--query", "Configuration.FunctionArn",
        "--output", "text"
    ], capture_output=True)
    function_exists = code == 0

    if function_exists:
        console.print(f"[yellow]Updating existing Lambda function: {function_name}[/yellow]")

        # Update function code
        with console.status("[bold blue]Updating Lambda function code...[/bold blue]"):
            code, _, _ = run_command([
                "aws", "lambda", "update-function-code",
                "--region", region,
                "--function-name", function_name,
                "--image-uri", f"{repository_uri}:latest"
            ])
            # Lambda rejects a configuration update while the code update
            # is still in progress, so wait for it to settle first.
            code_updated = code == 0 and _wait_for_lambda(config, "function-updated")

        if not code_updated:
            console.print("[bold red]Failed to update Lambda function code[/bold red]")
            return False

        # Update function configuration
        with console.status("[bold blue]Updating Lambda function configuration...[/bold blue]"):
            code, _, _ = run_command([
                "aws", "lambda", "update-function-configuration",
                "--region", region,
                "--function-name", function_name,
                "--timeout", str(config["timeout"]),
                "--memory-size", str(config["memory_size"]),
                "--ephemeral-storage", f"Size={config['ephemeral_storage_size']}",
                "--environment", LAMBDA_ENVIRONMENT
            ])
            config_updated = code == 0 and _wait_for_lambda(config, "function-updated")

        if not config_updated:
            console.print("[bold red]Failed to update Lambda function configuration[/bold red]")
            return False
    else:
        console.print(f"[blue]Creating new Lambda function: {function_name}[/blue]")

        with console.status("[bold blue]Creating Lambda function...[/bold blue]"):
            code, _, _ = run_command([
                "aws", "lambda", "create-function",
                "--region", region,
                "--function-name", function_name,
                "--package-type", "Image",
                "--code", f"ImageUri={repository_uri}:latest",
                "--role", role_arn,
                "--timeout", str(config["timeout"]),
                "--memory-size", str(config["memory_size"]),
                "--ephemeral-storage", f"Size={config['ephemeral_storage_size']}",
                "--environment", LAMBDA_ENVIRONMENT
            ])
            # A new image-based function starts out Pending; later steps
            # (e.g. publish-version) need it to be Active.
            created = code == 0 and _wait_for_lambda(config, "function-active")

        if not created:
            console.print("[bold red]Failed to create Lambda function[/bold red]")
            return False

    console.print("[bold green]Lambda function deployed successfully![/bold green]")
    return True


def _find_rest_api_id(config: Dict[str, Any]) -> Tuple[int, str]:
    """Look up the REST API named ``config["api_gateway_name"]``.

    Returns:
        ``(exit_code, api_id)``. ``api_id`` is empty when no API matches; when
        several share the name, the first id listed is returned.
    """
    code, stdout, _ = run_command([
        "aws", "apigateway", "get-rest-apis",
        "--region", config["aws_region"],
        "--query", f"items[?name=='{config['api_gateway_name']}'].id",
        "--output", "text"
    ], capture_output=True)
    # Text output separates multiple matches with whitespace.
    ids = stdout.split()
    return code, (ids[0] if ids else "")


def setup_api_gateway(config: Dict[str, Any]) -> Optional[str]:
    """Return the REST API id, creating and wiring the API if it doesn't exist.

    A new API gets a ``/crawl`` resource with an unauthenticated POST method
    proxied to the Lambda function, is deployed to the ``prod`` stage, and is
    granted permission to invoke the function. An existing API with the
    configured name is returned unchanged.

    Returns:
        The REST API id, or None on failure.
    """
    show_step_header(5, "Setting Up API Gateway")

    console.print("This step will create an API Gateway to expose your Lambda function as a REST API.")

    wait_for_confirmation()

    region = config["aws_region"]

    # Check if API Gateway exists
    code, api_id = _find_rest_api_id(config)
    if code != 0:
        # Without this check a transient AWS error would look like "no API"
        # and a duplicate would be created.
        console.print("[bold red]Failed to list existing API Gateways[/bold red]")
        return None

    if api_id:
        console.print(f"[green]Found existing API Gateway:[/green] {api_id}")
        console.print("[bold green]API Gateway setup complete![/bold green]")
        return api_id

    console.print(f"[blue]Creating new API Gateway: {config['api_gateway_name']}[/blue]")

    # Create API Gateway
    code, api_id, _ = run_command([
        "aws", "apigateway", "create-rest-api",
        "--name", config["api_gateway_name"],
        "--region", region,
        "--query", "id",
        "--output", "text"
    ], capture_output=True)

    if code != 0:
        console.print("[bold red]Failed to create API Gateway[/bold red]")
        return None

    api_id = api_id.strip()

    # Get root resource ID
    code, parent_id, _ = run_command([
        "aws", "apigateway", "get-resources",
        "--rest-api-id", api_id,
        "--region", region,
        "--query", "items[?path=='/'].id",
        "--output", "text"
    ], capture_output=True)

    if code != 0:
        console.print("[bold red]Failed to get API Gateway root resource[/bold red]")
        return None

    parent_id = parent_id.strip()

    # Create resource
    console.print("[blue]Creating API Gateway resource...[/blue]")
    code, resource_id, _ = run_command([
        "aws", "apigateway", "create-resource",
        "--rest-api-id", api_id,
        "--region", region,
        "--parent-id", parent_id,
        "--path-part", API_RESOURCE_PATH,
        "--query", "id",
        "--output", "text"
    ], capture_output=True)

    if code != 0:
        console.print("[bold red]Failed to create API Gateway resource[/bold red]")
        return None

    resource_id = resource_id.strip()

    # Create POST method
    console.print("[blue]Creating POST method...[/blue]")
    code, _, _ = run_command([
        "aws", "apigateway", "put-method",
        "--rest-api-id", api_id,
        "--resource-id", resource_id,
        "--http-method", "POST",
        "--authorization-type", "NONE",
        "--region", region
    ])

    if code != 0:
        console.print("[bold red]Failed to create API Gateway method[/bold red]")
        return None

    # Get Lambda function ARN
    code, lambda_arn, _ = run_command([
        "aws", "lambda", "get-function",
        "--function-name", config["lambda_function_name"],
        "--region", region,
        "--query", "Configuration.FunctionArn",
        "--output", "text"
    ], capture_output=True)

    if code != 0:
        console.print("[bold red]Failed to get Lambda function ARN[/bold red]")
        return None

    lambda_arn = lambda_arn.strip()

    # Set Lambda integration
    console.print("[blue]Setting up Lambda integration...[/blue]")
    code, _, _ = run_command([
        "aws", "apigateway", "put-integration",
        "--rest-api-id", api_id,
        "--resource-id", resource_id,
        "--http-method", "POST",
        "--type", "AWS_PROXY",
        "--integration-http-method", "POST",
        "--uri", _lambda_integration_uri(config, lambda_arn),
        "--region", region
    ])

    if code != 0:
        console.print("[bold red]Failed to set API Gateway integration[/bold red]")
        return None

    # Deploy API
    console.print("[blue]Deploying API...[/blue]")
    code, _, _ = run_command([
        "aws", "apigateway", "create-deployment",
        "--rest-api-id", api_id,
        "--stage-name", API_STAGE,
        "--region", region
    ])

    if code != 0:
        console.print("[bold red]Failed to deploy API Gateway[/bold red]")
        return None

    # Set Lambda permission. The account id is the fifth ":"-separated
    # field of the function ARN.
    account_id = lambda_arn.split(":")[4]
    console.print("[blue]Setting Lambda permissions...[/blue]")
    if not _grant_api_gateway_invoke(config, api_id, account_id):
        console.print("[bold red]Failed to set Lambda permission[/bold red]")
        return None

    console.print("[bold green]API Gateway setup complete![/bold green]")
    return api_id


def configure_provisioned_concurrency(config: Dict[str, Any]) -> bool:
    """Configure provisioned concurrency if enabled.

    Publishes a new version, points the ``prod`` alias at it, sets the
    provisioned concurrency on the alias, and re-targets the API Gateway
    integration (plus invoke permission) at the alias.

    Returns:
        True when the step succeeds or is skipped because it is disabled,
        False on any failure.
    """
    if not config["enable_provisioned_concurrency"]:
        console.print("[yellow]Skipping provisioned concurrency setup (not enabled)[/yellow]")
        return True

    show_step_header(6, "Setting Up Provisioned Concurrency")

    console.print("This step will configure provisioned concurrency to avoid cold starts.")

    wait_for_confirmation()

    function_name = config["lambda_function_name"]
    region = config["aws_region"]

    # Publish a version
    console.print("[blue]Publishing Lambda version...[/blue]")
    code, version, _ = run_command([
        "aws", "lambda", "publish-version",
        "--function-name", function_name,
        "--region", region,
        "--query", "Version",
        "--output", "text"
    ], capture_output=True)

    if code != 0:
        console.print("[bold red]Failed to publish Lambda version[/bold red]")
        return False

    version = version.strip()
    console.print(f"[green]Published version:[/green] {version}")

    # Check if alias exists
    code, _, _ = run_command([
        "aws", "lambda", "get-alias",
        "--function-name", function_name,
        "--name", LAMBDA_ALIAS,
        "--region", region
    ], capture_output=True)
    alias_exists = code == 0

    # Create or update alias
    if alias_exists:
        console.print("[blue]Updating 'prod' alias...[/blue]")
        alias_action = "update-alias"
    else:
        console.print("[blue]Creating 'prod' alias...[/blue]")
        alias_action = "create-alias"

    code, _, _ = run_command([
        "aws", "lambda", alias_action,
        "--function-name", function_name,
        "--name", LAMBDA_ALIAS,
        "--function-version", version,
        "--region", region
    ])

    if code != 0:
        console.print("[bold red]Failed to create/update alias[/bold red]")
        return False

    # Configure provisioned concurrency
    console.print(f"[blue]Configuring provisioned concurrency ({config['provisioned_concurrency_count']} instances)...[/blue]")
    code, _, _ = run_command([
        "aws", "lambda", "put-provisioned-concurrency-config",
        "--function-name", function_name,
        "--qualifier", LAMBDA_ALIAS,
        "--provisioned-concurrent-executions", str(config["provisioned_concurrency_count"]),
        "--region", region
    ])

    if code != 0:
        console.print("[bold red]Failed to configure provisioned concurrency[/bold red]")
        return False

    # Update API Gateway to use alias
    _, api_id = _find_rest_api_id(config)
    if not api_id:
        console.print("[bold red]Failed to find API Gateway ID[/bold red]")
        return False

    resource_id = run_command([
        "aws", "apigateway", "get-resources",
        "--rest-api-id", api_id,
        "--region", region,
        "--query", "items[?path=='/crawl'].id",
        "--output", "text"
    ], capture_output=True)[1].strip()

    if not resource_id:
        console.print("[bold red]Failed to find API Gateway resource ID[/bold red]")
        return False

    code, account_id, _ = run_command([
        "aws", "sts", "get-caller-identity",
        "--query", "Account",
        "--output", "text"
    ], capture_output=True)
    account_id = account_id.strip()

    if code != 0 or not account_id:
        console.print("[bold red]Failed to get AWS account ID[/bold red]")
        return False

    lambda_alias_arn = f"arn:aws:lambda:{region}:{account_id}:function:{function_name}:{LAMBDA_ALIAS}"

    console.print("[blue]Updating API Gateway to use Lambda alias...[/blue]")
    code, _, _ = run_command([
        "aws", "apigateway", "put-integration",
        "--rest-api-id", api_id,
        "--resource-id", resource_id,
        "--http-method", "POST",
        "--type", "AWS_PROXY",
        "--integration-http-method", "POST",
        "--uri", _lambda_integration_uri(config, lambda_alias_arn),
        "--region", region
    ])

    if code != 0:
        console.print("[bold red]Failed to update API Gateway integration[/bold red]")
        return False

    # Resource policies are per qualifier: the permission granted on the
    # unqualified function does not cover invocations of the alias.
    console.print("[blue]Setting Lambda permissions for alias...[/blue]")
    if not _grant_api_gateway_invoke(config, api_id, account_id, qualifier=LAMBDA_ALIAS):
        console.print("[bold red]Failed to set Lambda permission[/bold red]")
        return False

    # Redeploy API Gateway
    console.print("[blue]Redeploying API Gateway...[/blue]")
    code, _, _ = run_command([
        "aws", "apigateway", "create-deployment",
        "--rest-api-id", api_id,
        "--stage-name", API_STAGE,
        "--region", region
    ])

    if code != 0:
        console.print("[bold red]Failed to redeploy API Gateway[/bold red]")
        return False

    console.print("[bold green]Provisioned concurrency setup complete![/bold green]")
    return True


def show_deployment_summary(config: Dict[str, Any], api_id: str) -> None:
    """Print a summary table of the deployment plus curl and Python examples."""
    endpoint_url = f"https://{api_id}.execute-api.{config['aws_region']}.amazonaws.com/prod/crawl"

    # Create a summary table
    table = Table(title="Deployment Summary")
    table.add_column("Component", style="cyan")
    table.add_column("Details", style="green")

    table.add_row("API Endpoint", endpoint_url)
    table.add_row("Lambda Function", config["lambda_function_name"])
    table.add_row("Memory Size", f"{config['memory_size']} MB")
    table.add_row("Timeout", f"{config['timeout']} seconds")
    table.add_row("Ephemeral Storage", f"{config['ephemeral_storage_size']} MB")
    table.add_row("Provisioned Concurrency",
                  "Enabled" if config["enable_provisioned_concurrency"] else "Disabled")

    if config["enable_provisioned_concurrency"]:
        table.add_row("Concurrency Units", str(config["provisioned_concurrency_count"]))

    console.print("\n")
    console.print(Panel(
        "🚀 [bold green]Crawl4ai has been successfully deployed to AWS Lambda![/bold green]",
        expand=False
    ))
    console.print(table)

    # Example usage
    console.print("\n[bold]Example Usage:[/bold]")
    url = "https://example.com"
    example_cmd = f"curl -X POST {endpoint_url} -H 'Content-Type: application/json' -d '{{\"url\": \"{url}\"}}'"
    console.print(Syntax(example_cmd, "bash", theme="monokai", line_numbers=False))

    console.print("\n[bold]Python Example:[/bold]")
    python_example = f'''import requests
import json

url = "{endpoint_url}"
payload = {{
    "url": "https://example.com",
    "browser_config": {{
        "headless": True,
        "verbose": False
    }},
    "crawler_config": {{
        "crawler_config": {{
            "type": "CrawlerRunConfig",
            "params": {{
                "markdown_generator": {{
                    "type": "DefaultMarkdownGenerator",
                    "params": {{
                        "content_filter": {{
                            "type": "PruningContentFilter",
                            "params": {{
                                "threshold": 0.48,
                                "threshold_type": "fixed"
                            }}
                        }}
                    }}
                }}
            }}
        }}
    }}
}}

response = requests.post(url, json=payload)
result = response.json()
print(json.dumps(result, indent=2))
'''
    console.print(Syntax(python_example, "python", theme="monokai", line_numbers=False))

    console.print("\n[bold green]Thank you for using Crawl4ai on AWS Lambda![/bold green]")


def cleanup_resources(config: Dict[str, Any]) -> None:
    """Delete the API Gateway, Lambda function, ECR repository, and local images.

    Asks for confirmation first. Each deletion is best effort: a failure is
    reported and the remaining resources are still processed.
    """
    show_step_header("Cleanup", "Removing AWS Resources")

    console.print("This will remove all AWS resources created for Crawl4ai deployment, including:")
    console.print("  • Lambda Function")
    console.print("  • API Gateway")
    console.print("  • ECR Repository and Images")
    console.print("  • IAM Permissions")

    if not Confirm.ask(
        "[bold red]⚠️  Are you sure you want to delete all resources?[/bold red]",
        default=False
    ):
        console.print("[yellow]Cleanup cancelled.[/yellow]")
        return

    function_name = config["lambda_function_name"]
    region = config["aws_region"]

    # Get API Gateway ID
    with console.status("[blue]Finding API Gateway...[/blue]"):
        _, api_id = _find_rest_api_id(config)

    # Delete API Gateway
    if api_id:
        console.print(f"[blue]Deleting API Gateway: {api_id}[/blue]")
        code, _, stderr = run_command([
            "aws", "apigateway", "delete-rest-api",
            "--rest-api-id", api_id,
            "--region", region
        ])

        if code == 0:
            console.print("[green]✓[/green] API Gateway deleted successfully")
        else:
            console.print(f"[red]✗[/red] Failed to delete API Gateway: {stderr}")
    else:
        console.print("[yellow]No API Gateway found to delete[/yellow]")

    # Check if Lambda function exists
    with console.status("[blue]Checking Lambda function...[/blue]"):
        code, _, _ = run_command([
            "aws", "lambda", "get-function",
            "--function-name", function_name,
            "--region", region
        ], capture_output=True)
    lambda_exists = code == 0

    if lambda_exists:
        # Remove the Lambda permission for API Gateway while the function
        # still exists. Failure is fine: the statement may never have been
        # created.
        console.print("[blue]Cleaning up IAM permissions...[/blue]")
        run_command([
            "aws", "lambda", "remove-permission",
            "--function-name", function_name,
            "--statement-id", API_GATEWAY_STATEMENT_ID,
            "--region", region
        ], capture_output=True)

        # Delete provisioned concurrency if it exists
        if config.get("enable_provisioned_concurrency", False):
            console.print("[blue]Removing provisioned concurrency...[/blue]")
            run_command([
                "aws", "lambda", "delete-provisioned-concurrency-config",
                "--function-name", function_name,
                "--qualifier", LAMBDA_ALIAS,
                "--region", region
            ], capture_output=True)

            console.print("[blue]Deleting function alias...[/blue]")
            run_command([
                "aws", "lambda", "delete-alias",
                "--function-name", function_name,
                "--name", LAMBDA_ALIAS,
                "--region", region
            ], capture_output=True)

        # Delete Lambda function
        console.print(f"[blue]Deleting Lambda function: {function_name}[/blue]")
        code, _, stderr = run_command([
            "aws", "lambda", "delete-function",
            "--function-name", function_name,
            "--region", region
        ])

        if code == 0:
            console.print("[green]✓[/green] Lambda function deleted successfully")
        else:
            console.print(f"[red]✗[/red] Failed to delete Lambda function: {stderr}")
    else:
        console.print("[yellow]No Lambda function found to delete[/yellow]")

    # Check if ECR repository exists
    with console.status("[blue]Checking ECR repository...[/blue]"):
        code, _, _ = run_command([
            "aws", "ecr", "describe-repositories",
            "--repository-names", config["ecr_repository_name"],
            "--region", region
        ], capture_output=True)
    ecr_exists = code == 0

    # Delete ECR repository
    if ecr_exists:
        console.print(f"[blue]Deleting ECR repository: {config['ecr_repository_name']}[/blue]")
        code, _, stderr = run_command([
            "aws", "ecr", "delete-repository",
            "--repository-name", config["ecr_repository_name"],
            "--force",  # Force delete even if it contains images
            "--region", region
        ])

        if code == 0:
            console.print("[green]✓[/green] ECR repository deleted successfully")
        else:
            console.print(f"[red]✗[/red] Failed to delete ECR repository: {stderr}")
    else:
        console.print("[yellow]No ECR repository found to delete[/yellow]")

    # Clean up local Docker images (best effort; they may not exist locally)
    console.print("[blue]Cleaning up local Docker images...[/blue]")
    code, account_id, _ = run_command([
        "aws", "sts", "get-caller-identity",
        "--query", "Account",
        "--output", "text"
    ], capture_output=True)

    if code == 0:
        account_id = account_id.strip()
        repo_uri = f"{account_id}.dkr.ecr.{region}.amazonaws.com/{config['ecr_repository_name']}"

        run_command([
            "docker", "rmi", f"{repo_uri}:latest", "--force"
        ], capture_output=True)

        run_command([
            "docker", "rmi", f"{config['ecr_repository_name']}:latest", "--force"
        ], capture_output=True)

    console.print("\n[bold green]Cleanup Complete![/bold green]")
    console.print("All AWS resources for the Crawl4ai deployment have been removed.")


@app.command()
def cleanup():
    """
    Remove all AWS resources created for Crawl4ai deployment.

    This will delete the Lambda function, API Gateway, and ECR repository.
    """
    console.print(Panel(
        "[bold red]Crawl4ai AWS Resources Cleanup[/bold red]\n\n"
        "This will remove all AWS resources created for Crawl4ai deployment.",
        title="Warning",
        expand=False
    ))

    # Check AWS credentials
    if not check_aws_credentials():
        raise typer.Exit(code=1)

    # Get configuration
    config = DEFAULT_CONFIG.copy()

    console.print("\n[bold]Configuration[/bold]")
    console.print("Please confirm the resources to clean up:")

    config["aws_region"] = Prompt.ask(
        "AWS Region",
        default=config["aws_region"]
    )

    config["lambda_function_name"] = Prompt.ask(
        "Lambda Function Name",
        default=config["lambda_function_name"]
    )

    config["api_gateway_name"] = Prompt.ask(
        "API Gateway Name",
        default=config["api_gateway_name"]
    )

    config["ecr_repository_name"] = Prompt.ask(
        "ECR Repository Name",
        default=config["ecr_repository_name"]
    )

    # Run cleanup
    cleanup_resources(config)


@app.command()
def main() -> None:
    """
    Deploy Crawl4ai to AWS Lambda.

    This script guides you through the process of deploying Crawl4ai as an AWS Lambda function
    with API Gateway integration.
    """
    # Show welcome banner
    console.print(Panel(
        "[bold blue]Crawl4ai AWS Lambda Deployment Wizard[/bold blue]\n\n"
        "This tool will help you deploy Crawl4ai to AWS Lambda with API Gateway integration.",
        title="Welcome",
        expand=False
    ))

    # Every early exit below uses a non-zero status so that calling scripts
    # can tell a failed deployment from a successful one.

    # Check prerequisites
    if not check_prerequisites():
        raise typer.Exit(code=1)

    # Check AWS credentials
    if not check_aws_credentials():
        raise typer.Exit(code=1)

    # Get configuration
    config = DEFAULT_CONFIG.copy()

    console.print("\n[bold]Configuration[/bold]")
    console.print("Please configure your deployment:")

    config["aws_region"] = Prompt.ask(
        "AWS Region",
        default=config["aws_region"]
    )

    config["lambda_function_name"] = Prompt.ask(
        "Lambda Function Name",
        default=config["lambda_function_name"]
    )

    config["api_gateway_name"] = Prompt.ask(
        "API Gateway Name",
        default=config["api_gateway_name"]
    )

    # IntPrompt re-asks on non-numeric input instead of raising ValueError.
    config["memory_size"] = IntPrompt.ask(
        "Lambda Memory Size (MB)",
        default=config["memory_size"]
    )

    config["timeout"] = IntPrompt.ask(
        "Lambda Timeout (seconds)",
        default=config["timeout"]
    )

    config["enable_provisioned_concurrency"] = Confirm.ask(
        "Enable Provisioned Concurrency (reduces cold starts)?",
        default=config["enable_provisioned_concurrency"]
    )

    if config["enable_provisioned_concurrency"]:
        config["provisioned_concurrency_count"] = IntPrompt.ask(
            "Number of Provisioned Concurrency instances",
            default=config["provisioned_concurrency_count"]
        )

    # Verify IAM role
    role_arn = verify_iam_role(config)
    if not role_arn:
        console.print("[bold red]Failed to verify or create IAM role[/bold red]")
        raise typer.Exit(code=1)

    # Build Docker image
    if not build_docker_image(config):
        raise typer.Exit(code=1)

    # Setup ECR repository
    repository_uri = setup_ecr_repository(config)
    if not repository_uri:
        raise typer.Exit(code=1)

    # Push image to ECR
    if not push_image_to_ecr(config, repository_uri):
        raise typer.Exit(code=1)

    # Deploy Lambda function
    if not deploy_lambda_function(config, repository_uri, role_arn):
        raise typer.Exit(code=1)

    # Setup API Gateway
    api_id = setup_api_gateway(config)
    if not api_id:
        raise typer.Exit(code=1)

    # Configure provisioned concurrency if enabled
    if not configure_provisioned_concurrency(config):
        raise typer.Exit(code=1)

    # Show deployment summary
    show_deployment_summary(config, api_id)


if __name__ == "__main__":
    app()