diff --git a/Dockerfile b/Dockerfile
index b7e5e07f..9796bcb6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -151,24 +151,6 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
     redis-cli ping > /dev/null && \
     curl -f http://localhost:8000/health || exit 1'
 
-# COPY deploy/docker/docker-entrypoint.sh /usr/local/bin/
-# RUN chmod +x /usr/local/bin/docker-entrypoint.sh
-
 EXPOSE 6379
-
-# ENTRYPOINT ["docker-entrypoint.sh"]
-
-# CMD service redis-server start && gunicorn \
-#     --bind 0.0.0.0:8000 \
-#     --workers 4 \
-#     --threads 2 \
-#     --timeout 120 \
-#     --graceful-timeout 30 \
-#     --log-level info \
-#     --worker-class uvicorn.workers.UvicornWorker \
-#     server:app
-
-# ENTRYPOINT ["docker-entrypoint.sh"]
-
 CMD ["supervisord", "-c", "supervisord.conf"]
     
diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py
index 1a4cdcef..47c6778a 100644
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -10,7 +10,7 @@ import asyncio
 
 # from contextlib import nullcontext, asynccontextmanager
 from contextlib import asynccontextmanager
-from .models import CrawlResult, MarkdownGenerationResult,DispatchResult
+from .models import CrawlResult, MarkdownGenerationResult, DispatchResult, ScrapingResult
 from .async_database import async_db_manager
 from .chunking_strategy import *  # noqa: F403
 from .chunking_strategy import RegexChunking, ChunkingStrategy, IdentityChunking
@@ -537,7 +537,7 @@ class AsyncWebCrawler:
             ################################
             # Scraping Strategy Execution  #
             ################################
-            result = scraping_strategy.scrap(url, html, **params)
+            result : ScrapingResult = scraping_strategy.scrap(url, html, **params)
 
             if result is None:
                 raise ValueError(
diff --git a/crawl4ai/docker_client copy.py b/crawl4ai/docker_client copy.py
deleted file mode 100644
index 7c0fce1c..00000000
--- a/crawl4ai/docker_client copy.py	
+++ /dev/null
@@ -1,210 +0,0 @@
-from typing import List, Optional, Union, AsyncGenerator, Dict, Any
-import httpx
-import json
-from urllib.parse import urljoin
-
-from .async_configs import BrowserConfig, CrawlerRunConfig
-from .models import CrawlResult
-from .async_logger import AsyncLogger, LogLevel
-
-
-class Crawl4aiClientError(Exception):
-    """Base exception for Crawl4ai Docker client errors."""
-    pass
-
-
-class ConnectionError(Crawl4aiClientError):
-    """Raised when connection to the Docker server fails."""
-    pass
-
-
-class RequestError(Crawl4aiClientError):
-    """Raised when the server returns an error response."""
-    pass
-
-
-class Crawl4aiDockerClient:
-    """
-    Client for interacting with Crawl4AI Docker server.
-    
-    Args:
-        base_url (str): Base URL of the Crawl4AI Docker server
-        timeout (float): Default timeout for requests in seconds
-        verify_ssl (bool): Whether to verify SSL certificates
-        verbose (bool): Whether to show logging output
-        log_file (str, optional): Path to log file if file logging is desired
-    """
-    
-    def __init__(
-        self,
-        base_url: str = "http://localhost:8000",
-        timeout: float = 30.0,
-        verify_ssl: bool = True,
-        verbose: bool = True,
-        log_file: Optional[str] = None
-    ) -> None:
-        self.base_url = base_url.rstrip('/')
-        self.timeout = timeout
-        self._http_client = httpx.AsyncClient(
-            timeout=timeout,
-            verify=verify_ssl,
-            headers={"Content-Type": "application/json"}
-        )
-        self.logger = AsyncLogger(
-            log_file=log_file,
-            log_level=LogLevel.DEBUG,
-            verbose=verbose
-        )
-
-    async def _check_server_connection(self) -> bool:
-        """Check if server is reachable."""
-        try:
-            self.logger.info("Checking server connection...", tag="INIT")
-            response = await self._http_client.get(f"{self.base_url}/health")
-            response.raise_for_status()
-            self.logger.success(f"Connected to server at {self.base_url}", tag="READY")
-            return True
-        except Exception as e:
-            self.logger.error(f"Failed to connect to server: {str(e)}", tag="ERROR")
-            return False
-
-    def _prepare_request_data(
-        self,
-        urls: List[str],
-        browser_config: Optional[BrowserConfig] = None,
-        crawler_config: Optional[CrawlerRunConfig] = None
-    ) -> Dict[str, Any]:
-        """Prepare request data from configs using dump methods."""
-        self.logger.debug("Preparing request data", tag="INIT")
-        data = {
-            "urls": urls,
-            "browser_config": browser_config.dump() if browser_config else {},
-            "crawler_config": crawler_config.dump() if crawler_config else {}
-        }
-        self.logger.debug(f"Request data prepared for {len(urls)} URLs", tag="READY")
-        return data
-
-    async def _make_request(
-        self,
-        method: str,
-        endpoint: str,
-        **kwargs
-    ) -> Union[Dict, AsyncGenerator]:
-        """Make HTTP request to the server with error handling."""
-        url = urljoin(self.base_url, endpoint)
-        
-        try:
-            self.logger.debug(f"Making {method} request to {endpoint}", tag="FETCH")
-            response = await self._http_client.request(method, url, **kwargs)
-            response.raise_for_status()
-            self.logger.success(f"Request to {endpoint} successful", tag="COMPLETE")
-            return response
-        except httpx.TimeoutException as e:
-            error_msg = f"Request timed out: {str(e)}"
-            self.logger.error(error_msg, tag="ERROR")
-            raise ConnectionError(error_msg)
-        except httpx.RequestError as e:
-            error_msg = f"Failed to connect to server: {str(e)}"
-            self.logger.error(error_msg, tag="ERROR")
-            raise ConnectionError(error_msg)
-        except httpx.HTTPStatusError as e:
-            error_detail = ""
-            try:
-                error_data = e.response.json()
-                error_detail = error_data.get('detail', str(e))
-            except (json.JSONDecodeError, AttributeError) as json_err:
-                error_detail = f"{str(e)} (Failed to parse error response: {str(json_err)})"
-            
-            error_msg = f"Server returned error {e.response.status_code}: {error_detail}"
-            self.logger.error(error_msg, tag="ERROR")
-            raise RequestError(error_msg)
-
-    async def crawl(
-        self,
-        urls: List[str],
-        browser_config: Optional[BrowserConfig] = None,
-        crawler_config: Optional[CrawlerRunConfig] = None
-    ) -> Union[CrawlResult, AsyncGenerator[CrawlResult, None]]:
-        """Execute a crawl operation through the Docker server."""
-        # Check server connection first
-        if not await self._check_server_connection():
-            raise ConnectionError("Cannot proceed with crawl - server is not reachable")
-
-        request_data = self._prepare_request_data(urls, browser_config, crawler_config)
-        is_streaming = crawler_config.stream if crawler_config else False
-        
-        self.logger.info(
-            f"Starting crawl for {len(urls)} URLs {'(streaming)' if is_streaming else ''}",
-            tag="INIT"
-        )
-        
-        if is_streaming:
-            async def result_generator() -> AsyncGenerator[CrawlResult, None]:
-                try:
-                    async with self._http_client.stream(
-                        "POST",
-                        f"{self.base_url}/crawl",
-                        json=request_data,
-                        timeout=None
-                    ) as response:
-                        response.raise_for_status()
-                        async for line in response.aiter_lines():
-                            if line.strip():
-                                try:
-                                    result_dict = json.loads(line)
-                                    if "error" in result_dict:
-                                        self.logger.error_status(
-                                            url=result_dict.get('url', 'unknown'),
-                                            error=result_dict['error']
-                                        )
-                                        continue
-                                    
-                                    self.logger.url_status(
-                                        url=result_dict.get('url', 'unknown'),
-                                        success=True,
-                                        timing=result_dict.get('timing', 0.0)
-                                    )
-                                    yield CrawlResult(**result_dict)
-                                except json.JSONDecodeError as e:
-                                    self.logger.error(f"Failed to parse server response: {e}", tag="ERROR")
-                                    continue
-                except httpx.StreamError as e:
-                    error_msg = f"Stream connection error: {str(e)}"
-                    self.logger.error(error_msg, tag="ERROR")
-                    raise ConnectionError(error_msg)
-                except Exception as e:
-                    error_msg = f"Unexpected error during streaming: {str(e)}"
-                    self.logger.error(error_msg, tag="ERROR")
-                    raise Crawl4aiClientError(error_msg)
-
-            return result_generator()
-        
-        response = await self._make_request("POST", "/crawl", json=request_data)
-        response_data = response.json()
-        
-        if not response_data.get("success", False):
-            error_msg = f"Crawl operation failed: {response_data.get('error', 'Unknown error')}"
-            self.logger.error(error_msg, tag="ERROR")
-            raise RequestError(error_msg)
-        
-        results = [CrawlResult(**result_dict) for result_dict in response_data.get("results", [])]
-        self.logger.success(f"Crawl completed successfully with {len(results)} results", tag="COMPLETE")
-        return results[0] if len(results) == 1 else results
-
-    async def get_schema(self) -> Dict[str, Any]:
-        """Retrieve the configuration schemas from the server."""
-        self.logger.info("Retrieving schema from server", tag="FETCH")
-        response = await self._make_request("GET", "/schema")
-        self.logger.success("Schema retrieved successfully", tag="COMPLETE")
-        return response.json()
-
-    async def close(self) -> None:
-        """Close the HTTP client session."""
-        self.logger.info("Closing client connection", tag="COMPLETE")
-        await self._http_client.aclose()
-
-    async def __aenter__(self) -> "Crawl4aiDockerClient":
-        return self
-
-    async def __aexit__(self, exc_type: Optional[type], exc_val: Optional[Exception], exc_tb: Optional[Any]) -> None:
-        await self.close()
\ No newline at end of file
diff --git a/crawl4ai/processors/pdf/__init__.py b/crawl4ai/processors/pdf/__init__.py
index ac55e9de..947641cb 100644
--- a/crawl4ai/processors/pdf/__init__.py
+++ b/crawl4ai/processors/pdf/__init__.py
@@ -1,8 +1,6 @@
-from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Dict, List, Optional
+import asyncio
 from dataclasses import asdict
-
 from crawl4ai.async_logger import AsyncLogger
 from crawl4ai.async_crawler_strategy import AsyncCrawlerStrategy
 from crawl4ai.models import AsyncCrawlResponse, ScrapingResult 
diff --git a/crawl4ai/processors/pdf/processor.py b/crawl4ai/processors/pdf/processor.py
index 9dc58a2e..16963f70 100644
--- a/crawl4ai/processors/pdf/processor.py
+++ b/crawl4ai/processors/pdf/processor.py
@@ -5,21 +5,22 @@ from datetime import datetime
 from pathlib import Path
 from time import time
 from dataclasses import dataclass, asdict, field
-from typing import Dict, List, Optional, Tuple
-import PyPDF2
-from PIL import Image
-from PyPDF2 import PdfReader
-from .utils import *
+from typing import Dict, List, Optional, Any, Union
 import base64
 import tempfile
+from .utils import *
+from .utils import (
+    apply_png_predictor,
+    clean_pdf_text,
+    clean_pdf_text_to_html,
+)
+
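+# PyPDF2 is an optional dependency (the `pdf` extra), so it is deliberately
+# not imported at module level; each method that needs it imports it lazily
+# and raises an ImportError with install instructions when it is missing.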
 
 logger = logging.getLogger(__name__)
 
-from dataclasses import dataclass, field
-from datetime import datetime
-from typing import List, Optional, Dict, Any
-from pathlib import Path
-
 @dataclass
 class PDFMetadata:
     title: Optional[str] = None
@@ -35,8 +36,8 @@ class PDFMetadata:
 class PDFPage:
     page_number: int
     raw_text: str = ""
-    markdown: str = ""  # Added per your request
-    html: str = ""  # Added per your request
+    markdown: str = ""
+    html: str = ""
     images: List[Dict] = field(default_factory=list)
     links: List[str] = field(default_factory=list)
     layout: List[Dict] = field(default_factory=list)
@@ -56,6 +57,12 @@ class PDFProcessorStrategy(ABC):
 class NaivePDFProcessorStrategy(PDFProcessorStrategy):
     def __init__(self, image_dpi: int = 144, image_quality: int = 85, extract_images: bool = True, 
                  save_images_locally: bool = False, image_save_dir: Optional[Path] = None, batch_size: int = 4):
+        # Fail fast: verify the optional PyPDF2 dependency is installed when the strategy is constructed
+        try:
+            import PyPDF2
+        except ImportError:
+            raise ImportError("PyPDF2 is required for PDF processing. Install with 'pip install crawl4ai[pdf]'")
+            
         self.image_dpi = image_dpi
         self.image_quality = image_quality
         self.current_page_number = 0
@@ -66,6 +73,12 @@ class NaivePDFProcessorStrategy(PDFProcessorStrategy):
         self._temp_dir = None
 
     def process(self, pdf_path: Path) -> PDFProcessResult:
+        # Import inside method to allow dependency to be optional
+        try:
+            from PyPDF2 import PdfReader
+        except ImportError:
+            raise ImportError("PyPDF2 is required for PDF processing. Install with 'pip install crawl4ai[pdf]'")
+            
         start_time = time()
         result = PDFProcessResult(
             metadata=PDFMetadata(),
@@ -110,6 +123,13 @@ class NaivePDFProcessorStrategy(PDFProcessorStrategy):
 
     def process_batch(self, pdf_path: Path) -> PDFProcessResult:
         """Like process() but processes PDF pages in parallel batches"""
+        # Import inside method to allow dependency to be optional
+        try:
+            from PyPDF2 import PdfReader
+            import PyPDF2  # For type checking
+        except ImportError:
+            raise ImportError("PyPDF2 is required for PDF processing. Install with 'pip install crawl4ai[pdf]'")
+            
         import concurrent.futures
         import threading
         
@@ -212,6 +232,12 @@ class NaivePDFProcessorStrategy(PDFProcessorStrategy):
         return pdf_page
 
     def _extract_images(self, page, image_dir: Optional[Path]) -> List[Dict]:
+        # PyPDF2 is optional: import it lazily here so a missing install surfaces as a clear ImportError
+        try:
+            import PyPDF2
+        except ImportError:
+            raise ImportError("PyPDF2 is required for PDF processing. Install with 'pip install crawl4ai[pdf]'")
+            
         if not self.extract_images:
             return []
 
@@ -262,6 +288,7 @@ class NaivePDFProcessorStrategy(PDFProcessorStrategy):
                                                 data = apply_png_predictor(data, width, bits, colors)
 
                                             # Create PIL Image
+                                            from PIL import Image
                                             mode = 'RGB' if color_space == '/DeviceRGB' else 'L'
                                             img = Image.frombytes(mode, (width, height), data)
                                             
@@ -385,9 +412,14 @@ class NaivePDFProcessorStrategy(PDFProcessorStrategy):
                 print(f"Link error: {str(e)}")
         return links
 
-    def _extract_metadata(self, pdf_path: Path, reader: PdfReader = None) -> PDFMetadata:
-        if not reader:
-            reader = PdfReader(pdf_path)
+    def _extract_metadata(self, pdf_path: Path, reader = None) -> PDFMetadata:
+        # Lazily import the optional PyPDF2 and build a reader only when the caller did not supply one
+        if reader is None:
+            try:
+                from PyPDF2 import PdfReader
+                reader = PdfReader(pdf_path)
+            except ImportError:
+                raise ImportError("PyPDF2 is required for PDF processing. Install with 'pip install crawl4ai[pdf]'")
 
         meta = reader.metadata or {}
         created = self._parse_pdf_date(meta.get('/CreationDate', ''))
@@ -425,6 +457,15 @@ class NaivePDFProcessorStrategy(PDFProcessorStrategy):
 if __name__ == "__main__":
     import json
     from pathlib import Path
+    
+    try:
+        # Import PyPDF2 only when running the file directly
+        import PyPDF2
+        from PyPDF2 import PdfReader
+    except ImportError:
+        print("PyPDF2 is required for PDF processing. Install with 'pip install crawl4ai[pdf]'")
+        exit(1)
+        
     current_dir = Path(__file__).resolve().parent
     pdf_path = f'{current_dir}/test.pdf'
     
diff --git a/deploy/Dockerfile b/deploy/Dockerfile
deleted file mode 100644
index 3043bd57..00000000
--- a/deploy/Dockerfile
+++ /dev/null
@@ -1,137 +0,0 @@
-FROM python:3.10-slim
-
-# Set build arguments
-ARG APP_HOME=/app
-ARG GITHUB_REPO=https://github.com/unclecode/crawl4ai.git
-ARG GITHUB_BRANCH=next
-ARG USE_LOCAL=False
-ARG CONFIG_PATH=""
-
-ENV PYTHONFAULTHANDLER=1 \
-    PYTHONHASHSEED=random \
-    PYTHONUNBUFFERED=1 \
-    PIP_NO_CACHE_DIR=1 \
-    PYTHONDONTWRITEBYTECODE=1 \
-    PIP_DISABLE_PIP_VERSION_CHECK=1 \
-    PIP_DEFAULT_TIMEOUT=100 \
-    DEBIAN_FRONTEND=noninteractive \
-    REDIS_HOST=localhost \
-    REDIS_PORT=6379
-
-ARG PYTHON_VERSION=3.10
-ARG INSTALL_TYPE=default
-ARG ENABLE_GPU=false
-ARG TARGETARCH
-
-LABEL maintainer="unclecode"
-LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
-LABEL version="1.0"    
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential \
-    curl \
-    wget \
-    gnupg \
-    git \
-    cmake \
-    pkg-config \
-    python3-dev \
-    libjpeg-dev \
-    redis-server \
-    supervisor \
-    && rm -rf /var/lib/apt/lists/*
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    libglib2.0-0 \
-    libnss3 \
-    libnspr4 \
-    libatk1.0-0 \
-    libatk-bridge2.0-0 \
-    libcups2 \
-    libdrm2 \
-    libdbus-1-3 \
-    libxcb1 \
-    libxkbcommon0 \
-    libx11-6 \
-    libxcomposite1 \
-    libxdamage1 \
-    libxext6 \
-    libxfixes3 \
-    libxrandr2 \
-    libgbm1 \
-    libpango-1.0-0 \
-    libcairo2 \
-    libasound2 \
-    libatspi2.0-0 \
-    && rm -rf /var/lib/apt/lists/*
-
-RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETARCH" = "amd64" ] ; then \
-    apt-get update && apt-get install -y --no-install-recommends \
-    nvidia-cuda-toolkit \
-    && rm -rf /var/lib/apt/lists/* ; \
-else \
-    echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
-fi
-
-RUN if [ "$TARGETARCH" = "arm64" ]; then \
-    echo "🦾 Installing ARM-specific optimizations"; \
-    apt-get update && apt-get install -y --no-install-recommends \
-    libopenblas-dev \
-    && rm -rf /var/lib/apt/lists/*; \
-elif [ "$TARGETARCH" = "amd64" ]; then \
-    echo "🖥️ Installing AMD64-specific optimizations"; \
-    apt-get update && apt-get install -y --no-install-recommends \
-    libomp-dev \
-    && rm -rf /var/lib/apt/lists/*; \
-else \
-    echo "Skipping platform-specific optimizations (unsupported platform)"; \
-fi
-
-WORKDIR ${APP_HOME}
-
-RUN git clone --branch ${GITHUB_BRANCH} ${GITHUB_REPO} /tmp/crawl4ai
-
-COPY docker/supervisord.conf .
-COPY docker/requirements.txt .
-
-RUN pip install --no-cache-dir -r requirements.txt
-
-RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
-        pip install "/tmp/crawl4ai/[all]" && \
-        python -m nltk.downloader punkt stopwords && \
-        python -m crawl4ai.model_loader ; \
-    elif [ "$INSTALL_TYPE" = "torch" ] ; then \
-        pip install "/tmp/crawl4ai/[torch]" ; \
-    elif [ "$INSTALL_TYPE" = "transformer" ] ; then \
-        pip install "/tmp/crawl4ai/[transformer]" && \
-        python -m crawl4ai.model_loader ; \
-    else \
-        pip install "/tmp/crawl4ai" ; \
-    fi
-    
-RUN pip install --no-cache-dir --upgrade pip && \
-    python -c "import crawl4ai; print('✅ crawl4ai is ready to rock!')" && \
-    python -c "from playwright.sync_api import sync_playwright; print('✅ Playwright is feeling dramatic!')"
-    
-RUN playwright install --with-deps chromium
-
-COPY docker/* ${APP_HOME}/
-RUN if [ -n "$CONFIG_PATH" ] && [ -f "$CONFIG_PATH" ]; then \
-    echo "Using custom config from $CONFIG_PATH" && \
-    cp $CONFIG_PATH /app/config.yml; \
-fi
-
-HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
-    CMD bash -c '\
-    MEM=$(free -m | awk "/^Mem:/{print \$2}"); \
-    if [ $MEM -lt 2048 ]; then \
-        echo "⚠️ Warning: Less than 2GB RAM available! Your container might need a memory boost! 🚀"; \
-        exit 1; \
-    fi && \
-    redis-cli ping > /dev/null && \
-    curl -f http://localhost:8000/health || exit 1'
-
-# EXPOSE 6379
-
-CMD ["supervisord", "-c", "supervisord.conf"]
-    
diff --git a/docs/examples/hello_world.py b/docs/examples/hello_world.py
index 89f3188a..ae74a4b3 100644
--- a/docs/examples/hello_world.py
+++ b/docs/examples/hello_world.py
@@ -6,6 +6,7 @@ from crawl4ai import (
     CacheMode,
     DefaultMarkdownGenerator,
     PruningContentFilter,
+    CrawlResult
 )
 
 
@@ -20,10 +21,12 @@ async def main():
                 )
             ),
         )
-        result = await crawler.arun(
-            url="https://www.helloworld.org", config=crawler_config
+        result : CrawlResult = await crawler.arun(
+            # url="https://www.helloworld.org", config=crawler_config
+            url="https://www.kidocode.com", config=crawler_config
         )
         print(result.markdown_v2.raw_markdown[:500])
+        # print(result.model_dump())
 
 
 if __name__ == "__main__":
diff --git a/pyproject.toml b/pyproject.toml
index f59eabd1..bcee2974 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,6 +56,7 @@ classifiers = [
 ]
 
 [project.optional-dependencies]
+pdf = ["PyPDF2"]  
 torch = ["torch", "nltk", "scikit-learn"]
 transformer = ["transformers", "tokenizers"]
 cosine = ["torch", "transformers", "nltk"]
@@ -66,7 +67,8 @@ all = [
     "scikit-learn",
     "transformers",
     "tokenizers",
-    "selenium"
+    "selenium",
+    "PyPDF2"  
 ]
 
 [project.scripts]