From 513d008de5464e7b09d3423e64ad32ef7e379707 Mon Sep 17 00:00:00 2001
From: Aravind Karnam <aravind.karanam@gmail.com>
Date: Mon, 27 Jan 2025 11:54:10 +0530
Subject: [PATCH] feat: Merge reviews from unclecode for scorers and filters &
 Remove the robots.txt compliance from scraper since that will be now handled
 by crawler

---
 crawl4ai/scraper/bfs_scraper_strategy.py |   40 +-
 crawl4ai/scraper/filters.py              |  787 ++++++++++++--
 crawl4ai/scraper/filters_review.py       |  872 ----------------
 crawl4ai/scraper/scorers.py              |  953 ++++++++++++++++-
 crawl4ai/scraper/scorers_review.py       | 1208 ----------------------
 5 files changed, 1631 insertions(+), 2229 deletions(-)
 delete mode 100644 crawl4ai/scraper/filters_review.py
 delete mode 100644 crawl4ai/scraper/scorers_review.py

diff --git a/crawl4ai/scraper/bfs_scraper_strategy.py b/crawl4ai/scraper/bfs_scraper_strategy.py
index c9db4c1c..646ff1d1 100644
--- a/crawl4ai/scraper/bfs_scraper_strategy.py
+++ b/crawl4ai/scraper/bfs_scraper_strategy.py
@@ -4,7 +4,7 @@ from datetime import datetime
 import asyncio
 import logging
 from urllib.parse import urlparse
-from urllib.robotparser import RobotFileParser
+
 # import validators
 
 from ..async_configs import CrawlerRunConfig
@@ -26,7 +26,6 @@ class CrawlStats:
     urls_skipped: int = 0
     total_depth_reached: int = 0
     current_depth: int = 0
-    robots_blocked: int = 0
 
 
 class BFSScraperStrategy(ScraperStrategy):
@@ -49,13 +48,11 @@ class BFSScraperStrategy(ScraperStrategy):
         self.stats = CrawlStats(start_time=datetime.now())
         self._cancel_event = asyncio.Event()
         self.process_external_links = process_external_links
-        self.robot_parsers: Dict[str, RobotFileParser] = {}
 
     async def can_process_url(self, url: str, depth: int) -> bool:
         """Check if URL can be processed based on robots.txt and filters
         This is our gatekeeper method that determines if a URL should be processed. It:
             - Validates URL format using a robust built-in method
-            - Checks robots.txt permissions for the domain
             - Applies custom filters from the filter chain
             - Updates statistics for blocked URLs
             - Returns False early if any check fails
@@ -64,47 +61,20 @@ class BFSScraperStrategy(ScraperStrategy):
             result = urlparse(url)
             if not all([result.scheme, result.netloc]):
                 raise ValueError("Invalid URL")
-            if result.scheme not in ('http', 'https'):
+            if result.scheme not in ("http", "https"):
                 raise ValueError("URL must be HTTP or HTTPS")
-            if not result.netloc or '.' not in result.netloc:
+            if not result.netloc or "." not in result.netloc:
                 raise ValueError("Invalid domain")
         except Exception as e:
             self.logger.warning(f"Invalid URL: {url}. Error: {str(e)}")
             return False
 
-        robot_parser = await self._get_robot_parser(url)
-        if robot_parser and not robot_parser.can_fetch("*", url):
-            self.stats.robots_blocked += 1
-            self.logger.info(f"Blocked by robots.txt: {url}")
-            return False
-
         # Apply the filter chain if it's not start page
         if depth != 0 and not self.filter_chain.apply(url):
             return False
 
         return True
 
-    async def _get_robot_parser(self, url: str) -> Optional[RobotFileParser]:
-        """Get or create robots.txt parser for domain.
-        This is our robots.txt manager that:
-            - Uses domain-level caching of robot parsers
-            - Creates and caches new parsers as needed
-            - Handles failed robots.txt fetches gracefully
-            - Returns None if robots.txt can't be fetched, allowing crawling to proceed
-        """
-        domain = urlparse(url).netloc
-        if domain not in self.robot_parsers:
-            parser = RobotFileParser()
-            try:
-                robots_url = f"{urlparse(url).scheme}://{domain}/robots.txt"
-                parser.set_url(robots_url)
-                parser.read()
-                self.robot_parsers[domain] = parser
-            except Exception as e:
-                self.logger.warning(f"Error fetching robots.txt for {domain}: {e}")
-                return None
-        return self.robot_parsers[domain]
-
     async def _process_links(
         self,
         result: CrawlResult,
@@ -117,7 +87,7 @@ class BFSScraperStrategy(ScraperStrategy):
         """Process extracted links from crawl result.
         This is our link processor that:
             Handles both internal and external links
-            Checks if URL can be processed - validates URL, applies Filters and tests Robots.txt compliance with can_process_url
+            Checks if URL can be processed - validates URL, applies Filters with can_process_url
             Checks depth limits
             Scores URLs for priority
             Updates depth tracking
@@ -233,5 +203,3 @@ class BFSScraperStrategy(ScraperStrategy):
     async def shutdown(self):
         """Clean up resources and stop crawling"""
         self._cancel_event.set()
-        # Clear caches and close connections
-        self.robot_parsers.clear()
diff --git a/crawl4ai/scraper/filters.py b/crawl4ai/scraper/filters.py
index b0547ab9..1e123d11 100644
--- a/crawl4ai/scraper/filters.py
+++ b/crawl4ai/scraper/filters.py
@@ -1,29 +1,30 @@
-# from .url_filter import URLFilter, FilterChain
-# from .content_type_filter import ContentTypeFilter
-# from .url_pattern_filter import URLPatternFilter
-
 from abc import ABC, abstractmethod
-from typing import List, Pattern, Set, Union
-import re
+from typing import List, Pattern, Set, Union, FrozenSet
+import re, time
 from urllib.parse import urlparse
-import mimetypes
+from array import array
 import logging
-from dataclasses import dataclass
+from functools import lru_cache
 import fnmatch
+from dataclasses import dataclass
+from typing import ClassVar
+import weakref
+import mimetypes
 
 
 @dataclass
 class FilterStats:
-    """Statistics for filter applications"""
-
+    # PERF: Using dataclass creates overhead with __init__ and property access
+    # PERF: Could use __slots__ to reduce memory footprint
+    # PERF: Consider using array.array('I') for atomic increments
     total_urls: int = 0
     rejected_urls: int = 0
     passed_urls: int = 0
 
 
 class URLFilter(ABC):
-    """Base class for URL filters"""
-
+    # PERF: Logger creation is expensive, consider lazy initialization
+    # PERF: stats object creation adds overhead for each filter instance
     def __init__(self, name: str = None):
         self.name = name or self.__class__.__name__
         self.stats = FilterStats()
@@ -31,37 +32,29 @@ class URLFilter(ABC):
 
     @abstractmethod
     def apply(self, url: str) -> bool:
-        """Apply the filter to a URL"""
         pass
 
     def _update_stats(self, passed: bool):
-        """Update filter statistics"""
-        # INFO: Old trick to make things faster
+        # PERF: Already optimized but could use bitwise operations
+        # PERF: Consider removing stats entirely in production/fast mode
         self.stats.total_urls += 1
         self.stats.passed_urls += passed
-        self.stats.rejected_urls += not passed        
-        # self.stats.total_urls += 1
-        # if passed:
-        #     self.stats.passed_urls += 1
-        # else:
-        #     self.stats.rejected_urls += 1
+        self.stats.rejected_urls += not passed
 
 
 class FilterChain:
-    """Chain of URL filters."""
-
+    # PERF: List traversal for each URL is expensive
+    # PERF: Could use array.array instead of list for filters
+    # PERF: Consider adding fast path for single filter case
     def __init__(self, filters: List[URLFilter] = None):
         self.filters = filters or []
         self.stats = FilterStats()
         self.logger = logging.getLogger("urlfilter.chain")
 
-    def add_filter(self, filter_: URLFilter) -> "FilterChain":
-        """Add a filter to the chain"""
-        self.filters.append(filter_)
-        return self  # Enable method chaining
-
     def apply(self, url: str) -> bool:
-        """Apply all filters in the chain"""
+        # PERF: Logging on every rejection is expensive
+        # PERF: Could reorder filters by rejection rate
+        # PERF: Consider batch processing mode
         self.stats.total_urls += 1
 
         for filter_ in self.filters:
@@ -75,19 +68,9 @@ class FilterChain:
 
 
 class URLPatternFilter(URLFilter):
-    """Filter URLs based on glob patterns or regex.
-
-    pattern_filter = URLPatternFilter([
-        "*.example.com/*",  # Glob pattern
-        "*/article/*",      # Path pattern
-        re.compile(r"blog-\d+") # Regex pattern
-    ])
-
-    - Supports glob patterns and regex
-    - Multiple patterns per filter
-    - Pattern pre-compilation for performance
-    """
-
+    # PERF: Converting glob to regex is expensive
+    # PERF: Multiple regex compilation is slow
+    # PERF: List of patterns causes multiple regex evaluations
     def __init__(
         self,
         patterns: Union[str, Pattern, List[Union[str, Pattern]]],
@@ -98,6 +81,8 @@ class URLPatternFilter(URLFilter):
         self.use_glob = use_glob
         self._compiled_patterns = []
 
+        # PERF: This could be consolidated into a single regex with OR conditions
+        # PERF: glob_to_regex creates complex patterns, could be simplified
         for pattern in self.patterns:
             if isinstance(pattern, str) and use_glob:
                 self._compiled_patterns.append(self._glob_to_regex(pattern))
@@ -107,29 +92,22 @@ class URLPatternFilter(URLFilter):
                 )
 
     def _glob_to_regex(self, pattern: str) -> Pattern:
-        """Convert glob pattern to regex"""
+        # PERF: fnmatch.translate creates overly complex patterns
+        # PERF: Could cache common translations
         return re.compile(fnmatch.translate(pattern))
 
     def apply(self, url: str) -> bool:
-        """Check if URL matches any of the patterns"""
+        # PERF: any() with generator is slower than direct loop with early return
+        # PERF: searching entire string is slower than anchored match
         matches = any(pattern.search(url) for pattern in self._compiled_patterns)
         self._update_stats(matches)
         return matches
 
 
 class ContentTypeFilter(URLFilter):
-    """Filter URLs based on expected content type.
-
-    content_filter = ContentTypeFilter([
-        "text/html",
-        "application/pdf"
-    ], check_extension=True)
-
-    - Filter by MIME types
-    - Extension checking
-    - Support for multiple content types
-    """
-
+    # PERF: mimetypes guessing is extremely slow
+    # PERF: URL parsing on every check is expensive
+    # PERF: No caching of results for similar extensions
     def __init__(
         self, allowed_types: Union[str, List[str]], check_extension: bool = True
     ):
@@ -145,15 +123,18 @@ class ContentTypeFilter(URLFilter):
         self.allowed_types = [t.lower() for t in self.allowed_types]
 
     def _check_extension(self, url: str) -> bool:
-        """Check URL's file extension"""
+        # PERF: urlparse is called on every check
+        # PERF: multiple string splits are expensive
+        # PERF: mimetypes.guess_type is very slow
         ext = (
             urlparse(url).path.split(".")[-1].lower()
             if "." in urlparse(url).path
             else ""
         )
         if not ext:
-            return True  # No extension, might be dynamic content
+            return True
 
+        # PERF: guess_type is main bottleneck
         guessed_type = mimetypes.guess_type(url)[0]
         return any(
             allowed in (guessed_type or "").lower() for allowed in self.allowed_types
@@ -169,24 +150,16 @@ class ContentTypeFilter(URLFilter):
 
 
 class DomainFilter(URLFilter):
-    """Filter URLs based on allowed/blocked domains.
-
-    domain_filter = DomainFilter(
-        allowed_domains=["example.com", "blog.example.com"],
-        blocked_domains=["ads.example.com"]
-    )
-
-    - Allow/block specific domains
-    - Subdomain support
-    - Efficient domain matching
-    """
-
+    # PERF: Set lookups are fast but string normalizations on init are not
+    # PERF: Creating two sets doubles memory usage
     def __init__(
         self,
         allowed_domains: Union[str, List[str]] = None,
         blocked_domains: Union[str, List[str]] = None,
     ):
         super().__init__()
+        # PERF: Normalizing domains on every init is wasteful
+        # PERF: Could use frozenset for immutable lists
         self.allowed_domains = (
             set(self._normalize_domains(allowed_domains)) if allowed_domains else None
         )
@@ -195,17 +168,21 @@ class DomainFilter(URLFilter):
         )
 
     def _normalize_domains(self, domains: Union[str, List[str]]) -> List[str]:
-        """Normalize domain strings"""
+        # PERF: strip() and lower() create new strings for each domain
+        # PERF: List comprehension creates intermediate list
         if isinstance(domains, str):
             domains = [domains]
         return [d.lower().strip() for d in domains]
 
     def _extract_domain(self, url: str) -> str:
-        """Extract domain from URL"""
+        # PERF: urlparse is called for every URL check
+        # PERF: lower() creates new string every time
+        # PERF: Could cache recent results
         return urlparse(url).netloc.lower()
 
     def apply(self, url: str) -> bool:
-        """Check if URL's domain is allowed"""
+        # PERF: Two separate set lookups in worst case
+        # PERF: Domain extraction happens before knowing if we have any filters
         domain = self._extract_domain(url)
 
         if domain in self.blocked_domains:
@@ -239,47 +216,653 @@ def create_common_filter_chain() -> FilterChain:
     )
 
 
+####################################################################################
+# Uncledoe: Optimized Version
+####################################################################################
+
+
+# Use __slots__ and array for maximum memory/speed efficiency
+class FastFilterStats:
+    __slots__ = ("_counters",)
+
+    def __init__(self):
+        # Use array of unsigned ints for atomic operations
+        self._counters = array("I", [0, 0, 0])  # total, passed, rejected
+
+    @property
+    def total_urls(self):
+        return self._counters[0]
+
+    @property
+    def passed_urls(self):
+        return self._counters[1]
+
+    @property
+    def rejected_urls(self):
+        return self._counters[2]
+
+
+class FastURLFilter(ABC):
+    """Optimized base filter class"""
+
+    __slots__ = ("name", "stats", "_logger_ref")
+
+    def __init__(self, name: str = None):
+        self.name = name or self.__class__.__name__
+        self.stats = FastFilterStats()
+        # Lazy logger initialization using weakref
+        self._logger_ref = None
+
+    @property
+    def logger(self):
+        if self._logger_ref is None or self._logger_ref() is None:
+            logger = logging.getLogger(f"urlfilter.{self.name}")
+            self._logger_ref = weakref.ref(logger)
+        return self._logger_ref()
+
+    @abstractmethod
+    def apply(self, url: str) -> bool:
+        pass
+
+    def _update_stats(self, passed: bool):
+        # Use direct array index for speed
+        self.stats._counters[0] += 1  # total
+        self.stats._counters[1] += passed  # passed
+        self.stats._counters[2] += not passed  # rejected
+
+
+class FastFilterChain:
+    """Optimized filter chain"""
+
+    __slots__ = ("filters", "stats", "_logger_ref")
+
+    def __init__(self, filters: List[FastURLFilter] = None):
+        self.filters = tuple(filters or [])  # Immutable tuple for speed
+        self.stats = FastFilterStats()
+        self._logger_ref = None
+
+    @property
+    def logger(self):
+        if self._logger_ref is None or self._logger_ref() is None:
+            logger = logging.getLogger("urlfilter.chain")
+            self._logger_ref = weakref.ref(logger)
+        return self._logger_ref()
+
+    def add_filter(self, filter_: FastURLFilter) -> "FastFilterChain":
+        """Add a filter to the chain"""
+        self.filters.append(filter_)
+        return self  # Enable method chaining
+
+    def apply(self, url: str) -> bool:
+        """Optimized apply with minimal operations"""
+        self.stats._counters[0] += 1  # total
+
+        # Direct tuple iteration is faster than list
+        for f in self.filters:
+            if not f.apply(url):
+                self.stats._counters[2] += 1  # rejected
+                return False
+
+        self.stats._counters[1] += 1  # passed
+        return True
+
+class FastURLPatternFilter(FastURLFilter):
+    """Pattern filter balancing speed and completeness"""
+    __slots__ = ('_simple_suffixes', '_simple_prefixes', '_domain_patterns', '_path_patterns')
+    
+    PATTERN_TYPES = {
+        'SUFFIX': 1,    # *.html
+        'PREFIX': 2,    # /foo/*
+        'DOMAIN': 3,    # *.example.com
+        'PATH': 4 ,      # Everything else
+        'REGEX': 5 
+    }
+    
+    def __init__(self, patterns: Union[str, Pattern, List[Union[str, Pattern]]], use_glob: bool = True):
+        super().__init__()
+        patterns = [patterns] if isinstance(patterns, (str, Pattern)) else patterns
+        
+        self._simple_suffixes = set()
+        self._simple_prefixes = set()
+        self._domain_patterns = []
+        self._path_patterns = []
+        
+        for pattern in patterns:
+            pattern_type = self._categorize_pattern(pattern)
+            self._add_pattern(pattern, pattern_type)
+    
+    def _categorize_pattern(self, pattern: str) -> int:
+        """Categorize pattern for specialized handling"""
+        if not isinstance(pattern, str):
+            return self.PATTERN_TYPES['PATH']
+            
+        # Check if it's a regex pattern
+        if pattern.startswith('^') or pattern.endswith('$') or '\\d' in pattern:
+            return self.PATTERN_TYPES['REGEX']
+        
+        if pattern.count('*') == 1:
+            if pattern.startswith('*.'):
+                return self.PATTERN_TYPES['SUFFIX']
+            if pattern.endswith('/*'):
+                return self.PATTERN_TYPES['PREFIX']
+                
+        if '://' in pattern and pattern.startswith('*.'):
+            return self.PATTERN_TYPES['DOMAIN']
+            
+        return self.PATTERN_TYPES['PATH']
+    
+    def _add_pattern(self, pattern: str, pattern_type: int):
+        """Add pattern to appropriate matcher"""
+        if pattern_type == self.PATTERN_TYPES['REGEX']:
+            # For regex patterns, compile directly without glob translation
+            if isinstance(pattern, str) and (pattern.startswith('^') or pattern.endswith('$') or '\\d' in pattern):
+                self._path_patterns.append(re.compile(pattern))
+                return
+        elif pattern_type == self.PATTERN_TYPES['SUFFIX']:
+            self._simple_suffixes.add(pattern[2:])
+        elif pattern_type == self.PATTERN_TYPES['PREFIX']:
+            self._simple_prefixes.add(pattern[:-2])
+        elif pattern_type == self.PATTERN_TYPES['DOMAIN']:
+            self._domain_patterns.append(
+                re.compile(pattern.replace('*.', r'[^/]+\.'))
+            )
+        else:
+            if isinstance(pattern, str):
+                # Handle complex glob patterns
+                if '**' in pattern:
+                    pattern = pattern.replace('**', '.*')
+                if '{' in pattern:
+                    # Convert {a,b} to (a|b)
+                    pattern = re.sub(r'\{([^}]+)\}', 
+                                   lambda m: f'({"|".join(m.group(1).split(","))})',
+                                   pattern)
+                pattern = fnmatch.translate(pattern)
+            self._path_patterns.append(
+                pattern if isinstance(pattern, Pattern) else re.compile(pattern)
+            )
+
+    @lru_cache(maxsize=10000)
+    def apply(self, url: str) -> bool:
+        """Hierarchical pattern matching"""
+        # Quick suffix check (*.html)
+        if self._simple_suffixes:
+            path = url.split('?')[0]
+            if path.split('/')[-1].split('.')[-1] in self._simple_suffixes:
+                self._update_stats(True)
+                return True
+                
+        # Domain check
+        if self._domain_patterns:
+            for pattern in self._domain_patterns:
+                if pattern.match(url):
+                    self._update_stats(True)
+                    return True
+        
+        # Prefix check (/foo/*)
+        if self._simple_prefixes:
+            path = url.split('?')[0]
+            if any(path.startswith(p) for p in self._simple_prefixes):
+                self._update_stats(True)
+                return True
+                
+        # Complex patterns
+        if self._path_patterns:
+            if any(p.search(url) for p in self._path_patterns):
+                self._update_stats(True)
+                return True
+        
+        self._update_stats(False)
+        return False
+
+
+class FastContentTypeFilter(FastURLFilter):
+    """Optimized content type filter using fast lookups"""
+
+    __slots__ = ("allowed_types", "_ext_map", "_check_extension")
+
+    # Fast extension to mime type mapping
+    _MIME_MAP = {
+        # Text Formats
+        "txt": "text/plain",
+        "html": "text/html",
+        "htm": "text/html",
+        "xhtml": "application/xhtml+xml",
+        "css": "text/css",
+        "csv": "text/csv",
+        "ics": "text/calendar",
+        "js": "application/javascript",
+        # Images
+        "bmp": "image/bmp",
+        "gif": "image/gif",
+        "jpeg": "image/jpeg",
+        "jpg": "image/jpeg",
+        "png": "image/png",
+        "svg": "image/svg+xml",
+        "tiff": "image/tiff",
+        "ico": "image/x-icon",
+        "webp": "image/webp",
+        # Audio
+        "mp3": "audio/mpeg",
+        "wav": "audio/wav",
+        "ogg": "audio/ogg",
+        "m4a": "audio/mp4",
+        "aac": "audio/aac",
+        # Video
+        "mp4": "video/mp4",
+        "mpeg": "video/mpeg",
+        "webm": "video/webm",
+        "avi": "video/x-msvideo",
+        "mov": "video/quicktime",
+        "flv": "video/x-flv",
+        "wmv": "video/x-ms-wmv",
+        "mkv": "video/x-matroska",
+        # Applications
+        "json": "application/json",
+        "xml": "application/xml",
+        "pdf": "application/pdf",
+        "zip": "application/zip",
+        "gz": "application/gzip",
+        "tar": "application/x-tar",
+        "rar": "application/vnd.rar",
+        "7z": "application/x-7z-compressed",
+        "exe": "application/vnd.microsoft.portable-executable",
+        "msi": "application/x-msdownload",
+        # Fonts
+        "woff": "font/woff",
+        "woff2": "font/woff2",
+        "ttf": "font/ttf",
+        "otf": "font/otf",
+        # Microsoft Office
+        "doc": "application/msword",
+        "dot": "application/msword",
+        "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        "xls": "application/vnd.ms-excel",
+        "ppt": "application/vnd.ms-powerpoint",
+        "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+        # OpenDocument Formats
+        "odt": "application/vnd.oasis.opendocument.text",
+        "ods": "application/vnd.oasis.opendocument.spreadsheet",
+        "odp": "application/vnd.oasis.opendocument.presentation",
+        # Archives
+        "tar.gz": "application/gzip",
+        "tgz": "application/gzip",
+        "bz2": "application/x-bzip2",
+        # Others
+        "rtf": "application/rtf",
+        "apk": "application/vnd.android.package-archive",
+        "epub": "application/epub+zip",
+        "jar": "application/java-archive",
+        "swf": "application/x-shockwave-flash",
+        "midi": "audio/midi",
+        "mid": "audio/midi",
+        "ps": "application/postscript",
+        "ai": "application/postscript",
+        "eps": "application/postscript",
+        # Custom or less common
+        "bin": "application/octet-stream",
+        "dmg": "application/x-apple-diskimage",
+        "iso": "application/x-iso9660-image",
+        "deb": "application/x-debian-package",
+        "rpm": "application/x-rpm",
+        "sqlite": "application/vnd.sqlite3",
+        # Placeholder
+        "unknown": "application/octet-stream",  # Fallback for unknown file types
+    }
+
+    @staticmethod
+    @lru_cache(maxsize=1000)
+    def _extract_extension(path: str) -> str:
+        """Fast extension extraction with caching"""
+        if "." not in path:
+            return ""
+        return path.rpartition(".")[-1].lower()
+
+    def __init__(
+        self, allowed_types: Union[str, List[str]], check_extension: bool = True
+    ):
+        super().__init__()
+        # Normalize and store as frozenset for fast lookup
+        self.allowed_types = frozenset(
+            t.lower()
+            for t in (
+                allowed_types if isinstance(allowed_types, list) else [allowed_types]
+            )
+        )
+        self._check_extension = check_extension
+
+        # Pre-compute extension map for allowed types
+        self._ext_map = frozenset(
+            ext
+            for ext, mime in self._MIME_MAP.items()
+            if any(allowed in mime for allowed in self.allowed_types)
+        )
+
+    @lru_cache(maxsize=1000)
+    def _check_url_cached(self, url: str) -> bool:
+        """Cached URL checking"""
+        if not self._check_extension:
+            return True
+
+        path = url.split("?")[0]  # Fast path split
+        ext = self._extract_extension(path)
+        if not ext:
+            return True
+
+        return ext in self._ext_map
+
+    def apply(self, url: str) -> bool:
+        """Fast extension check with caching"""
+        result = self._check_url_cached(url)
+        self._update_stats(result)
+        return result
+
+
+class FastDomainFilter(FastURLFilter):
+    """Optimized domain filter with fast lookups and caching"""
+
+    __slots__ = ("_allowed_domains", "_blocked_domains", "_domain_cache")
+
+    # Regex for fast domain extraction
+    _DOMAIN_REGEX = re.compile(r"://([^/]+)")
+
+    def __init__(
+        self,
+        allowed_domains: Union[str, List[str]] = None,
+        blocked_domains: Union[str, List[str]] = None,
+    ):
+        super().__init__()
+
+        # Convert inputs to frozensets for immutable, fast lookups
+        self._allowed_domains = (
+            frozenset(self._normalize_domains(allowed_domains))
+            if allowed_domains
+            else None
+        )
+        self._blocked_domains = (
+            frozenset(self._normalize_domains(blocked_domains))
+            if blocked_domains
+            else frozenset()
+        )
+
+    @staticmethod
+    def _normalize_domains(domains: Union[str, List[str]]) -> Set[str]:
+        """Fast domain normalization"""
+        if isinstance(domains, str):
+            return {domains.lower()}
+        return {d.lower() for d in domains}
+
+    @staticmethod
+    @lru_cache(maxsize=10000)
+    def _extract_domain(url: str) -> str:
+        """Ultra-fast domain extraction with regex and caching"""
+        match = FastDomainFilter._DOMAIN_REGEX.search(url)
+        return match.group(1).lower() if match else ""
+
+    def apply(self, url: str) -> bool:
+        """Optimized domain checking with early returns"""
+        # Skip processing if no filters
+        if not self._blocked_domains and self._allowed_domains is None:
+            self._update_stats(True)
+            return True
+
+        domain = self._extract_domain(url)
+
+        # Early return for blocked domains
+        if domain in self._blocked_domains:
+            self._update_stats(False)
+            return False
+
+        # If no allowed domains specified, accept all non-blocked
+        if self._allowed_domains is None:
+            self._update_stats(True)
+            return True
+
+        # Final allowed domains check
+        result = domain in self._allowed_domains
+        self._update_stats(result)
+        return result
+
+
+def create_fast_filter_chain() -> FastFilterChain:
+    """Create an optimized filter chain with filters ordered by rejection rate"""
+    return FastFilterChain(
+        [
+            # Domain filter first (fastest rejection)
+            FastDomainFilter(blocked_domains=["ads.*", "analytics.*"]),
+            # Content filter second (medium speed)
+            FastContentTypeFilter(["text/html", "application/xhtml+xml"]),
+            # Pattern filter last (most expensive)
+            FastURLPatternFilter(
+                [
+                    "*.html",
+                    "*.htm",
+                    "*/article/*",
+                    "*/blog/*",
+                ]
+            ),
+        ]
+    )
+
 
 def run_performance_test():
     import time
     import random
-    
-    # Test URLs
-    test_urls = [
-        'https://example.com/article/123',
-        'https://blog.example.com/post/456',
-        'https://ads.example.com/tracking',
-        'https://example.com/about.html',
-        'https://analytics.example.com/script.js',
-        'https://example.com/products.php',
-        'https://subdomain.example.com/blog/post-123',
-        'https://example.com/path/file.pdf',
-    ] * 100000  # Create 800k URLs to test
-    
-    def benchmark(name: str, func, *args):
+    from itertools import cycle
+
+    # Generate test URLs
+    base_urls = [
+        "https://example.com/article/123",
+        "https://blog.example.com/post/456",
+        "https://ads.example.com/tracking",
+        "https://example.com/about.html",
+        "https://analytics.example.com/script.js",
+        "https://example.com/products.php",
+        "https://subdomain.example.com/blog/post-123",
+        "https://example.com/path/file.pdf",
+    ]
+
+    # Create more varied test data
+    test_urls = []
+    for base in base_urls:
+        # Add original
+        test_urls.append(base)
+        # Add variations
+        parts = base.split("/")
+        for i in range(10):
+            parts[-1] = f"page_{i}.html"
+            test_urls.append("/".join(parts))
+
+    # Multiply to get enough test data
+    test_urls = test_urls * 10000  # Creates ~800k URLs
+
+    def benchmark(name: str, func, *args, warmup=True):
+        if warmup:
+            # Warmup run
+            func(*args)
+
+        # Actual timing
         start = time.perf_counter_ns()
-        result = func(*args) if args[0] else func()
+        result = func(*args)
         elapsed = (time.perf_counter_ns() - start) / 1_000_000  # Convert to ms
-        print(f"{name:<30} {elapsed:>8.3f} ms")
+        print(
+            f"{name:<30} {elapsed:>8.3f} ms  ({len(test_urls)/elapsed*1000:,.0f} URLs/sec)"
+        )
         return result
 
-    # Test individual filters
+    print("\nBenchmarking original vs optimized implementations...")
+    print("-" * 70)
+
+    # Original implementation
     pattern_filter = URLPatternFilter(["*.html", "*/article/*"])
     content_filter = ContentTypeFilter(["text/html"])
     domain_filter = DomainFilter(blocked_domains=["ads.*", "analytics.*"])
-    
-    # Test chain
     chain = FilterChain([pattern_filter, content_filter, domain_filter])
+
+    # Optimized implementation
+    fast_pattern_filter = FastURLPatternFilter(["*.html", "*/article/*"])
+    fast_content_filter = FastContentTypeFilter(["text/html"])
+    fast_domain_filter = FastDomainFilter(blocked_domains=["ads.*", "analytics.*"])
+    fast_chain = FastFilterChain(
+        [fast_domain_filter, fast_content_filter, fast_pattern_filter]
+    )
+
+    # Test individual filters
+    print("\nSingle filter performance (first 1000 URLs):")
+    test_subset = test_urls[:1000]
+
+    print("\nPattern Filters:")
+    benchmark(
+        "Original Pattern Filter",
+        lambda: [pattern_filter.apply(url) for url in test_subset],
+    )
+    benchmark(
+        "Optimized Pattern Filter",
+        lambda: [fast_pattern_filter.apply(url) for url in test_subset],
+    )
+
+    print("\nContent Filters:")
+    benchmark(
+        "Original Content Filter",
+        lambda: [content_filter.apply(url) for url in test_subset],
+    )
+    benchmark(
+        "Optimized Content Filter",
+        lambda: [fast_content_filter.apply(url) for url in test_subset],
+    )
+
+    print("\nDomain Filters:")
+    benchmark(
+        "Original Domain Filter",
+        lambda: [domain_filter.apply(url) for url in test_subset],
+    )
+    benchmark(
+        "Optimized Domain Filter",
+        lambda: [fast_domain_filter.apply(url) for url in test_subset],
+    )
+
+    print("\nFull Chain Performance (all URLs):")
+    # Test chain
+    benchmark("Original Chain", lambda: [chain.apply(url) for url in test_urls])
+    benchmark("Optimized Chain", lambda: [fast_chain.apply(url) for url in test_urls])
+
+    # Memory usage
+    import sys
+
+    print("\nMemory Usage per Filter:")
+    print(f"Original Pattern Filter: {sys.getsizeof(pattern_filter):,} bytes")
+    print(f"Optimized Pattern Filter: {sys.getsizeof(fast_pattern_filter):,} bytes")
+    print(f"Original Content Filter: {sys.getsizeof(content_filter):,} bytes")
+    print(f"Optimized Content Filter: {sys.getsizeof(fast_content_filter):,} bytes")
+    print(f"Original Domain Filter: {sys.getsizeof(domain_filter):,} bytes")
+    print(f"Optimized Domain Filter: {sys.getsizeof(fast_domain_filter):,} bytes")
+
+def test_pattern_filter():
+    import time
+    from itertools import chain
+
+    # Test cases as list of tuples instead of dict for multiple patterns
+    test_cases = [
+        # Simple suffix patterns (*.html)
+        ("*.html", {
+            "https://example.com/page.html": True,
+            "https://example.com/path/doc.html": True,
+            "https://example.com/page.htm": False,
+            "https://example.com/page.html?param=1": True,
+        }),
+        
+        # Path prefix patterns (/foo/*)
+        ("*/article/*", {
+            "https://example.com/article/123": True,
+            "https://example.com/blog/article/456": True,
+            "https://example.com/articles/789": False,
+            "https://example.com/article": False,
+        }),
+        
+        # Complex patterns
+        ("blog-*-[0-9]", {
+            "https://example.com/blog-post-1": True,
+            "https://example.com/blog-test-9": True,
+            "https://example.com/blog-post": False,
+            "https://example.com/blog-post-x": False,
+        }),
+        
+        # Multiple patterns case
+        (["*.pdf", "*/download/*"], {
+            "https://example.com/doc.pdf": True,
+            "https://example.com/download/file.txt": True,
+            "https://example.com/path/download/doc": True,
+            "https://example.com/uploads/file.txt": False,
+        }),
+        
+        # Edge cases
+        ("*", {
+            "https://example.com": True,
+            "": True,
+            "http://test.com/path": True,
+        }),
+        
+        # Complex regex
+        (r"^https?://.*\.example\.com/\d+", {
+            "https://sub.example.com/123": True,
+            "http://test.example.com/456": True,
+            "https://example.com/789": False,
+            "https://sub.example.com/abc": False,
+        })
+    ]
+
+    def run_accuracy_test():
+        print("\nAccuracy Tests:")
+        print("-" * 50)
+        
+        all_passed = True
+        for patterns, test_urls in test_cases:
+            filter_obj = FastURLPatternFilter(patterns)
+            
+            for url, expected in test_urls.items():
+                result = filter_obj.apply(url)
+                if result != expected:
+                    print(f"❌ Failed: Pattern '{patterns}' with URL '{url}'")
+                    print(f"   Expected: {expected}, Got: {result}")
+                    all_passed = False
+                else:
+                    print(f"✅ Passed: Pattern '{patterns}' with URL '{url}'")
+        
+        return all_passed
+
+    def run_speed_test():
+        print("\nSpeed Tests:")
+        print("-" * 50)
+        
+        # Create a large set of test URLs
+        all_urls = list(chain.from_iterable(urls.keys() for _, urls in test_cases))
+        test_urls = all_urls * 10000  # 100K+ URLs
+        
+        # Test both implementations
+        original = URLPatternFilter(["*.html", "*/article/*", "blog-*"])
+        optimized = FastURLPatternFilter(["*.html", "*/article/*", "blog-*"])
+        
+        def benchmark(name, filter_obj):
+            start = time.perf_counter()
+            for url in test_urls:
+                filter_obj.apply(url)
+            elapsed = time.perf_counter() - start
+            urls_per_sec = len(test_urls) / elapsed
+            print(f"{name:<20} {elapsed:.3f}s ({urls_per_sec:,.0f} URLs/sec)")
+        
+        benchmark("Original Filter:", original)
+        benchmark("Optimized Filter:", optimized)
+
+    # Run tests
+    print("Running Pattern Filter Tests...")
+    accuracy_passed = run_accuracy_test()
     
-    print("\nBenchmarking individual filters...")
-    for url in test_urls[:5]:  # Show first 5 results
-        print(f"\nTesting URL: {url}")
-        benchmark("Pattern Filter", pattern_filter.apply, url)
-        benchmark("Content Filter", content_filter.apply, url)
-        benchmark("Domain Filter", domain_filter.apply, url)
-    
-    print("\nBenchmarking full chain...")
-    benchmark("Full Chain", lambda: [chain.apply(url) for url in test_urls], None)
+    if accuracy_passed:
+        print("\n✨ All accuracy tests passed!")
+        run_speed_test()
+    else:
+        print("\n❌ Some accuracy tests failed!")
 
 if __name__ == "__main__":
-    run_performance_test()
\ No newline at end of file
+    run_performance_test()
+    # test_pattern_filter()
diff --git a/crawl4ai/scraper/filters_review.py b/crawl4ai/scraper/filters_review.py
deleted file mode 100644
index c67244c0..00000000
--- a/crawl4ai/scraper/filters_review.py
+++ /dev/null
@@ -1,872 +0,0 @@
-# from .url_filter import URLFilter, FilterChain
-# from .content_type_filter import ContentTypeFilter
-# from .url_pattern_filter import URLPatternFilter
-
-from abc import ABC, abstractmethod
-from typing import List, Pattern, Set, Union, FrozenSet
-import re, time
-from urllib.parse import urlparse
-from array import array
-import logging
-from functools import lru_cache
-import fnmatch
-from dataclasses import dataclass
-from typing import ClassVar
-import weakref
-import mimetypes
-
-
-@dataclass
-class FilterStats:
-    # PERF: Using dataclass creates overhead with __init__ and property access
-    # PERF: Could use __slots__ to reduce memory footprint
-    # PERF: Consider using array.array('I') for atomic increments
-    total_urls: int = 0
-    rejected_urls: int = 0
-    passed_urls: int = 0
-
-
-class URLFilter(ABC):
-    # PERF: Logger creation is expensive, consider lazy initialization
-    # PERF: stats object creation adds overhead for each filter instance
-    def __init__(self, name: str = None):
-        self.name = name or self.__class__.__name__
-        self.stats = FilterStats()
-        self.logger = logging.getLogger(f"urlfilter.{self.name}")
-
-    @abstractmethod
-    def apply(self, url: str) -> bool:
-        pass
-
-    def _update_stats(self, passed: bool):
-        # PERF: Already optimized but could use bitwise operations
-        # PERF: Consider removing stats entirely in production/fast mode
-        self.stats.total_urls += 1
-        self.stats.passed_urls += passed
-        self.stats.rejected_urls += not passed
-
-
-class FilterChain:
-    # PERF: List traversal for each URL is expensive
-    # PERF: Could use array.array instead of list for filters
-    # PERF: Consider adding fast path for single filter case
-    def __init__(self, filters: List[URLFilter] = None):
-        self.filters = filters or []
-        self.stats = FilterStats()
-        self.logger = logging.getLogger("urlfilter.chain")
-
-    def apply(self, url: str) -> bool:
-        # PERF: Logging on every rejection is expensive
-        # PERF: Could reorder filters by rejection rate
-        # PERF: Consider batch processing mode
-        self.stats.total_urls += 1
-
-        for filter_ in self.filters:
-            if not filter_.apply(url):
-                self.stats.rejected_urls += 1
-                self.logger.debug(f"URL {url} rejected by {filter_.name}")
-                return False
-
-        self.stats.passed_urls += 1
-        return True
-
-
-class URLPatternFilter(URLFilter):
-    # PERF: Converting glob to regex is expensive
-    # PERF: Multiple regex compilation is slow
-    # PERF: List of patterns causes multiple regex evaluations
-    def __init__(
-        self,
-        patterns: Union[str, Pattern, List[Union[str, Pattern]]],
-        use_glob: bool = True,
-    ):
-        super().__init__()
-        self.patterns = [patterns] if isinstance(patterns, (str, Pattern)) else patterns
-        self.use_glob = use_glob
-        self._compiled_patterns = []
-
-        # PERF: This could be consolidated into a single regex with OR conditions
-        # PERF: glob_to_regex creates complex patterns, could be simplified
-        for pattern in self.patterns:
-            if isinstance(pattern, str) and use_glob:
-                self._compiled_patterns.append(self._glob_to_regex(pattern))
-            else:
-                self._compiled_patterns.append(
-                    re.compile(pattern) if isinstance(pattern, str) else pattern
-                )
-
-    def _glob_to_regex(self, pattern: str) -> Pattern:
-        # PERF: fnmatch.translate creates overly complex patterns
-        # PERF: Could cache common translations
-        return re.compile(fnmatch.translate(pattern))
-
-    def apply(self, url: str) -> bool:
-        # PERF: any() with generator is slower than direct loop with early return
-        # PERF: searching entire string is slower than anchored match
-        matches = any(pattern.search(url) for pattern in self._compiled_patterns)
-        self._update_stats(matches)
-        return matches
-
-
-class ContentTypeFilter(URLFilter):
-    # PERF: mimetypes guessing is extremely slow
-    # PERF: URL parsing on every check is expensive
-    # PERF: No caching of results for similar extensions
-    def __init__(
-        self, allowed_types: Union[str, List[str]], check_extension: bool = True
-    ):
-        super().__init__()
-        self.allowed_types = (
-            [allowed_types] if isinstance(allowed_types, str) else allowed_types
-        )
-        self.check_extension = check_extension
-        self._normalize_types()
-
-    def _normalize_types(self):
-        """Normalize content type strings"""
-        self.allowed_types = [t.lower() for t in self.allowed_types]
-
-    def _check_extension(self, url: str) -> bool:
-        # PERF: urlparse is called on every check
-        # PERF: multiple string splits are expensive
-        # PERF: mimetypes.guess_type is very slow
-        ext = (
-            urlparse(url).path.split(".")[-1].lower()
-            if "." in urlparse(url).path
-            else ""
-        )
-        if not ext:
-            return True
-
-        # PERF: guess_type is main bottleneck
-        guessed_type = mimetypes.guess_type(url)[0]
-        return any(
-            allowed in (guessed_type or "").lower() for allowed in self.allowed_types
-        )
-
-    def apply(self, url: str) -> bool:
-        """Check if URL's content type is allowed"""
-        result = True
-        if self.check_extension:
-            result = self._check_extension(url)
-        self._update_stats(result)
-        return result
-
-
-class DomainFilter(URLFilter):
-    # PERF: Set lookups are fast but string normalizations on init are not
-    # PERF: Creating two sets doubles memory usage
-    def __init__(
-        self,
-        allowed_domains: Union[str, List[str]] = None,
-        blocked_domains: Union[str, List[str]] = None,
-    ):
-        super().__init__()
-        # PERF: Normalizing domains on every init is wasteful
-        # PERF: Could use frozenset for immutable lists
-        self.allowed_domains = (
-            set(self._normalize_domains(allowed_domains)) if allowed_domains else None
-        )
-        self.blocked_domains = (
-            set(self._normalize_domains(blocked_domains)) if blocked_domains else set()
-        )
-
-    def _normalize_domains(self, domains: Union[str, List[str]]) -> List[str]:
-        # PERF: strip() and lower() create new strings for each domain
-        # PERF: List comprehension creates intermediate list
-        if isinstance(domains, str):
-            domains = [domains]
-        return [d.lower().strip() for d in domains]
-
-    def _extract_domain(self, url: str) -> str:
-        # PERF: urlparse is called for every URL check
-        # PERF: lower() creates new string every time
-        # PERF: Could cache recent results
-        return urlparse(url).netloc.lower()
-
-    def apply(self, url: str) -> bool:
-        # PERF: Two separate set lookups in worst case
-        # PERF: Domain extraction happens before knowing if we have any filters
-        domain = self._extract_domain(url)
-
-        if domain in self.blocked_domains:
-            self._update_stats(False)
-            return False
-
-        if self.allowed_domains is not None and domain not in self.allowed_domains:
-            self._update_stats(False)
-            return False
-
-        self._update_stats(True)
-        return True
-
-
-# Example usage:
-def create_common_filter_chain() -> FilterChain:
-    """Create a commonly used filter chain"""
-    return FilterChain(
-        [
-            URLPatternFilter(
-                [
-                    "*.html",
-                    "*.htm",  # HTML files
-                    "*/article/*",
-                    "*/blog/*",  # Common content paths
-                ]
-            ),
-            ContentTypeFilter(["text/html", "application/xhtml+xml"]),
-            DomainFilter(blocked_domains=["ads.*", "analytics.*"]),
-        ]
-    )
-
-
-####################################################################################
-# Uncledoe: Optimized Version
-####################################################################################
-
-
-# Use __slots__ and array for maximum memory/speed efficiency
-class FastFilterStats:
-    __slots__ = ("_counters",)
-
-    def __init__(self):
-        # Use array of unsigned ints for atomic operations
-        self._counters = array("I", [0, 0, 0])  # total, passed, rejected
-
-    @property
-    def total_urls(self):
-        return self._counters[0]
-
-    @property
-    def passed_urls(self):
-        return self._counters[1]
-
-    @property
-    def rejected_urls(self):
-        return self._counters[2]
-
-
-class FastURLFilter(ABC):
-    """Optimized base filter class"""
-
-    __slots__ = ("name", "stats", "_logger_ref")
-
-    def __init__(self, name: str = None):
-        self.name = name or self.__class__.__name__
-        self.stats = FastFilterStats()
-        # Lazy logger initialization using weakref
-        self._logger_ref = None
-
-    @property
-    def logger(self):
-        if self._logger_ref is None or self._logger_ref() is None:
-            logger = logging.getLogger(f"urlfilter.{self.name}")
-            self._logger_ref = weakref.ref(logger)
-        return self._logger_ref()
-
-    @abstractmethod
-    def apply(self, url: str) -> bool:
-        pass
-
-    def _update_stats(self, passed: bool):
-        # Use direct array index for speed
-        self.stats._counters[0] += 1  # total
-        self.stats._counters[1] += passed  # passed
-        self.stats._counters[2] += not passed  # rejected
-
-
-class FastFilterChain:
-    """Optimized filter chain"""
-
-    __slots__ = ("filters", "stats", "_logger_ref")
-
-    def __init__(self, filters: List[FastURLFilter] = None):
-        self.filters = tuple(filters or [])  # Immutable tuple for speed
-        self.stats = FastFilterStats()
-        self._logger_ref = None
-
-    @property
-    def logger(self):
-        if self._logger_ref is None or self._logger_ref() is None:
-            logger = logging.getLogger("urlfilter.chain")
-            self._logger_ref = weakref.ref(logger)
-        return self._logger_ref()
-
-    def add_filter(self, filter_: FastURLFilter) -> "FastFilterChain":
-        """Add a filter to the chain"""
-        self.filters.append(filter_)
-        return self  # Enable method chaining
-
-    def apply(self, url: str) -> bool:
-        """Optimized apply with minimal operations"""
-        self.stats._counters[0] += 1  # total
-
-        # Direct tuple iteration is faster than list
-        for f in self.filters:
-            if not f.apply(url):
-                self.stats._counters[2] += 1  # rejected
-                return False
-
-        self.stats._counters[1] += 1  # passed
-        return True
-
-class FastURLPatternFilter(FastURLFilter):
-    """Pattern filter balancing speed and completeness"""
-    __slots__ = ('_simple_suffixes', '_simple_prefixes', '_domain_patterns', '_path_patterns')
-    
-    PATTERN_TYPES = {
-        'SUFFIX': 1,    # *.html
-        'PREFIX': 2,    # /foo/*
-        'DOMAIN': 3,    # *.example.com
-        'PATH': 4 ,      # Everything else
-        'REGEX': 5 
-    }
-    
-    def __init__(self, patterns: Union[str, Pattern, List[Union[str, Pattern]]], use_glob: bool = True):
-        super().__init__()
-        patterns = [patterns] if isinstance(patterns, (str, Pattern)) else patterns
-        
-        self._simple_suffixes = set()
-        self._simple_prefixes = set()
-        self._domain_patterns = []
-        self._path_patterns = []
-        
-        for pattern in patterns:
-            pattern_type = self._categorize_pattern(pattern)
-            self._add_pattern(pattern, pattern_type)
-    
-    def _categorize_pattern(self, pattern: str) -> int:
-        """Categorize pattern for specialized handling"""
-        if not isinstance(pattern, str):
-            return self.PATTERN_TYPES['PATH']
-            
-        # Check if it's a regex pattern
-        if pattern.startswith('^') or pattern.endswith('$') or '\\d' in pattern:
-            return self.PATTERN_TYPES['REGEX']
-        
-        if pattern.count('*') == 1:
-            if pattern.startswith('*.'):
-                return self.PATTERN_TYPES['SUFFIX']
-            if pattern.endswith('/*'):
-                return self.PATTERN_TYPES['PREFIX']
-                
-        if '://' in pattern and pattern.startswith('*.'):
-            return self.PATTERN_TYPES['DOMAIN']
-            
-        return self.PATTERN_TYPES['PATH']
-    
-    def _add_pattern(self, pattern: str, pattern_type: int):
-        """Add pattern to appropriate matcher"""
-        if pattern_type == self.PATTERN_TYPES['REGEX']:
-            # For regex patterns, compile directly without glob translation
-            if isinstance(pattern, str) and (pattern.startswith('^') or pattern.endswith('$') or '\\d' in pattern):
-                self._path_patterns.append(re.compile(pattern))
-                return
-        elif pattern_type == self.PATTERN_TYPES['SUFFIX']:
-            self._simple_suffixes.add(pattern[2:])
-        elif pattern_type == self.PATTERN_TYPES['PREFIX']:
-            self._simple_prefixes.add(pattern[:-2])
-        elif pattern_type == self.PATTERN_TYPES['DOMAIN']:
-            self._domain_patterns.append(
-                re.compile(pattern.replace('*.', r'[^/]+\.'))
-            )
-        else:
-            if isinstance(pattern, str):
-                # Handle complex glob patterns
-                if '**' in pattern:
-                    pattern = pattern.replace('**', '.*')
-                if '{' in pattern:
-                    # Convert {a,b} to (a|b)
-                    pattern = re.sub(r'\{([^}]+)\}', 
-                                   lambda m: f'({"|".join(m.group(1).split(","))})',
-                                   pattern)
-                pattern = fnmatch.translate(pattern)
-            self._path_patterns.append(
-                pattern if isinstance(pattern, Pattern) else re.compile(pattern)
-            )
-
-    @lru_cache(maxsize=10000)
-    def apply(self, url: str) -> bool:
-        """Hierarchical pattern matching"""
-        # Quick suffix check (*.html)
-        if self._simple_suffixes:
-            path = url.split('?')[0]
-            if path.split('/')[-1].split('.')[-1] in self._simple_suffixes:
-                self._update_stats(True)
-                return True
-                
-        # Domain check
-        if self._domain_patterns:
-            for pattern in self._domain_patterns:
-                if pattern.match(url):
-                    self._update_stats(True)
-                    return True
-        
-        # Prefix check (/foo/*)
-        if self._simple_prefixes:
-            path = url.split('?')[0]
-            if any(path.startswith(p) for p in self._simple_prefixes):
-                self._update_stats(True)
-                return True
-                
-        # Complex patterns
-        if self._path_patterns:
-            if any(p.search(url) for p in self._path_patterns):
-                self._update_stats(True)
-                return True
-        
-        self._update_stats(False)
-        return False
-
-
-class FastContentTypeFilter(FastURLFilter):
-    """Optimized content type filter using fast lookups"""
-
-    __slots__ = ("allowed_types", "_ext_map", "_check_extension")
-
-    # Fast extension to mime type mapping
-    _MIME_MAP = {
-        # Text Formats
-        "txt": "text/plain",
-        "html": "text/html",
-        "htm": "text/html",
-        "xhtml": "application/xhtml+xml",
-        "css": "text/css",
-        "csv": "text/csv",
-        "ics": "text/calendar",
-        "js": "application/javascript",
-        # Images
-        "bmp": "image/bmp",
-        "gif": "image/gif",
-        "jpeg": "image/jpeg",
-        "jpg": "image/jpeg",
-        "png": "image/png",
-        "svg": "image/svg+xml",
-        "tiff": "image/tiff",
-        "ico": "image/x-icon",
-        "webp": "image/webp",
-        # Audio
-        "mp3": "audio/mpeg",
-        "wav": "audio/wav",
-        "ogg": "audio/ogg",
-        "m4a": "audio/mp4",
-        "aac": "audio/aac",
-        # Video
-        "mp4": "video/mp4",
-        "mpeg": "video/mpeg",
-        "webm": "video/webm",
-        "avi": "video/x-msvideo",
-        "mov": "video/quicktime",
-        "flv": "video/x-flv",
-        "wmv": "video/x-ms-wmv",
-        "mkv": "video/x-matroska",
-        # Applications
-        "json": "application/json",
-        "xml": "application/xml",
-        "pdf": "application/pdf",
-        "zip": "application/zip",
-        "gz": "application/gzip",
-        "tar": "application/x-tar",
-        "rar": "application/vnd.rar",
-        "7z": "application/x-7z-compressed",
-        "exe": "application/vnd.microsoft.portable-executable",
-        "msi": "application/x-msdownload",
-        # Fonts
-        "woff": "font/woff",
-        "woff2": "font/woff2",
-        "ttf": "font/ttf",
-        "otf": "font/otf",
-        # Microsoft Office
-        "doc": "application/msword",
-        "dot": "application/msword",
-        "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-        "xls": "application/vnd.ms-excel",
-        "ppt": "application/vnd.ms-powerpoint",
-        "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
-        # OpenDocument Formats
-        "odt": "application/vnd.oasis.opendocument.text",
-        "ods": "application/vnd.oasis.opendocument.spreadsheet",
-        "odp": "application/vnd.oasis.opendocument.presentation",
-        # Archives
-        "tar.gz": "application/gzip",
-        "tgz": "application/gzip",
-        "bz2": "application/x-bzip2",
-        # Others
-        "rtf": "application/rtf",
-        "apk": "application/vnd.android.package-archive",
-        "epub": "application/epub+zip",
-        "jar": "application/java-archive",
-        "swf": "application/x-shockwave-flash",
-        "midi": "audio/midi",
-        "mid": "audio/midi",
-        "ps": "application/postscript",
-        "ai": "application/postscript",
-        "eps": "application/postscript",
-        # Custom or less common
-        "bin": "application/octet-stream",
-        "dmg": "application/x-apple-diskimage",
-        "iso": "application/x-iso9660-image",
-        "deb": "application/x-debian-package",
-        "rpm": "application/x-rpm",
-        "sqlite": "application/vnd.sqlite3",
-        # Placeholder
-        "unknown": "application/octet-stream",  # Fallback for unknown file types
-    }
-
-    @staticmethod
-    @lru_cache(maxsize=1000)
-    def _extract_extension(path: str) -> str:
-        """Fast extension extraction with caching"""
-        if "." not in path:
-            return ""
-        return path.rpartition(".")[-1].lower()
-
-    def __init__(
-        self, allowed_types: Union[str, List[str]], check_extension: bool = True
-    ):
-        super().__init__()
-        # Normalize and store as frozenset for fast lookup
-        self.allowed_types = frozenset(
-            t.lower()
-            for t in (
-                allowed_types if isinstance(allowed_types, list) else [allowed_types]
-            )
-        )
-        self._check_extension = check_extension
-
-        # Pre-compute extension map for allowed types
-        self._ext_map = frozenset(
-            ext
-            for ext, mime in self._MIME_MAP.items()
-            if any(allowed in mime for allowed in self.allowed_types)
-        )
-
-    @lru_cache(maxsize=1000)
-    def _check_url_cached(self, url: str) -> bool:
-        """Cached URL checking"""
-        if not self._check_extension:
-            return True
-
-        path = url.split("?")[0]  # Fast path split
-        ext = self._extract_extension(path)
-        if not ext:
-            return True
-
-        return ext in self._ext_map
-
-    def apply(self, url: str) -> bool:
-        """Fast extension check with caching"""
-        result = self._check_url_cached(url)
-        self._update_stats(result)
-        return result
-
-
-class FastDomainFilter(FastURLFilter):
-    """Optimized domain filter with fast lookups and caching"""
-
-    __slots__ = ("_allowed_domains", "_blocked_domains", "_domain_cache")
-
-    # Regex for fast domain extraction
-    _DOMAIN_REGEX = re.compile(r"://([^/]+)")
-
-    def __init__(
-        self,
-        allowed_domains: Union[str, List[str]] = None,
-        blocked_domains: Union[str, List[str]] = None,
-    ):
-        super().__init__()
-
-        # Convert inputs to frozensets for immutable, fast lookups
-        self._allowed_domains = (
-            frozenset(self._normalize_domains(allowed_domains))
-            if allowed_domains
-            else None
-        )
-        self._blocked_domains = (
-            frozenset(self._normalize_domains(blocked_domains))
-            if blocked_domains
-            else frozenset()
-        )
-
-    @staticmethod
-    def _normalize_domains(domains: Union[str, List[str]]) -> Set[str]:
-        """Fast domain normalization"""
-        if isinstance(domains, str):
-            return {domains.lower()}
-        return {d.lower() for d in domains}
-
-    @staticmethod
-    @lru_cache(maxsize=10000)
-    def _extract_domain(url: str) -> str:
-        """Ultra-fast domain extraction with regex and caching"""
-        match = FastDomainFilter._DOMAIN_REGEX.search(url)
-        return match.group(1).lower() if match else ""
-
-    def apply(self, url: str) -> bool:
-        """Optimized domain checking with early returns"""
-        # Skip processing if no filters
-        if not self._blocked_domains and self._allowed_domains is None:
-            self._update_stats(True)
-            return True
-
-        domain = self._extract_domain(url)
-
-        # Early return for blocked domains
-        if domain in self._blocked_domains:
-            self._update_stats(False)
-            return False
-
-        # If no allowed domains specified, accept all non-blocked
-        if self._allowed_domains is None:
-            self._update_stats(True)
-            return True
-
-        # Final allowed domains check
-        result = domain in self._allowed_domains
-        self._update_stats(result)
-        return result
-
-
-def create_fast_filter_chain() -> FastFilterChain:
-    """Create an optimized filter chain with filters ordered by rejection rate"""
-    return FastFilterChain(
-        [
-            # Domain filter first (fastest rejection)
-            FastDomainFilter(blocked_domains=["ads.*", "analytics.*"]),
-            # Content filter second (medium speed)
-            FastContentTypeFilter(["text/html", "application/xhtml+xml"]),
-            # Pattern filter last (most expensive)
-            FastURLPatternFilter(
-                [
-                    "*.html",
-                    "*.htm",
-                    "*/article/*",
-                    "*/blog/*",
-                ]
-            ),
-        ]
-    )
-
-
-def run_performance_test():
-    import time
-    import random
-    from itertools import cycle
-
-    # Generate test URLs
-    base_urls = [
-        "https://example.com/article/123",
-        "https://blog.example.com/post/456",
-        "https://ads.example.com/tracking",
-        "https://example.com/about.html",
-        "https://analytics.example.com/script.js",
-        "https://example.com/products.php",
-        "https://subdomain.example.com/blog/post-123",
-        "https://example.com/path/file.pdf",
-    ]
-
-    # Create more varied test data
-    test_urls = []
-    for base in base_urls:
-        # Add original
-        test_urls.append(base)
-        # Add variations
-        parts = base.split("/")
-        for i in range(10):
-            parts[-1] = f"page_{i}.html"
-            test_urls.append("/".join(parts))
-
-    # Multiply to get enough test data
-    test_urls = test_urls * 10000  # Creates ~800k URLs
-
-    def benchmark(name: str, func, *args, warmup=True):
-        if warmup:
-            # Warmup run
-            func(*args)
-
-        # Actual timing
-        start = time.perf_counter_ns()
-        result = func(*args)
-        elapsed = (time.perf_counter_ns() - start) / 1_000_000  # Convert to ms
-        print(
-            f"{name:<30} {elapsed:>8.3f} ms  ({len(test_urls)/elapsed*1000:,.0f} URLs/sec)"
-        )
-        return result
-
-    print("\nBenchmarking original vs optimized implementations...")
-    print("-" * 70)
-
-    # Original implementation
-    pattern_filter = URLPatternFilter(["*.html", "*/article/*"])
-    content_filter = ContentTypeFilter(["text/html"])
-    domain_filter = DomainFilter(blocked_domains=["ads.*", "analytics.*"])
-    chain = FilterChain([pattern_filter, content_filter, domain_filter])
-
-    # Optimized implementation
-    fast_pattern_filter = FastURLPatternFilter(["*.html", "*/article/*"])
-    fast_content_filter = FastContentTypeFilter(["text/html"])
-    fast_domain_filter = FastDomainFilter(blocked_domains=["ads.*", "analytics.*"])
-    fast_chain = FastFilterChain(
-        [fast_domain_filter, fast_content_filter, fast_pattern_filter]
-    )
-
-    # Test individual filters
-    print("\nSingle filter performance (first 1000 URLs):")
-    test_subset = test_urls[:1000]
-
-    print("\nPattern Filters:")
-    benchmark(
-        "Original Pattern Filter",
-        lambda: [pattern_filter.apply(url) for url in test_subset],
-    )
-    benchmark(
-        "Optimized Pattern Filter",
-        lambda: [fast_pattern_filter.apply(url) for url in test_subset],
-    )
-
-    print("\nContent Filters:")
-    benchmark(
-        "Original Content Filter",
-        lambda: [content_filter.apply(url) for url in test_subset],
-    )
-    benchmark(
-        "Optimized Content Filter",
-        lambda: [fast_content_filter.apply(url) for url in test_subset],
-    )
-
-    print("\nDomain Filters:")
-    benchmark(
-        "Original Domain Filter",
-        lambda: [domain_filter.apply(url) for url in test_subset],
-    )
-    benchmark(
-        "Optimized Domain Filter",
-        lambda: [fast_domain_filter.apply(url) for url in test_subset],
-    )
-
-    print("\nFull Chain Performance (all URLs):")
-    # Test chain
-    benchmark("Original Chain", lambda: [chain.apply(url) for url in test_urls])
-    benchmark("Optimized Chain", lambda: [fast_chain.apply(url) for url in test_urls])
-
-    # Memory usage
-    import sys
-
-    print("\nMemory Usage per Filter:")
-    print(f"Original Pattern Filter: {sys.getsizeof(pattern_filter):,} bytes")
-    print(f"Optimized Pattern Filter: {sys.getsizeof(fast_pattern_filter):,} bytes")
-    print(f"Original Content Filter: {sys.getsizeof(content_filter):,} bytes")
-    print(f"Optimized Content Filter: {sys.getsizeof(fast_content_filter):,} bytes")
-    print(f"Original Domain Filter: {sys.getsizeof(domain_filter):,} bytes")
-    print(f"Optimized Domain Filter: {sys.getsizeof(fast_domain_filter):,} bytes")
-
-def test_pattern_filter():
-    import time
-    from itertools import chain
-
-    # Test cases as list of tuples instead of dict for multiple patterns
-    test_cases = [
-        # Simple suffix patterns (*.html)
-        ("*.html", {
-            "https://example.com/page.html": True,
-            "https://example.com/path/doc.html": True,
-            "https://example.com/page.htm": False,
-            "https://example.com/page.html?param=1": True,
-        }),
-        
-        # Path prefix patterns (/foo/*)
-        ("*/article/*", {
-            "https://example.com/article/123": True,
-            "https://example.com/blog/article/456": True,
-            "https://example.com/articles/789": False,
-            "https://example.com/article": False,
-        }),
-        
-        # Complex patterns
-        ("blog-*-[0-9]", {
-            "https://example.com/blog-post-1": True,
-            "https://example.com/blog-test-9": True,
-            "https://example.com/blog-post": False,
-            "https://example.com/blog-post-x": False,
-        }),
-        
-        # Multiple patterns case
-        (["*.pdf", "*/download/*"], {
-            "https://example.com/doc.pdf": True,
-            "https://example.com/download/file.txt": True,
-            "https://example.com/path/download/doc": True,
-            "https://example.com/uploads/file.txt": False,
-        }),
-        
-        # Edge cases
-        ("*", {
-            "https://example.com": True,
-            "": True,
-            "http://test.com/path": True,
-        }),
-        
-        # Complex regex
-        (r"^https?://.*\.example\.com/\d+", {
-            "https://sub.example.com/123": True,
-            "http://test.example.com/456": True,
-            "https://example.com/789": False,
-            "https://sub.example.com/abc": False,
-        })
-    ]
-
-    def run_accuracy_test():
-        print("\nAccuracy Tests:")
-        print("-" * 50)
-        
-        all_passed = True
-        for patterns, test_urls in test_cases:
-            filter_obj = FastURLPatternFilter(patterns)
-            
-            for url, expected in test_urls.items():
-                result = filter_obj.apply(url)
-                if result != expected:
-                    print(f"❌ Failed: Pattern '{patterns}' with URL '{url}'")
-                    print(f"   Expected: {expected}, Got: {result}")
-                    all_passed = False
-                else:
-                    print(f"✅ Passed: Pattern '{patterns}' with URL '{url}'")
-        
-        return all_passed
-
-    def run_speed_test():
-        print("\nSpeed Tests:")
-        print("-" * 50)
-        
-        # Create a large set of test URLs
-        all_urls = list(chain.from_iterable(urls.keys() for _, urls in test_cases))
-        test_urls = all_urls * 10000  # 100K+ URLs
-        
-        # Test both implementations
-        original = URLPatternFilter(["*.html", "*/article/*", "blog-*"])
-        optimized = FastURLPatternFilter(["*.html", "*/article/*", "blog-*"])
-        
-        def benchmark(name, filter_obj):
-            start = time.perf_counter()
-            for url in test_urls:
-                filter_obj.apply(url)
-            elapsed = time.perf_counter() - start
-            urls_per_sec = len(test_urls) / elapsed
-            print(f"{name:<20} {elapsed:.3f}s ({urls_per_sec:,.0f} URLs/sec)")
-        
-        benchmark("Original Filter:", original)
-        benchmark("Optimized Filter:", optimized)
-
-    # Run tests
-    print("Running Pattern Filter Tests...")
-    accuracy_passed = run_accuracy_test()
-    
-    if accuracy_passed:
-        print("\n✨ All accuracy tests passed!")
-        run_speed_test()
-    else:
-        print("\n❌ Some accuracy tests failed!")
-
-if __name__ == "__main__":
-    run_performance_test()
-    # test_pattern_filter()
diff --git a/crawl4ai/scraper/scorers.py b/crawl4ai/scraper/scorers.py
index 8552aa8e..4ef90dcd 100644
--- a/crawl4ai/scraper/scorers.py
+++ b/crawl4ai/scraper/scorers.py
@@ -1,6 +1,3 @@
-# from .url_scorer import URLScorer
-# from .keyword_relevance_scorer import KeywordRelevanceScorer
-
 from abc import ABC, abstractmethod
 from typing import List, Dict, Optional, Union
 from dataclasses import dataclass
@@ -9,17 +6,42 @@ import re
 from collections import defaultdict
 import math
 import logging
+from functools import lru_cache
+from array import array
+from functools import lru_cache
+import ctypes
+import platform
+PLATFORM = platform.system()
+
+# Pre-computed scores for common year differences
+_SCORE_LOOKUP = [1.0, 0.5, 0.3333333333333333, 0.25]
+
+# Pre-computed scores for common year differences
+_FRESHNESS_SCORES = [
+   1.0,    # Current year
+   0.9,    # Last year
+   0.8,    # 2 years ago
+   0.7,    # 3 years ago
+   0.6,    # 4 years ago
+   0.5,    # 5 years ago
+]
+
+# Pre-computed normalization factors for powers of 2
+_POW2_NORM = [1.0, 0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625]
 
 
 @dataclass
 class ScoringStats:
-    """Statistics for URL scoring"""
-
+    # PERF: Dataclass introduces overhead with property access and __init__
+    # PERF: Float operations and comparisons are expensive for high-frequency updates
+    # PERF: Property calculation on every access is inefficient
+    # PERF: Storing min/max adds memory overhead and comparison costs
+    # PERF: Using inf/-inf creates unnecessary float objects
     urls_scored: int = 0
     total_score: float = 0.0
-    min_score: float = float("inf")
+    min_score: float = float("inf")  # Expensive object creation
     max_score: float = float("-inf")
-
+    
     def update(self, score: float):
         """Update scoring statistics"""
         self.urls_scored += 1
@@ -32,9 +54,50 @@ class ScoringStats:
         """Calculate average score"""
         return self.total_score / self.urls_scored if self.urls_scored > 0 else 0.0
 
+class FastScoringStats:
+    __slots__ = ('_urls_scored', '_total_score', '_min_score', '_max_score')
+    
+    def __init__(self):
+        self._urls_scored = 0
+        self._total_score = 0.0
+        self._min_score = None  # Lazy initialization
+        self._max_score = None
+    
+    def update(self, score: float) -> None:
+        """Optimized update with minimal operations"""
+        self._urls_scored += 1
+        self._total_score += score
+        
+        # Lazy min/max tracking - only if actually accessed
+        if self._min_score is not None:
+            if score < self._min_score:
+                self._min_score = score
+        if self._max_score is not None:
+            if score > self._max_score:
+                self._max_score = score
+                
+    def get_average(self) -> float:
+        """Direct calculation instead of property"""
+        return self._total_score / self._urls_scored if self._urls_scored else 0.0
+    
+    def get_min(self) -> float:
+        """Lazy min calculation"""
+        if self._min_score is None:
+            self._min_score = self._total_score / self._urls_scored if self._urls_scored else 0.0
+        return self._min_score
+        
+    def get_max(self) -> float:
+        """Lazy max calculation"""
+        if self._max_score is None:
+            self._max_score = self._total_score / self._urls_scored if self._urls_scored else 0.0
+        return self._max_score
 
 class URLScorer(ABC):
-    """Base class for URL scoring strategies"""
+    # PERF: Property access overhead for weight
+    # PERF: Unnecessary name attribute
+    # PERF: Stats object creation overhead
+    # PERF: Logger creation for each instance
+    # PERF: Abstract method overhead
 
     def __init__(self, weight: float = 1.0, name: str = None):
         self.weight = weight
@@ -44,18 +107,52 @@ class URLScorer(ABC):
 
     @abstractmethod
     def _calculate_score(self, url: str) -> float:
-        """Calculate the raw score for a URL"""
         pass
 
     def score(self, url: str) -> float:
-        """Calculate the weighted score for a URL"""
         raw_score = self._calculate_score(url)
         weighted_score = raw_score * self.weight
         self.stats.update(weighted_score)
         return weighted_score
 
+# Optimized base class
+class FastURLScorer(ABC):
+    __slots__ = ('_weight', '_stats')
+    
+    def __init__(self, weight: float = 1.0):
+        # Store weight directly as float32 for memory efficiency
+        self._weight = ctypes.c_float(weight).value
+        self._stats = ScoringStats()
+    
+    @abstractmethod
+    def _calculate_score(self, url: str) -> float:
+        """Calculate raw score for URL."""
+        pass
+    
+    def score(self, url: str) -> float:
+        """Calculate weighted score with minimal overhead."""
+        score = self._calculate_score(url) * self._weight
+        self._stats.update(score)
+        return score
+    
+    @property
+    def stats(self):
+        """Access to scoring statistics."""
+        return self._stats
+    
+    @property
+    def weight(self):
+        return self._weight
 
 class CompositeScorer(URLScorer):
+    # PERF: Unnecessary list iteration for each score
+    # PERF: Creates new list for scores
+    # PERF: Division on every normalization
+    # PERF: No parallelization for independent scorers
+    # PERF: No short circuit for zero scores
+    # PERF: No weighting optimization
+    # PERF: No caching of combined scores
+    # PERF: List allocation for scores storag
     """Combines multiple scorers with weights"""
 
     def __init__(self, scorers: List[URLScorer], normalize: bool = True):
@@ -72,8 +169,84 @@ class CompositeScorer(URLScorer):
 
         return total_score
 
+class FastCompositeScorer(FastURLScorer):
+    __slots__ = ('_scorers', '_normalize', '_weights_array', '_score_array')
+    
+    def __init__(self, scorers: List[URLScorer], normalize: bool = True):
+        """Initialize composite scorer combining multiple scoring strategies.
+        
+        Optimized for:
+        - Fast parallel scoring
+        - Memory efficient score aggregation
+        - Quick short-circuit conditions
+        - Pre-allocated arrays
+        
+        Args:
+            scorers: List of scoring strategies to combine
+            normalize: Whether to normalize final score by scorer count
+        """
+        super().__init__(weight=1.0)
+        self._scorers = scorers
+        self._normalize = normalize
+        
+        # Pre-allocate arrays for scores and weights
+        self._weights_array = array('f', [s.weight for s in scorers])
+        self._score_array = array('f', [0.0] * len(scorers))
 
-class KeywordRelevanceScorer(URLScorer):
+    @lru_cache(maxsize=10000)
+    def _calculate_score(self, url: str) -> float:
+        """Calculate combined score from all scoring strategies.
+        
+        Uses:
+        1. Pre-allocated arrays for scores
+        2. Short-circuit on zero scores
+        3. Optimized normalization
+        4. Vectorized operations where possible
+        
+        Args:
+            url: URL to score
+            
+        Returns:
+            Combined and optionally normalized score
+        """
+        total_score = 0.0
+        scores = self._score_array
+        
+        # Get scores from all scorers
+        for i, scorer in enumerate(self._scorers):
+            # Use public score() method which applies weight
+            scores[i] = scorer.score(url)
+            total_score += scores[i]
+            
+        # Normalize if requested
+        if self._normalize and self._scorers:
+            count = len(self._scorers)
+            return total_score / count
+            
+        return total_score
+
+    def score(self, url: str) -> float:
+        """Public scoring interface with stats tracking.
+        
+        Args:
+            url: URL to score
+            
+        Returns:
+            Final combined score
+        """
+        score = self._calculate_score(url)
+        self.stats.update(score)
+        return score
+
+class KeywordRelevanceScorer(URLScorer):   
+    # PERF: Regex compilation and pattern matching is expensive 
+    # PERF: List comprehension with pattern search has high overhead
+    # PERF: URL decoding on every calculation
+    # PERF: Division operation for normalization is costly
+    # PERF: Case insensitive regex adds overhead
+    # PERF: No pattern caching or reuse
+    # PERF: Using inheritance adds method lookup overhead
+   
     """Score URLs based on keyword relevance.
 
     keyword_scorer = KeywordRelevanceScorer(
@@ -109,8 +282,44 @@ class KeywordRelevanceScorer(URLScorer):
         # Normalize score between 0 and 1
         return total_matches / len(self.patterns) if self.patterns else 0.0
 
+class FastKeywordRelevanceScorer(FastURLScorer):
+    __slots__ = ('_weight', '_stats', '_keywords', '_case_sensitive')
+    
+    def __init__(self, keywords: List[str], weight: float = 1.0, case_sensitive: bool = False):
+        super().__init__(weight=weight)
+        self._case_sensitive = case_sensitive
+        # Pre-process keywords once
+        self._keywords = [k if case_sensitive else k.lower() for k in keywords]
+    
+    @lru_cache(maxsize=10000)
+    def _url_bytes(self, url: str) -> bytes:
+        """Cache decoded URL bytes"""
+        return url.encode('utf-8') if self._case_sensitive else url.lower().encode('utf-8')
+    
+    
+    def _calculate_score(self, url: str) -> float:
+        """Fast string matching without regex or byte conversion"""
+        if not self._case_sensitive:
+            url = url.lower()
+            
+        matches = sum(1 for k in self._keywords if k in url)
+        
+        # Fast return paths
+        if not matches:
+            return 0.0
+        if matches == len(self._keywords):
+            return 1.0
+            
+        return matches / len(self._keywords)
 
 class PathDepthScorer(URLScorer):
+    # PERF: URL parsing on every call is expensive
+    # PERF: Split and list comprehension creates temporary lists
+    # PERF: abs() call adds function overhead
+    # PERF: Division and addition in score calculation are expensive for high frequency
+    # PERF: Path parts filtering creates extra list
+    # PERF: Inherits URLScorer adding method lookup overhead
+    # PERF: No caching of parsed URLs or calculated depths    
     """Score URLs based on their path depth.
 
     path_scorer = PathDepthScorer(
@@ -136,8 +345,71 @@ class PathDepthScorer(URLScorer):
         distance_from_optimal = abs(depth - self.optimal_depth)
         return 1.0 / (1.0 + distance_from_optimal)
 
+class FastPathDepthScorer(FastURLScorer):
+    __slots__ = ('_weight', '_stats', '_optimal_depth')  # Remove _url_cache
+    
+    def __init__(self, optimal_depth: int = 3, weight: float = 1.0):
+        super().__init__(weight=weight)
+        self._optimal_depth = optimal_depth
+
+    @staticmethod
+    @lru_cache(maxsize=10000)
+    def _quick_depth(path: str) -> int:
+        """Ultra fast path depth calculation.
+        
+        Examples:
+            - "http://example.com" -> 0  # No path segments
+            - "http://example.com/" -> 0  # Empty path
+            - "http://example.com/a" -> 1
+            - "http://example.com/a/b" -> 2
+        """
+        if not path or path == '/':
+            return 0
+            
+        if '/' not in path:
+            return 0
+            
+        depth = 0
+        last_was_slash = True
+        
+        for c in path:
+            if c == '/':
+                if not last_was_slash:
+                    depth += 1
+                last_was_slash = True
+            else:
+                last_was_slash = False
+                
+        if not last_was_slash:
+            depth += 1
+            
+        return depth
+
+    @lru_cache(maxsize=10000)  # Cache the whole calculation
+    def _calculate_score(self, url: str) -> float:
+        pos = url.find('/', url.find('://') + 3)
+        if pos == -1:
+            depth = 0
+        else:
+            depth = self._quick_depth(url[pos:])
+            
+        # Use lookup table for common distances
+        distance = depth - self._optimal_depth
+        distance = distance if distance >= 0 else -distance  # Faster than abs()
+        
+        if distance < 4:
+            return _SCORE_LOOKUP[distance]
+            
+        return 1.0 / (1.0 + distance)                                             
 
 class ContentTypeScorer(URLScorer):
+    # PERF: Regex compilation on every initialization
+    # PERF: Dict lookup and regex search for every URL
+    # PERF: Pattern iteration adds loop overhead
+    # PERF: No pattern priority or short-circuit
+    # PERF: Dict storage has lookup overhead
+    # PERF: Missing extension fast path check
+    # PERF: Unnecessary regex for simple extensions    
     """Score URLs based on content type preferences.
 
     content_scorer = ContentTypeScorer({
@@ -169,8 +441,100 @@ class ContentTypeScorer(URLScorer):
                 return weight
         return 0.0
 
+class FastContentTypeScorer(FastURLScorer):
+    __slots__ = ('_weight', '_exact_types', '_regex_types')
+
+    def __init__(self, type_weights: Dict[str, float], weight: float = 1.0):
+        """Initialize scorer with type weights map.
+        
+        Args:
+            type_weights: Dict mapping file extensions/patterns to scores (e.g. {'.html$': 1.0})
+            weight: Overall weight multiplier for this scorer
+        """
+        super().__init__(weight=weight)
+        self._exact_types = {}  # Fast lookup for simple extensions
+        self._regex_types = []  # Fallback for complex patterns
+        
+        # Split into exact vs regex matchers for performance
+        for pattern, score in type_weights.items():
+            if pattern.startswith('.') and pattern.endswith('$'):
+                ext = pattern[1:-1]
+                self._exact_types[ext] = score
+            else:
+                self._regex_types.append((re.compile(pattern), score))
+                
+        # Sort complex patterns by score for early exit
+        self._regex_types.sort(key=lambda x: -x[1])
+
+    @staticmethod
+    @lru_cache(maxsize=10000)
+    def _quick_extension(url: str) -> str:
+        """Extract file extension ultra-fast without regex/splits.
+        
+        Handles:
+        - Basic extensions: "example.html" -> "html"
+        - Query strings: "page.php?id=1" -> "php" 
+        - Fragments: "doc.pdf#page=1" -> "pdf"
+        - Path params: "file.jpg;width=100" -> "jpg"
+        
+        Args:
+            url: URL to extract extension from
+            
+        Returns:
+            Extension without dot, or empty string if none found
+        """
+        pos = url.rfind('.')
+        if pos == -1:
+            return ''
+        
+        # Find first non-alphanumeric char after extension
+        end = len(url)
+        for i in range(pos + 1, len(url)):
+            c = url[i]
+            # Stop at query string, fragment, path param or any non-alphanumeric
+            if c in '?#;' or not c.isalnum():
+                end = i
+                break
+                
+        return url[pos + 1:end].lower()
+
+    @lru_cache(maxsize=10000)
+    def _calculate_score(self, url: str) -> float:
+        """Calculate content type score for URL.
+        
+        Uses staged approach:
+        1. Try exact extension match (fast path)
+        2. Fall back to regex patterns if needed
+        
+        Args:
+            url: URL to score
+            
+        Returns:
+            Score between 0.0 and 1.0 * weight
+        """
+        # Fast path: direct extension lookup
+        ext = self._quick_extension(url)
+        if ext:
+            score = self._exact_types.get(ext, None)
+            if score is not None:
+                return score
+                
+        # Slow path: regex patterns
+        for pattern, score in self._regex_types:
+            if pattern.search(url):
+                return score
+
+        return 0.0
 
 class FreshnessScorer(URLScorer):
+    # PERF: Multiple regex compilations for each pattern
+    # PERF: Tries all patterns sequentially 
+    # PERF: Regex pattern matching is expensive
+    # PERF: Int conversion and arithmetic for every match
+    # PERF: Repeated constant value (2024) hardcoded
+    # PERF: No URL caching
+    # PERF: Complex patterns with redundant groups
+    # PERF: Unnecessary list of patterns when could combine
     """Score URLs based on freshness indicators.
 
     freshness_scorer = FreshnessScorer(weight=0.9)
@@ -201,8 +565,96 @@ class FreshnessScorer(URLScorer):
                 return 1.0 - (2024 - year) * 0.1
         return 0.5  # Default score for URLs without dates
 
+class FastFreshnessScorer(FastURLScorer):
+    __slots__ = ('_weight', '_date_pattern', '_current_year')
+
+    def __init__(self, weight: float = 1.0, current_year: int = 2024):
+        """Initialize freshness scorer.
+        
+        Extracts and scores dates from URLs using format:
+        - YYYY/MM/DD 
+        - YYYY-MM-DD
+        - YYYY_MM_DD
+        - YYYY (year only)
+        
+        Args:
+            weight: Score multiplier
+            current_year: Year to calculate freshness against (default 2024)
+        """
+        super().__init__(weight=weight)
+        self._current_year = current_year
+        
+        # Combined pattern for all date formats
+        # Uses non-capturing groups (?:) and alternation
+        self._date_pattern = re.compile(
+            r'(?:/'  # Path separator
+            r'|[-_])'  # or date separators
+            r'((?:19|20)\d{2})'  # Year group (1900-2099)
+            r'(?:'  # Optional month/day group
+            r'(?:/|[-_])'  # Date separator  
+            r'(?:\d{2})'  # Month
+            r'(?:'  # Optional day
+            r'(?:/|[-_])'  # Date separator
+            r'(?:\d{2})'  # Day
+            r')?'  # Day is optional
+            r')?'  # Month/day group is optional
+        )
+
+    @lru_cache(maxsize=10000)
+    def _extract_year(self, url: str) -> Optional[int]:
+        """Extract the most recent year from URL.
+        
+        Args:
+            url: URL to extract year from
+            
+        Returns:
+            Year as int or None if no valid year found
+        """
+        matches = self._date_pattern.finditer(url)
+        latest_year = None
+        
+        # Find most recent year
+        for match in matches:
+            year = int(match.group(1))
+            if (year <= self._current_year and  # Sanity check
+                (latest_year is None or year > latest_year)):
+                latest_year = year
+                
+        return latest_year
+
+    @lru_cache(maxsize=10000) 
+    def _calculate_score(self, url: str) -> float:
+        """Calculate freshness score based on URL date.
+        
+        More recent years score higher. Uses pre-computed scoring
+        table for common year differences.
+        
+        Args:
+            url: URL to score
+            
+        Returns:
+            Score between 0.0 and 1.0 * weight
+        """
+        year = self._extract_year(url)
+        if year is None:
+            return 0.5  # Default score
+            
+        # Use lookup table for common year differences
+        year_diff = self._current_year - year
+        if year_diff < len(_FRESHNESS_SCORES):
+            return _FRESHNESS_SCORES[year_diff]
+            
+        # Fallback calculation for older content
+        return max(0.1, 1.0 - year_diff * 0.1)
 
 class DomainAuthorityScorer(URLScorer):
+    # PERF: URL parsing on every score calculation
+    # PERF: Repeated domain extraction
+    # PERF: Case conversion on every lookup
+    # PERF: Dict lookup without caching
+    # PERF: Processes full URL when only needs domain
+    # PERF: No fast path for common domains
+    # PERF: Netloc includes port which requires extra processing
     """Score URLs based on domain authority.
 
     authority_scorer = DomainAuthorityScorer({
@@ -230,6 +682,112 @@ class DomainAuthorityScorer(URLScorer):
         domain = urlparse(url).netloc.lower()
         return self.domain_weights.get(domain, self.default_weight)
 
+class FastDomainAuthorityScorer(FastURLScorer):
+    __slots__ = ('_weight', '_domain_weights', '_default_weight', '_top_domains')
+    
+    def __init__(
+        self,
+        domain_weights: Dict[str, float],
+        default_weight: float = 0.5,
+        weight: float = 1.0,
+    ):
+        """Initialize domain authority scorer.
+        
+        Args:
+            domain_weights: Dict mapping domains to authority scores
+            default_weight: Score for unknown domains
+            weight: Overall scorer weight multiplier
+            
+        Example:
+            {
+                'python.org': 1.0,
+                'github.com': 0.9,
+                'medium.com': 0.7
+            }
+        """
+        super().__init__(weight=weight)
+        
+        # Pre-process domains for faster lookup
+        self._domain_weights = {
+            domain.lower(): score 
+            for domain, score in domain_weights.items()
+        }
+        self._default_weight = default_weight
+        
+        # Cache top domains for fast path
+        self._top_domains = {
+            domain: score
+            for domain, score in sorted(
+                domain_weights.items(), 
+                key=lambda x: -x[1]
+            )[:5]  # Keep top 5 highest scoring domains
+        }
+
+    @staticmethod
+    @lru_cache(maxsize=10000)
+    def _extract_domain(url: str) -> str:
+        """Extract domain from URL ultra-fast.
+        
+        Handles:
+        - Basic domains: "example.com"
+        - Subdomains: "sub.example.com" 
+        - Ports: "example.com:8080"
+        - IPv4: "192.168.1.1"
+        
+        Args:
+            url: Full URL to extract domain from
+            
+        Returns:
+            Lowercase domain without port
+        """
+        # Find domain start
+        start = url.find('://') 
+        if start == -1:
+            start = 0
+        else:
+            start += 3
+            
+        # Find domain end
+        end = url.find('/', start)
+        if end == -1:
+            end = url.find('?', start)
+            if end == -1:
+                end = url.find('#', start)
+                if end == -1:
+                    end = len(url)
+                    
+        # Extract domain and remove port
+        domain = url[start:end]
+        port_idx = domain.rfind(':')
+        if port_idx != -1:
+            domain = domain[:port_idx]
+            
+        return domain.lower()
+
+    @lru_cache(maxsize=10000)
+    def _calculate_score(self, url: str) -> float:
+        """Calculate domain authority score.
+        
+        Uses staged approach:
+        1. Check top domains (fastest)
+        2. Check full domain weights
+        3. Return default weight
+        
+        Args:
+            url: URL to score
+            
+        Returns:
+            Authority score between 0.0 and 1.0 * weight
+        """
+        domain = self._extract_domain(url)
+        
+        # Fast path: check top domains first
+        score = self._top_domains.get(domain)
+        if score is not None:
+            return score
+            
+        # Regular path: check all domains
+        return self._domain_weights.get(domain, self._default_weight)
 
 def create_balanced_scorer() -> CompositeScorer:
     """Create a balanced composite scorer"""
@@ -247,6 +805,21 @@ def create_balanced_scorer() -> CompositeScorer:
         ]
     )
 
+def create_balanced_fast_freshness_scorer() -> CompositeScorer:
+    """Create a balanced composite scorer with fast freshness scorer"""
+    return FastCompositeScorer(
+        [
+            FastKeywordRelevanceScorer(
+                keywords=["article", "blog", "news", "research"], weight=1.0
+            ),
+            FastPathDepthScorer(optimal_depth=3, weight=0.7),
+            FastContentTypeScorer(
+                type_weights={r"\.html?$": 1.0, r"\.pdf$": 0.8, r"\.xml$": 0.6},
+                weight=0.8,
+            ),
+            FastFreshnessScorer(weight=0.9),
+        ]
+    )
 
 # Example Usage:
 """
@@ -272,3 +845,361 @@ score = scorer.score("https://python.org/article/2024/01/new-features")
 print(f"Average score: {scorer.stats.average_score}")
 print(f"URLs scored: {scorer.stats.urls_scored}")
 """
+
+
+def run_scorer_performance_test():
+    import time
+    import random
+    from itertools import cycle
+    import sys
+
+    # Generate varied test URLs
+    base_urls = [
+        # News/blog articles with dates
+        "https://example.com/2024/01/article-123",
+        "https://news.com/2023-12-31/breaking-news",
+        "https://blog.site.com/2022_11_15/tech-update",
+        
+        # Different content types
+        "https://docs.example.com/report.pdf",
+        "https://site.com/page.html?q=test",
+        "https://api.service.com/data.json",
+        
+        # Various domain authorities
+        "https://python.org/downloads",
+        "https://github.com/repo/code",
+        "https://medium.com/@user/post",
+        
+        # Different path depths
+        "https://site.com/category/subcategory/product/detail",
+        "https://shop.com/items",
+        "https://edu.org/courses/cs/intro/lecture1",
+    ]
+
+    # Create variations
+    test_urls = []
+    years = list(range(2020, 2025))
+    domains = ["example.com", "python.org", "github.com", "medium.com"]
+    extensions = ["html", "pdf", "php", "jsx"]
+    
+    for base in base_urls:
+        test_urls.append(base)
+        # Add year variations
+        for year in years:
+            test_urls.append(f"https://blog.com/{year}/post-{random.randint(1,999)}")
+        # Add domain variations    
+        for domain in domains:
+            test_urls.append(f"https://{domain}/article-{random.randint(1,999)}")
+        # Add extension variations    
+        for ext in extensions:
+            test_urls.append(f"https://site.com/doc-{random.randint(1,999)}.{ext}")
+
+    # Multiply dataset
+    test_urls = test_urls * 5000  # Creates ~300k URLs
+    
+    def benchmark(name: str, scorer, urls, warmup=True):
+        if warmup:
+            for url in urls[:100]:  # Warmup with subset
+                scorer.score(url)
+
+        start = time.perf_counter_ns()
+        for url in urls:
+            scorer.score(url)
+        elapsed = (time.perf_counter_ns() - start) / 1_000_000  # Convert to ms
+        
+        print(
+            f"{name:<35} {elapsed:>8.3f} ms  ({len(urls)/elapsed*1000:,.0f} URLs/sec)"
+        )
+        return elapsed
+
+    print("\nBenchmarking original vs optimized scorers...")
+    print("-" * 75)
+
+    # Initialize test data
+    domain_weights = {"python.org": 1.0, "github.com": 0.9, "medium.com": 0.7}
+    type_weights = {".html$": 1.0, ".pdf$": 0.8, ".php$": 0.6}
+    keywords = ["python", "article", "blog", "docs"]
+
+    # Original implementations
+    keyword_scorer = KeywordRelevanceScorer(keywords=keywords, weight=1.0)
+    path_scorer = PathDepthScorer(optimal_depth=3, weight=0.7)
+    content_scorer = ContentTypeScorer(type_weights=type_weights, weight=0.8)
+    freshness_scorer = FreshnessScorer(weight=0.9)
+    domain_scorer = DomainAuthorityScorer(domain_weights=domain_weights, weight=1.0)
+
+    # Fast implementations
+    fast_keyword_scorer = FastKeywordRelevanceScorer(keywords=keywords, weight=1.0)
+    fast_path_scorer = FastPathDepthScorer(optimal_depth=3, weight=0.7)
+    fast_content_scorer = FastContentTypeScorer(type_weights=type_weights, weight=0.8)
+    fast_freshness_scorer = FastFreshnessScorer(weight=0.9)
+    fast_domain_scorer = FastDomainAuthorityScorer(domain_weights=domain_weights, weight=1.0)
+
+    # Test subset for individual scorers
+    test_subset = test_urls[:1000]
+
+    print("\nIndividual Scorer Performance (first 1000 URLs):")
+    
+    print("\nKeyword Relevance Scorers:")
+    benchmark("Original Keyword Scorer", keyword_scorer, test_subset)
+    benchmark("Optimized Keyword Scorer", fast_keyword_scorer, test_subset)
+
+    print("\nPath Depth Scorers:")
+    benchmark("Original Path Scorer", path_scorer, test_subset)
+    benchmark("Optimized Path Scorer", fast_path_scorer, test_subset)
+
+    print("\nContent Type Scorers:")
+    benchmark("Original Content Scorer", content_scorer, test_subset)
+    benchmark("Optimized Content Scorer", fast_content_scorer, test_subset)
+
+    print("\nFreshness Scorers:")
+    benchmark("Original Freshness Scorer", freshness_scorer, test_subset)
+    benchmark("Optimized Freshness Scorer", fast_freshness_scorer, test_subset)
+
+    print("\nDomain Authority Scorers:")
+    benchmark("Original Domain Scorer", domain_scorer, test_subset)
+    benchmark("Optimized Domain Scorer", fast_domain_scorer, test_subset)
+
+    # Test composite scorers
+    print("\nComposite Scorer Performance (all URLs):")
+    
+    original_composite = CompositeScorer([
+        keyword_scorer, path_scorer, content_scorer, 
+        freshness_scorer, domain_scorer
+    ])
+    
+    fast_composite = FastCompositeScorer([
+        fast_keyword_scorer, fast_path_scorer, fast_content_scorer,
+        fast_freshness_scorer, fast_domain_scorer
+    ])
+
+    benchmark("Original Composite Scorer", original_composite, test_urls)
+    benchmark("Optimized Composite Scorer", fast_composite, test_urls)
+
+    # Memory usage
+    print("\nMemory Usage per Scorer:")
+    print(f"Original Keyword Scorer: {sys.getsizeof(keyword_scorer):,} bytes")
+    print(f"Optimized Keyword Scorer: {sys.getsizeof(fast_keyword_scorer):,} bytes")
+    print(f"Original Path Scorer: {sys.getsizeof(path_scorer):,} bytes")
+    print(f"Optimized Path Scorer: {sys.getsizeof(fast_path_scorer):,} bytes")
+    print(f"Original Content Scorer: {sys.getsizeof(content_scorer):,} bytes")
+    print(f"Optimized Content Scorer: {sys.getsizeof(fast_content_scorer):,} bytes")
+    print(f"Original Freshness Scorer: {sys.getsizeof(freshness_scorer):,} bytes")
+    print(f"Optimized Freshness Scorer: {sys.getsizeof(fast_freshness_scorer):,} bytes")
+    print(f"Original Domain Scorer: {sys.getsizeof(domain_scorer):,} bytes")
+    print(f"Optimized Domain Scorer: {sys.getsizeof(fast_domain_scorer):,} bytes")
+    print(f"Original Composite: {sys.getsizeof(original_composite):,} bytes")
+    print(f"Optimized Composite: {sys.getsizeof(fast_composite):,} bytes")
+
+def test_scorers():
+    import time
+    from itertools import chain
+
+    test_cases = [
+        # Keyword Scorer Tests
+        {
+            "scorer_type": "keyword",
+            "config": {
+                "keywords": ["python", "blog"],
+                "weight": 1.0,
+                "case_sensitive": False
+            },
+            "urls": {
+                "https://example.com/python-blog": 1.0,
+                "https://example.com/PYTHON-BLOG": 1.0,
+                "https://example.com/python-only": 0.5,
+                "https://example.com/other": 0.0
+            }
+        },
+        
+        # Path Depth Scorer Tests
+        {
+            "scorer_type": "path_depth",
+            "config": {
+                "optimal_depth": 2,
+                "weight": 1.0
+            },
+            "urls": {
+                "https://example.com/a/b": 1.0,
+                "https://example.com/a": 0.5,
+                "https://example.com/a/b/c": 0.5,
+                "https://example.com": 0.33333333
+            }
+        },
+        
+        # Content Type Scorer Tests
+        {
+            "scorer_type": "content_type",
+            "config": {
+                "type_weights": {
+                    ".html$": 1.0,
+                    ".pdf$": 0.8,
+                    ".jpg$": 0.6
+                },
+                "weight": 1.0
+            },
+            "urls": {
+                "https://example.com/doc.html": 1.0,
+                "https://example.com/doc.pdf": 0.8,
+                "https://example.com/img.jpg": 0.6,
+                "https://example.com/other.txt": 0.0
+            }
+        },
+        
+        # Freshness Scorer Tests
+        {
+            "scorer_type": "freshness",
+            "config": {
+                "weight": 1.0,  # Remove current_year since original doesn't support it
+            },
+            "urls": {
+                "https://example.com/2024/01/post": 1.0,
+                "https://example.com/2023/12/post": 0.9,
+                "https://example.com/2022/post": 0.8,
+                "https://example.com/no-date": 0.5
+            }
+        },
+        
+        # Domain Authority Scorer Tests
+        {
+            "scorer_type": "domain",
+            "config": {
+                "domain_weights": {
+                    "python.org": 1.0,
+                    "github.com": 0.8,
+                    "medium.com": 0.6
+                },
+                "default_weight": 0.3,
+                "weight": 1.0
+            },
+            "urls": {
+                "https://python.org/about": 1.0,
+                "https://github.com/repo": 0.8,
+                "https://medium.com/post": 0.6,
+                "https://unknown.com": 0.3
+            }
+        }
+    ]
+
+    def create_scorer(scorer_type, config):
+        if scorer_type == "keyword":
+            return (
+                KeywordRelevanceScorer(**config),
+                FastKeywordRelevanceScorer(**config)
+            )
+        elif scorer_type == "path_depth":
+            return (
+                PathDepthScorer(**config),
+                FastPathDepthScorer(**config)
+            )
+        elif scorer_type == "content_type":
+            return (
+                ContentTypeScorer(**config),
+                FastContentTypeScorer(**config)
+            )
+        elif scorer_type == "freshness":
+            return (
+        FreshnessScorer(**config),
+        FastFreshnessScorer(**config, current_year=2024)
+            )
+        elif scorer_type == "domain":
+            return (
+                DomainAuthorityScorer(**config),
+                FastDomainAuthorityScorer(**config)
+            )
+
+    def run_accuracy_test():
+        print("\nAccuracy Tests:")
+        print("-" * 50)
+        
+        all_passed = True
+        for test_case in test_cases:
+            print(f"\nTesting {test_case['scorer_type']} scorer:")
+            original, fast = create_scorer(
+                test_case['scorer_type'],
+                test_case['config']
+            )
+            
+            for url, expected in test_case['urls'].items():
+                orig_score = round(original.score(url), 8)
+                fast_score = round(fast.score(url), 8)
+                expected = round(expected, 8)
+                
+                if abs(orig_score - expected) > 0.00001:
+                    print(f"❌ Original Failed: URL '{url}'")
+                    print(f"   Expected: {expected}, Got: {orig_score}")
+                    all_passed = False
+                else:
+                    print(f"✅ Original Passed: URL '{url}'")
+                    
+                if abs(fast_score - expected) > 0.00001:
+                    print(f"❌ Fast Failed: URL '{url}'")
+                    print(f"   Expected: {expected}, Got: {fast_score}")
+                    all_passed = False
+                else:
+                    print(f"✅ Fast Passed: URL '{url}'")
+                    
+        return all_passed
+
+    def run_composite_test():
+        print("\nTesting Composite Scorer:")
+        print("-" * 50)
+        
+        # Create test data
+        test_urls = {
+            "https://python.org/blog/2024/01/new-release.html":0.86666667,
+            "https://github.com/repo/old-code.pdf": 0.62,
+            "https://unknown.com/random": 0.26
+        }
+        
+        # Create composite scorers with all types
+        original_scorers = []
+        fast_scorers = []
+        
+        for test_case in test_cases:
+            orig, fast = create_scorer(
+                test_case['scorer_type'],
+                test_case['config']
+            )
+            original_scorers.append(orig)
+            fast_scorers.append(fast)
+            
+        original_composite = CompositeScorer(original_scorers, normalize=True)
+        fast_composite = FastCompositeScorer(fast_scorers, normalize=True)
+        
+        all_passed = True
+        for url, expected in test_urls.items():
+            orig_score = round(original_composite.score(url), 8)
+            fast_score = round(fast_composite.score(url), 8)
+            
+            if abs(orig_score - expected) > 0.00001:
+                print(f"❌ Original Composite Failed: URL '{url}'")
+                print(f"   Expected: {expected}, Got: {orig_score}")
+                all_passed = False
+            else:
+                print(f"✅ Original Composite Passed: URL '{url}'")
+                
+            if abs(fast_score - expected) > 0.00001:
+                print(f"❌ Fast Composite Failed: URL '{url}'")
+                print(f"   Expected: {expected}, Got: {fast_score}")
+                all_passed = False
+            else:
+                print(f"✅ Fast Composite Passed: URL '{url}'")
+                
+        return all_passed
+
+    # Run tests
+    print("Running Scorer Tests...")
+    accuracy_passed = run_accuracy_test()
+    composite_passed = run_composite_test()
+    
+    if accuracy_passed and composite_passed:
+        print("\n✨ All tests passed!")
+        # Note: Already have performance tests in run_scorer_performance_test()
+    else:
+        print("\n❌ Some tests failed!")
+
+    
+
+if __name__ == "__main__":
+    run_scorer_performance_test()
+    # test_scorers()
\ No newline at end of file
diff --git a/crawl4ai/scraper/scorers_review.py b/crawl4ai/scraper/scorers_review.py
deleted file mode 100644
index 990254d5..00000000
--- a/crawl4ai/scraper/scorers_review.py
+++ /dev/null
@@ -1,1208 +0,0 @@
-# from .url_scorer import URLScorer
-# from .keyword_relevance_scorer import KeywordRelevanceScorer
-
-from abc import ABC, abstractmethod
-from typing import List, Dict, Optional, Union
-from dataclasses import dataclass
-from urllib.parse import urlparse, unquote
-import re
-from collections import defaultdict
-import math
-import logging
-from functools import lru_cache
-from array import array
-from functools import lru_cache
-import ctypes
-import platform
-PLATFORM = platform.system()
-
-# Pre-computed scores for common year differences
-_SCORE_LOOKUP = [1.0, 0.5, 0.3333333333333333, 0.25]
-
-# Pre-computed scores for common year differences
-_FRESHNESS_SCORES = [
-   1.0,    # Current year
-   0.9,    # Last year
-   0.8,    # 2 years ago
-   0.7,    # 3 years ago
-   0.6,    # 4 years ago
-   0.5,    # 5 years ago
-]
-
-# Pre-computed normalization factors for powers of 2
-_POW2_NORM = [1.0, 0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625]
-
-
-@dataclass
-class ScoringStats:
-    # PERF: Dataclass introduces overhead with property access and __init__
-    # PERF: Float operations and comparisons are expensive for high-frequency updates
-    # PERF: Property calculation on every access is inefficient
-    # PERF: Storing min/max adds memory overhead and comparison costs
-    # PERF: Using inf/-inf creates unnecessary float objects
-    urls_scored: int = 0
-    total_score: float = 0.0
-    min_score: float = float("inf")  # Expensive object creation
-    max_score: float = float("-inf")
-    
-    def update(self, score: float):
-        """Update scoring statistics"""
-        self.urls_scored += 1
-        self.total_score += score
-        self.min_score = min(self.min_score, score)
-        self.max_score = max(self.max_score, score)
-
-    @property
-    def average_score(self) -> float:
-        """Calculate average score"""
-        return self.total_score / self.urls_scored if self.urls_scored > 0 else 0.0
-
-class FastScoringStats:
-    __slots__ = ('_urls_scored', '_total_score', '_min_score', '_max_score')
-    
-    def __init__(self):
-        self._urls_scored = 0
-        self._total_score = 0.0
-        self._min_score = None  # Lazy initialization
-        self._max_score = None
-    
-    def update(self, score: float) -> None:
-        """Optimized update with minimal operations"""
-        self._urls_scored += 1
-        self._total_score += score
-        
-        # Lazy min/max tracking - only if actually accessed
-        if self._min_score is not None:
-            if score < self._min_score:
-                self._min_score = score
-        if self._max_score is not None:
-            if score > self._max_score:
-                self._max_score = score
-                
-    def get_average(self) -> float:
-        """Direct calculation instead of property"""
-        return self._total_score / self._urls_scored if self._urls_scored else 0.0
-    
-    def get_min(self) -> float:
-        """Lazy min calculation"""
-        if self._min_score is None:
-            self._min_score = self._total_score / self._urls_scored if self._urls_scored else 0.0
-        return self._min_score
-        
-    def get_max(self) -> float:
-        """Lazy max calculation"""
-        if self._max_score is None:
-            self._max_score = self._total_score / self._urls_scored if self._urls_scored else 0.0
-        return self._max_score
-
-class URLScorer(ABC):
-    # PERF: Property access overhead for weight
-    # PERF: Unnecessary name attribute
-    # PERF: Stats object creation overhead
-    # PERF: Logger creation for each instance
-    # PERF: Abstract method overhead
-
-    def __init__(self, weight: float = 1.0, name: str = None):
-        self.weight = weight
-        self.name = name or self.__class__.__name__
-        self.stats = ScoringStats()
-        self.logger = logging.getLogger(f"urlscorer.{self.name}")
-
-    @abstractmethod
-    def _calculate_score(self, url: str) -> float:
-        pass
-
-    def score(self, url: str) -> float:
-        raw_score = self._calculate_score(url)
-        weighted_score = raw_score * self.weight
-        self.stats.update(weighted_score)
-        return weighted_score
-
-# Optimized base class
-class FastURLScorer(ABC):
-    __slots__ = ('_weight', '_stats')
-    
-    def __init__(self, weight: float = 1.0):
-        # Store weight directly as float32 for memory efficiency
-        self._weight = ctypes.c_float(weight).value
-        self._stats = ScoringStats()
-    
-    @abstractmethod
-    def _calculate_score(self, url: str) -> float:
-        """Calculate raw score for URL."""
-        pass
-    
-    def score(self, url: str) -> float:
-        """Calculate weighted score with minimal overhead."""
-        score = self._calculate_score(url) * self._weight
-        self._stats.update(score)
-        return score
-    
-    @property
-    def stats(self):
-        """Access to scoring statistics."""
-        return self._stats
-    
-    @property
-    def weight(self):
-        return self._weight
-
-class CompositeScorer(URLScorer):
-    # PERF: Unnecessary list iteration for each score
-    # PERF: Creates new list for scores
-    # PERF: Division on every normalization
-    # PERF: No parallelization for independent scorers
-    # PERF: No short circuit for zero scores
-    # PERF: No weighting optimization
-    # PERF: No caching of combined scores
-    # PERF: List allocation for scores storag
-    """Combines multiple scorers with weights"""
-
-    def __init__(self, scorers: List[URLScorer], normalize: bool = True):
-        super().__init__(name="CompositeScorer")
-        self.scorers = scorers
-        self.normalize = normalize
-
-    def _calculate_score(self, url: str) -> float:
-        scores = [scorer.score(url) for scorer in self.scorers]
-        total_score = sum(scores)
-
-        if self.normalize and scores:
-            total_score /= len(scores)
-
-        return total_score
-
-class FastCompositeScorer(FastURLScorer):
-    __slots__ = ('_scorers', '_normalize', '_weights_array', '_score_array')
-    
-    def __init__(self, scorers: List[URLScorer], normalize: bool = True):
-        """Initialize composite scorer combining multiple scoring strategies.
-        
-        Optimized for:
-        - Fast parallel scoring
-        - Memory efficient score aggregation
-        - Quick short-circuit conditions
-        - Pre-allocated arrays
-        
-        Args:
-            scorers: List of scoring strategies to combine
-            normalize: Whether to normalize final score by scorer count
-        """
-        super().__init__(weight=1.0)
-        self._scorers = scorers
-        self._normalize = normalize
-        
-        # Pre-allocate arrays for scores and weights
-        self._weights_array = array('f', [s.weight for s in scorers])
-        self._score_array = array('f', [0.0] * len(scorers))
-
-    @lru_cache(maxsize=10000)
-    def _calculate_score(self, url: str) -> float:
-        """Calculate combined score from all scoring strategies.
-        
-        Uses:
-        1. Pre-allocated arrays for scores
-        2. Short-circuit on zero scores
-        3. Optimized normalization
-        4. Vectorized operations where possible
-        
-        Args:
-            url: URL to score
-            
-        Returns:
-            Combined and optionally normalized score
-        """
-        total_score = 0.0
-        scores = self._score_array
-        
-        # Get scores from all scorers
-        for i, scorer in enumerate(self._scorers):
-            # Use public score() method which applies weight
-            scores[i] = scorer.score(url)
-            total_score += scores[i]
-            
-        # Normalize if requested
-        if self._normalize and self._scorers:
-            count = len(self._scorers)
-            return total_score / count
-            
-        return total_score
-
-    def score(self, url: str) -> float:
-        """Public scoring interface with stats tracking.
-        
-        Args:
-            url: URL to score
-            
-        Returns:
-            Final combined score
-        """
-        score = self._calculate_score(url)
-        self.stats.update(score)
-        return score
-
-class KeywordRelevanceScorer(URLScorer):   
-    # PERF: Regex compilation and pattern matching is expensive 
-    # PERF: List comprehension with pattern search has high overhead
-    # PERF: URL decoding on every calculation
-    # PERF: Division operation for normalization is costly
-    # PERF: Case insensitive regex adds overhead
-    # PERF: No pattern caching or reuse
-    # PERF: Using inheritance adds method lookup overhead
-   
-    """Score URLs based on keyword relevance.
-
-    keyword_scorer = KeywordRelevanceScorer(
-        keywords=["python", "programming"],
-        weight=1.0,
-        case_sensitive=False
-    )
-
-    - Score based on keyword matches
-    - Case sensitivity options
-    - Weighted scoring
-    """
-
-    def __init__(
-        self, keywords: List[str], weight: float = 1.0, case_sensitive: bool = False
-    ):
-        super().__init__(weight=weight)
-        self.keywords = keywords
-        self.case_sensitive = case_sensitive
-        self._compile_keywords()
-
-    def _compile_keywords(self):
-        """Prepare keywords for matching"""
-        flags = 0 if self.case_sensitive else re.IGNORECASE
-        self.patterns = [re.compile(re.escape(k), flags) for k in self.keywords]
-
-    def _calculate_score(self, url: str) -> float:
-        """Calculate score based on keyword matches"""
-        decoded_url = unquote(url)
-        total_matches = sum(
-            1 for pattern in self.patterns if pattern.search(decoded_url)
-        )
-        # Normalize score between 0 and 1
-        return total_matches / len(self.patterns) if self.patterns else 0.0
-
-class FastKeywordRelevanceScorer(FastURLScorer):
-    __slots__ = ('_weight', '_stats', '_keywords', '_case_sensitive')
-    
-    def __init__(self, keywords: List[str], weight: float = 1.0, case_sensitive: bool = False):
-        super().__init__(weight=weight)
-        self._case_sensitive = case_sensitive
-        # Pre-process keywords once
-        self._keywords = [k if case_sensitive else k.lower() for k in keywords]
-    
-    @lru_cache(maxsize=10000)
-    def _url_bytes(self, url: str) -> bytes:
-        """Cache decoded URL bytes"""
-        return url.encode('utf-8') if self._case_sensitive else url.lower().encode('utf-8')
-    
-    
-    def _calculate_score(self, url: str) -> float:
-        """Fast string matching without regex or byte conversion"""
-        if not self._case_sensitive:
-            url = url.lower()
-            
-        matches = sum(1 for k in self._keywords if k in url)
-        
-        # Fast return paths
-        if not matches:
-            return 0.0
-        if matches == len(self._keywords):
-            return 1.0
-            
-        return matches / len(self._keywords)
-
-class PathDepthScorer(URLScorer):
-    # PERF: URL parsing on every call is expensive
-    # PERF: Split and list comprehension creates temporary lists
-    # PERF: abs() call adds function overhead
-    # PERF: Division and addition in score calculation are expensive for high frequency
-    # PERF: Path parts filtering creates extra list
-    # PERF: Inherits URLScorer adding method lookup overhead
-    # PERF: No caching of parsed URLs or calculated depths    
-    """Score URLs based on their path depth.
-
-    path_scorer = PathDepthScorer(
-        optimal_depth=3,  # Preferred URL depth
-        weight=0.7
-    )
-
-    - Score based on URL path depth
-    - Configurable optimal depth
-    - Diminishing returns for deeper paths
-    """
-
-    def __init__(self, optimal_depth: int = 3, weight: float = 1.0):
-        super().__init__(weight=weight)
-        self.optimal_depth = optimal_depth
-
-    def _calculate_score(self, url: str) -> float:
-        """Calculate score based on path depth"""
-        path = urlparse(url).path
-        depth = len([x for x in path.split("/") if x])
-
-        # Score decreases as we move away from optimal depth
-        distance_from_optimal = abs(depth - self.optimal_depth)
-        return 1.0 / (1.0 + distance_from_optimal)
-
-class FastPathDepthScorer(FastURLScorer):
-    __slots__ = ('_weight', '_stats', '_optimal_depth')  # Remove _url_cache
-    
-    def __init__(self, optimal_depth: int = 3, weight: float = 1.0):
-        super().__init__(weight=weight)
-        self._optimal_depth = optimal_depth
-
-    @staticmethod
-    @lru_cache(maxsize=10000)
-    def _quick_depth(path: str) -> int:
-        """Ultra fast path depth calculation.
-        
-        Examples:
-            - "http://example.com" -> 0  # No path segments
-            - "http://example.com/" -> 0  # Empty path
-            - "http://example.com/a" -> 1
-            - "http://example.com/a/b" -> 2
-        """
-        if not path or path == '/':
-            return 0
-            
-        if '/' not in path:
-            return 0
-            
-        depth = 0
-        last_was_slash = True
-        
-        for c in path:
-            if c == '/':
-                if not last_was_slash:
-                    depth += 1
-                last_was_slash = True
-            else:
-                last_was_slash = False
-                
-        if not last_was_slash:
-            depth += 1
-            
-        return depth
-
-    @lru_cache(maxsize=10000)  # Cache the whole calculation
-    def _calculate_score(self, url: str) -> float:
-        pos = url.find('/', url.find('://') + 3)
-        if pos == -1:
-            depth = 0
-        else:
-            depth = self._quick_depth(url[pos:])
-            
-        # Use lookup table for common distances
-        distance = depth - self._optimal_depth
-        distance = distance if distance >= 0 else -distance  # Faster than abs()
-        
-        if distance < 4:
-            return _SCORE_LOOKUP[distance]
-            
-        return 1.0 / (1.0 + distance)                                             
-
-class ContentTypeScorer(URLScorer):
-    # PERF: Regex compilation on every initialization
-    # PERF: Dict lookup and regex search for every URL
-    # PERF: Pattern iteration adds loop overhead
-    # PERF: No pattern priority or short-circuit
-    # PERF: Dict storage has lookup overhead
-    # PERF: Missing extension fast path check
-    # PERF: Unnecessary regex for simple extensions    
-    """Score URLs based on content type preferences.
-
-    content_scorer = ContentTypeScorer({
-        r'\.html$': 1.0,
-        r'\.pdf$': 0.8,
-        r'\.xml$': 0.6
-    })
-
-    - Score based on file types
-    - Configurable type weights
-    - Pattern matching support
-    """
-
-    def __init__(self, type_weights: Dict[str, float], weight: float = 1.0):
-        super().__init__(weight=weight)
-        self.type_weights = type_weights
-        self._compile_patterns()
-
-    def _compile_patterns(self):
-        """Prepare content type patterns"""
-        self.patterns = {
-            re.compile(pattern): weight for pattern, weight in self.type_weights.items()
-        }
-
-    def _calculate_score(self, url: str) -> float:
-        """Calculate score based on content type matching"""
-        for pattern, weight in self.patterns.items():
-            if pattern.search(url):
-                return weight
-        return 0.0
-
-class FastContentTypeScorer(FastURLScorer):
-    __slots__ = ('_weight', '_exact_types', '_regex_types')
-
-    def __init__(self, type_weights: Dict[str, float], weight: float = 1.0):
-        """Initialize scorer with type weights map.
-        
-        Args:
-            type_weights: Dict mapping file extensions/patterns to scores (e.g. {'.html$': 1.0})
-            weight: Overall weight multiplier for this scorer
-        """
-        super().__init__(weight=weight)
-        self._exact_types = {}  # Fast lookup for simple extensions
-        self._regex_types = []  # Fallback for complex patterns
-        
-        # Split into exact vs regex matchers for performance
-        for pattern, score in type_weights.items():
-            if pattern.startswith('.') and pattern.endswith('$'):
-                ext = pattern[1:-1]
-                self._exact_types[ext] = score
-            else:
-                self._regex_types.append((re.compile(pattern), score))
-                
-        # Sort complex patterns by score for early exit
-        self._regex_types.sort(key=lambda x: -x[1])
-
-    @staticmethod
-    @lru_cache(maxsize=10000)
-    def _quick_extension(url: str) -> str:
-        """Extract file extension ultra-fast without regex/splits.
-        
-        Handles:
-        - Basic extensions: "example.html" -> "html"
-        - Query strings: "page.php?id=1" -> "php" 
-        - Fragments: "doc.pdf#page=1" -> "pdf"
-        - Path params: "file.jpg;width=100" -> "jpg"
-        
-        Args:
-            url: URL to extract extension from
-            
-        Returns:
-            Extension without dot, or empty string if none found
-        """
-        pos = url.rfind('.')
-        if pos == -1:
-            return ''
-        
-        # Find first non-alphanumeric char after extension
-        end = len(url)
-        for i in range(pos + 1, len(url)):
-            c = url[i]
-            # Stop at query string, fragment, path param or any non-alphanumeric
-            if c in '?#;' or not c.isalnum():
-                end = i
-                break
-                
-        return url[pos + 1:end].lower()
-
-    @lru_cache(maxsize=10000)
-    def _calculate_score(self, url: str) -> float:
-        """Calculate content type score for URL.
-        
-        Uses staged approach:
-        1. Try exact extension match (fast path)
-        2. Fall back to regex patterns if needed
-        
-        Args:
-            url: URL to score
-            
-        Returns:
-            Score between 0.0 and 1.0 * weight
-        """
-        # Fast path: direct extension lookup
-        ext = self._quick_extension(url)
-        if ext:
-            score = self._exact_types.get(ext, None)
-            if score is not None:
-                return score
-                
-        # Slow path: regex patterns
-        for pattern, score in self._regex_types:
-            if pattern.search(url):
-                return score
-
-        return 0.0
-
-class FreshnessScorer(URLScorer):
-    # PERF: Multiple regex compilations for each pattern
-    # PERF: Tries all patterns sequentially 
-    # PERF: Regex pattern matching is expensive
-    # PERF: Int conversion and arithmetic for every match
-    # PERF: Repeated constant value (2024) hardcoded
-    # PERF: No URL caching
-    # PERF: Complex patterns with redundant groups
-    # PERF: Unnecessary list of patterns when could combine
-    """Score URLs based on freshness indicators.
-
-    freshness_scorer = FreshnessScorer(weight=0.9)
-
-    Score based on date indicators in URLs
-    Multiple date format support
-    Recency weighting"""
-
-    def __init__(self, weight: float = 1.0):
-        super().__init__(weight=weight)
-        self.date_patterns = [
-            r"/(\d{4})/(\d{2})/(\d{2})/",  # yyyy/mm/dd
-            r"(\d{4})[-_](\d{2})[-_](\d{2})",  # yyyy-mm-dd
-            r"/(\d{4})/",  # year only
-        ]
-        self._compile_patterns()
-
-    def _compile_patterns(self):
-        """Prepare date patterns"""
-        self.compiled_patterns = [re.compile(p) for p in self.date_patterns]
-
-    def _calculate_score(self, url: str) -> float:
-        """Calculate score based on date indicators"""
-        for pattern in self.compiled_patterns:
-            if match := pattern.search(url):
-                year = int(match.group(1))
-                # Score higher for more recent years
-                return 1.0 - (2024 - year) * 0.1
-        return 0.5  # Default score for URLs without dates
-
-class FastFreshnessScorer(FastURLScorer):
-    __slots__ = ('_weight', '_date_pattern', '_current_year')
-
-    def __init__(self, weight: float = 1.0, current_year: int = 2024):
-        """Initialize freshness scorer.
-        
-        Extracts and scores dates from URLs using format:
-        - YYYY/MM/DD 
-        - YYYY-MM-DD
-        - YYYY_MM_DD
-        - YYYY (year only)
-        
-        Args:
-            weight: Score multiplier
-            current_year: Year to calculate freshness against (default 2024)
-        """
-        super().__init__(weight=weight)
-        self._current_year = current_year
-        
-        # Combined pattern for all date formats
-        # Uses non-capturing groups (?:) and alternation
-        self._date_pattern = re.compile(
-            r'(?:/'  # Path separator
-            r'|[-_])'  # or date separators
-            r'((?:19|20)\d{2})'  # Year group (1900-2099)
-            r'(?:'  # Optional month/day group
-            r'(?:/|[-_])'  # Date separator  
-            r'(?:\d{2})'  # Month
-            r'(?:'  # Optional day
-            r'(?:/|[-_])'  # Date separator
-            r'(?:\d{2})'  # Day
-            r')?'  # Day is optional
-            r')?'  # Month/day group is optional
-        )
-
-    @lru_cache(maxsize=10000)
-    def _extract_year(self, url: str) -> Optional[int]:
-        """Extract the most recent year from URL.
-        
-        Args:
-            url: URL to extract year from
-            
-        Returns:
-            Year as int or None if no valid year found
-        """
-        matches = self._date_pattern.finditer(url)
-        latest_year = None
-        
-        # Find most recent year
-        for match in matches:
-            year = int(match.group(1))
-            if (year <= self._current_year and  # Sanity check
-                (latest_year is None or year > latest_year)):
-                latest_year = year
-                
-        return latest_year
-
-    @lru_cache(maxsize=10000) 
-    def _calculate_score(self, url: str) -> float:
-        """Calculate freshness score based on URL date.
-        
-        More recent years score higher. Uses pre-computed scoring
-        table for common year differences.
-        
-        Args:
-            url: URL to score
-            
-        Returns:
-            Score between 0.0 and 1.0 * weight
-        """
-        year = self._extract_year(url)
-        if year is None:
-            return 0.5  # Default score
-            
-        # Use lookup table for common year differences
-        year_diff = self._current_year - year
-        if year_diff < len(_FRESHNESS_SCORES):
-            return _FRESHNESS_SCORES[year_diff]
-            
-        # Fallback calculation for older content
-        return max(0.1, 1.0 - year_diff * 0.1)
-
-class DomainAuthorityScorer(URLScorer):
-    # PERF: URL parsing on every score calculation
-    # PERF: Repeated domain extraction
-    # PERF: Case conversion on every lookup
-    # PERF: Dict lookup without caching
-    # PERF: Processes full URL when only needs domain
-    # PERF: No fast path for common domains
-    # PERF: Netloc includes port which requires extra processing
-    """Score URLs based on domain authority.
-
-    authority_scorer = DomainAuthorityScorer({
-        "python.org": 1.0,
-        "github.com": 0.9,
-        "medium.com": 0.7
-    })
-
-    Score based on domain importance
-    Configurable domain weights
-    Default weight for unknown domains"""
-
-    def __init__(
-        self,
-        domain_weights: Dict[str, float],
-        default_weight: float = 0.5,
-        weight: float = 1.0,
-    ):
-        super().__init__(weight=weight)
-        self.domain_weights = domain_weights
-        self.default_weight = default_weight
-
-    def _calculate_score(self, url: str) -> float:
-        """Calculate score based on domain authority"""
-        domain = urlparse(url).netloc.lower()
-        return self.domain_weights.get(domain, self.default_weight)
-
-class FastDomainAuthorityScorer(FastURLScorer):
-    __slots__ = ('_weight', '_domain_weights', '_default_weight', '_top_domains')
-    
-    def __init__(
-        self,
-        domain_weights: Dict[str, float],
-        default_weight: float = 0.5,
-        weight: float = 1.0,
-    ):
-        """Initialize domain authority scorer.
-        
-        Args:
-            domain_weights: Dict mapping domains to authority scores
-            default_weight: Score for unknown domains
-            weight: Overall scorer weight multiplier
-            
-        Example:
-            {
-                'python.org': 1.0,
-                'github.com': 0.9,
-                'medium.com': 0.7
-            }
-        """
-        super().__init__(weight=weight)
-        
-        # Pre-process domains for faster lookup
-        self._domain_weights = {
-            domain.lower(): score 
-            for domain, score in domain_weights.items()
-        }
-        self._default_weight = default_weight
-        
-        # Cache top domains for fast path
-        self._top_domains = {
-            domain: score
-            for domain, score in sorted(
-                domain_weights.items(), 
-                key=lambda x: -x[1]
-            )[:5]  # Keep top 5 highest scoring domains
-        }
-
-    @staticmethod
-    @lru_cache(maxsize=10000)
-    def _extract_domain(url: str) -> str:
-        """Extract domain from URL ultra-fast.
-        
-        Handles:
-        - Basic domains: "example.com"
-        - Subdomains: "sub.example.com" 
-        - Ports: "example.com:8080"
-        - IPv4: "192.168.1.1"
-        
-        Args:
-            url: Full URL to extract domain from
-            
-        Returns:
-            Lowercase domain without port
-        """
-        # Find domain start
-        start = url.find('://') 
-        if start == -1:
-            start = 0
-        else:
-            start += 3
-            
-        # Find domain end
-        end = url.find('/', start)
-        if end == -1:
-            end = url.find('?', start)
-            if end == -1:
-                end = url.find('#', start)
-                if end == -1:
-                    end = len(url)
-                    
-        # Extract domain and remove port
-        domain = url[start:end]
-        port_idx = domain.rfind(':')
-        if port_idx != -1:
-            domain = domain[:port_idx]
-            
-        return domain.lower()
-
-    @lru_cache(maxsize=10000)
-    def _calculate_score(self, url: str) -> float:
-        """Calculate domain authority score.
-        
-        Uses staged approach:
-        1. Check top domains (fastest)
-        2. Check full domain weights
-        3. Return default weight
-        
-        Args:
-            url: URL to score
-            
-        Returns:
-            Authority score between 0.0 and 1.0 * weight
-        """
-        domain = self._extract_domain(url)
-        
-        # Fast path: check top domains first
-        score = self._top_domains.get(domain)
-        if score is not None:
-            return score
-            
-        # Regular path: check all domains
-        return self._domain_weights.get(domain, self._default_weight)
-
-def create_balanced_scorer() -> CompositeScorer:
-    """Create a balanced composite scorer"""
-    return CompositeScorer(
-        [
-            KeywordRelevanceScorer(
-                keywords=["article", "blog", "news", "research"], weight=1.0
-            ),
-            PathDepthScorer(optimal_depth=3, weight=0.7),
-            ContentTypeScorer(
-                type_weights={r"\.html?$": 1.0, r"\.pdf$": 0.8, r"\.xml$": 0.6},
-                weight=0.8,
-            ),
-            FreshnessScorer(weight=0.9),
-        ]
-    )
-
-def create_balanced_fast_freshness_scorer() -> CompositeScorer:
-    """Create a balanced composite scorer with fast freshness scorer"""
-    return FastCompositeScorer(
-        [
-            FastKeywordRelevanceScorer(
-                keywords=["article", "blog", "news", "research"], weight=1.0
-            ),
-            FastPathDepthScorer(optimal_depth=3, weight=0.7),
-            FastContentTypeScorer(
-                type_weights={r"\.html?$": 1.0, r"\.pdf$": 0.8, r"\.xml$": 0.6},
-                weight=0.8,
-            ),
-            FastFreshnessScorer(weight=0.9),
-        ]
-    )
-
-# Example Usage:
-"""
-# Create a composite scorer
-scorer = CompositeScorer([
-    KeywordRelevanceScorer(["python", "programming"], weight=1.0),
-    PathDepthScorer(optimal_depth=2, weight=0.7),
-    FreshnessScorer(weight=0.8),
-    DomainAuthorityScorer(
-        domain_weights={
-            "python.org": 1.0,
-            "github.com": 0.9,
-            "medium.com": 0.7
-        },
-        weight=0.9
-    )
-])
-
-# Score a URL
-score = scorer.score("https://python.org/article/2024/01/new-features")
-
-# Access statistics
-print(f"Average score: {scorer.stats.average_score}")
-print(f"URLs scored: {scorer.stats.urls_scored}")
-"""
-
-
-def run_scorer_performance_test():
-    import time
-    import random
-    from itertools import cycle
-    import sys
-
-    # Generate varied test URLs
-    base_urls = [
-        # News/blog articles with dates
-        "https://example.com/2024/01/article-123",
-        "https://news.com/2023-12-31/breaking-news",
-        "https://blog.site.com/2022_11_15/tech-update",
-        
-        # Different content types
-        "https://docs.example.com/report.pdf",
-        "https://site.com/page.html?q=test",
-        "https://api.service.com/data.json",
-        
-        # Various domain authorities
-        "https://python.org/downloads",
-        "https://github.com/repo/code",
-        "https://medium.com/@user/post",
-        
-        # Different path depths
-        "https://site.com/category/subcategory/product/detail",
-        "https://shop.com/items",
-        "https://edu.org/courses/cs/intro/lecture1",
-    ]
-
-    # Create variations
-    test_urls = []
-    years = list(range(2020, 2025))
-    domains = ["example.com", "python.org", "github.com", "medium.com"]
-    extensions = ["html", "pdf", "php", "jsx"]
-    
-    for base in base_urls:
-        test_urls.append(base)
-        # Add year variations
-        for year in years:
-            test_urls.append(f"https://blog.com/{year}/post-{random.randint(1,999)}")
-        # Add domain variations    
-        for domain in domains:
-            test_urls.append(f"https://{domain}/article-{random.randint(1,999)}")
-        # Add extension variations    
-        for ext in extensions:
-            test_urls.append(f"https://site.com/doc-{random.randint(1,999)}.{ext}")
-
-    # Multiply dataset
-    test_urls = test_urls * 5000  # Creates ~300k URLs
-    
-    def benchmark(name: str, scorer, urls, warmup=True):
-        if warmup:
-            for url in urls[:100]:  # Warmup with subset
-                scorer.score(url)
-
-        start = time.perf_counter_ns()
-        for url in urls:
-            scorer.score(url)
-        elapsed = (time.perf_counter_ns() - start) / 1_000_000  # Convert to ms
-        
-        print(
-            f"{name:<35} {elapsed:>8.3f} ms  ({len(urls)/elapsed*1000:,.0f} URLs/sec)"
-        )
-        return elapsed
-
-    print("\nBenchmarking original vs optimized scorers...")
-    print("-" * 75)
-
-    # Initialize test data
-    domain_weights = {"python.org": 1.0, "github.com": 0.9, "medium.com": 0.7}
-    type_weights = {".html$": 1.0, ".pdf$": 0.8, ".php$": 0.6}
-    keywords = ["python", "article", "blog", "docs"]
-
-    # Original implementations
-    keyword_scorer = KeywordRelevanceScorer(keywords=keywords, weight=1.0)
-    path_scorer = PathDepthScorer(optimal_depth=3, weight=0.7)
-    content_scorer = ContentTypeScorer(type_weights=type_weights, weight=0.8)
-    freshness_scorer = FreshnessScorer(weight=0.9)
-    domain_scorer = DomainAuthorityScorer(domain_weights=domain_weights, weight=1.0)
-
-    # Fast implementations
-    fast_keyword_scorer = FastKeywordRelevanceScorer(keywords=keywords, weight=1.0)
-    fast_path_scorer = FastPathDepthScorer(optimal_depth=3, weight=0.7)
-    fast_content_scorer = FastContentTypeScorer(type_weights=type_weights, weight=0.8)
-    fast_freshness_scorer = FastFreshnessScorer(weight=0.9)
-    fast_domain_scorer = FastDomainAuthorityScorer(domain_weights=domain_weights, weight=1.0)
-
-    # Test subset for individual scorers
-    test_subset = test_urls[:1000]
-
-    print("\nIndividual Scorer Performance (first 1000 URLs):")
-    
-    print("\nKeyword Relevance Scorers:")
-    benchmark("Original Keyword Scorer", keyword_scorer, test_subset)
-    benchmark("Optimized Keyword Scorer", fast_keyword_scorer, test_subset)
-
-    print("\nPath Depth Scorers:")
-    benchmark("Original Path Scorer", path_scorer, test_subset)
-    benchmark("Optimized Path Scorer", fast_path_scorer, test_subset)
-
-    print("\nContent Type Scorers:")
-    benchmark("Original Content Scorer", content_scorer, test_subset)
-    benchmark("Optimized Content Scorer", fast_content_scorer, test_subset)
-
-    print("\nFreshness Scorers:")
-    benchmark("Original Freshness Scorer", freshness_scorer, test_subset)
-    benchmark("Optimized Freshness Scorer", fast_freshness_scorer, test_subset)
-
-    print("\nDomain Authority Scorers:")
-    benchmark("Original Domain Scorer", domain_scorer, test_subset)
-    benchmark("Optimized Domain Scorer", fast_domain_scorer, test_subset)
-
-    # Test composite scorers
-    print("\nComposite Scorer Performance (all URLs):")
-    
-    original_composite = CompositeScorer([
-        keyword_scorer, path_scorer, content_scorer, 
-        freshness_scorer, domain_scorer
-    ])
-    
-    fast_composite = FastCompositeScorer([
-        fast_keyword_scorer, fast_path_scorer, fast_content_scorer,
-        fast_freshness_scorer, fast_domain_scorer
-    ])
-
-    benchmark("Original Composite Scorer", original_composite, test_urls)
-    benchmark("Optimized Composite Scorer", fast_composite, test_urls)
-
-    # Memory usage
-    print("\nMemory Usage per Scorer:")
-    print(f"Original Keyword Scorer: {sys.getsizeof(keyword_scorer):,} bytes")
-    print(f"Optimized Keyword Scorer: {sys.getsizeof(fast_keyword_scorer):,} bytes")
-    print(f"Original Path Scorer: {sys.getsizeof(path_scorer):,} bytes")
-    print(f"Optimized Path Scorer: {sys.getsizeof(fast_path_scorer):,} bytes")
-    print(f"Original Content Scorer: {sys.getsizeof(content_scorer):,} bytes")
-    print(f"Optimized Content Scorer: {sys.getsizeof(fast_content_scorer):,} bytes")
-    print(f"Original Freshness Scorer: {sys.getsizeof(freshness_scorer):,} bytes")
-    print(f"Optimized Freshness Scorer: {sys.getsizeof(fast_freshness_scorer):,} bytes")
-    print(f"Original Domain Scorer: {sys.getsizeof(domain_scorer):,} bytes")
-    print(f"Optimized Domain Scorer: {sys.getsizeof(fast_domain_scorer):,} bytes")
-    print(f"Original Composite: {sys.getsizeof(original_composite):,} bytes")
-    print(f"Optimized Composite: {sys.getsizeof(fast_composite):,} bytes")
-
-def test_scorers():
-    import time
-    from itertools import chain
-
-    test_cases = [
-        # Keyword Scorer Tests
-        {
-            "scorer_type": "keyword",
-            "config": {
-                "keywords": ["python", "blog"],
-                "weight": 1.0,
-                "case_sensitive": False
-            },
-            "urls": {
-                "https://example.com/python-blog": 1.0,
-                "https://example.com/PYTHON-BLOG": 1.0,
-                "https://example.com/python-only": 0.5,
-                "https://example.com/other": 0.0
-            }
-        },
-        
-        # Path Depth Scorer Tests
-        {
-            "scorer_type": "path_depth",
-            "config": {
-                "optimal_depth": 2,
-                "weight": 1.0
-            },
-            "urls": {
-                "https://example.com/a/b": 1.0,
-                "https://example.com/a": 0.5,
-                "https://example.com/a/b/c": 0.5,
-                "https://example.com": 0.33333333
-            }
-        },
-        
-        # Content Type Scorer Tests
-        {
-            "scorer_type": "content_type",
-            "config": {
-                "type_weights": {
-                    ".html$": 1.0,
-                    ".pdf$": 0.8,
-                    ".jpg$": 0.6
-                },
-                "weight": 1.0
-            },
-            "urls": {
-                "https://example.com/doc.html": 1.0,
-                "https://example.com/doc.pdf": 0.8,
-                "https://example.com/img.jpg": 0.6,
-                "https://example.com/other.txt": 0.0
-            }
-        },
-        
-        # Freshness Scorer Tests
-        {
-            "scorer_type": "freshness",
-            "config": {
-                "weight": 1.0,  # Remove current_year since original doesn't support it
-            },
-            "urls": {
-                "https://example.com/2024/01/post": 1.0,
-                "https://example.com/2023/12/post": 0.9,
-                "https://example.com/2022/post": 0.8,
-                "https://example.com/no-date": 0.5
-            }
-        },
-        
-        # Domain Authority Scorer Tests
-        {
-            "scorer_type": "domain",
-            "config": {
-                "domain_weights": {
-                    "python.org": 1.0,
-                    "github.com": 0.8,
-                    "medium.com": 0.6
-                },
-                "default_weight": 0.3,
-                "weight": 1.0
-            },
-            "urls": {
-                "https://python.org/about": 1.0,
-                "https://github.com/repo": 0.8,
-                "https://medium.com/post": 0.6,
-                "https://unknown.com": 0.3
-            }
-        }
-    ]
-
-    def create_scorer(scorer_type, config):
-        if scorer_type == "keyword":
-            return (
-                KeywordRelevanceScorer(**config),
-                FastKeywordRelevanceScorer(**config)
-            )
-        elif scorer_type == "path_depth":
-            return (
-                PathDepthScorer(**config),
-                FastPathDepthScorer(**config)
-            )
-        elif scorer_type == "content_type":
-            return (
-                ContentTypeScorer(**config),
-                FastContentTypeScorer(**config)
-            )
-        elif scorer_type == "freshness":
-            return (
-        FreshnessScorer(**config),
-        FastFreshnessScorer(**config, current_year=2024)
-            )
-        elif scorer_type == "domain":
-            return (
-                DomainAuthorityScorer(**config),
-                FastDomainAuthorityScorer(**config)
-            )
-
-    def run_accuracy_test():
-        print("\nAccuracy Tests:")
-        print("-" * 50)
-        
-        all_passed = True
-        for test_case in test_cases:
-            print(f"\nTesting {test_case['scorer_type']} scorer:")
-            original, fast = create_scorer(
-                test_case['scorer_type'],
-                test_case['config']
-            )
-            
-            for url, expected in test_case['urls'].items():
-                orig_score = round(original.score(url), 8)
-                fast_score = round(fast.score(url), 8)
-                expected = round(expected, 8)
-                
-                if abs(orig_score - expected) > 0.00001:
-                    print(f"❌ Original Failed: URL '{url}'")
-                    print(f"   Expected: {expected}, Got: {orig_score}")
-                    all_passed = False
-                else:
-                    print(f"✅ Original Passed: URL '{url}'")
-                    
-                if abs(fast_score - expected) > 0.00001:
-                    print(f"❌ Fast Failed: URL '{url}'")
-                    print(f"   Expected: {expected}, Got: {fast_score}")
-                    all_passed = False
-                else:
-                    print(f"✅ Fast Passed: URL '{url}'")
-                    
-        return all_passed
-
-    def run_composite_test():
-        print("\nTesting Composite Scorer:")
-        print("-" * 50)
-        
-        # Create test data
-        test_urls = {
-            "https://python.org/blog/2024/01/new-release.html":0.86666667,
-            "https://github.com/repo/old-code.pdf": 0.62,
-            "https://unknown.com/random": 0.26
-        }
-        
-        # Create composite scorers with all types
-        original_scorers = []
-        fast_scorers = []
-        
-        for test_case in test_cases:
-            orig, fast = create_scorer(
-                test_case['scorer_type'],
-                test_case['config']
-            )
-            original_scorers.append(orig)
-            fast_scorers.append(fast)
-            
-        original_composite = CompositeScorer(original_scorers, normalize=True)
-        fast_composite = FastCompositeScorer(fast_scorers, normalize=True)
-        
-        all_passed = True
-        for url, expected in test_urls.items():
-            orig_score = round(original_composite.score(url), 8)
-            fast_score = round(fast_composite.score(url), 8)
-            
-            if abs(orig_score - expected) > 0.00001:
-                print(f"❌ Original Composite Failed: URL '{url}'")
-                print(f"   Expected: {expected}, Got: {orig_score}")
-                all_passed = False
-            else:
-                print(f"✅ Original Composite Passed: URL '{url}'")
-                
-            if abs(fast_score - expected) > 0.00001:
-                print(f"❌ Fast Composite Failed: URL '{url}'")
-                print(f"   Expected: {expected}, Got: {fast_score}")
-                all_passed = False
-            else:
-                print(f"✅ Fast Composite Passed: URL '{url}'")
-                
-        return all_passed
-
-    # Run tests
-    print("Running Scorer Tests...")
-    accuracy_passed = run_accuracy_test()
-    composite_passed = run_composite_test()
-    
-    if accuracy_passed and composite_passed:
-        print("\n✨ All tests passed!")
-        # Note: Already have performance tests in run_scorer_performance_test()
-    else:
-        print("\n❌ Some tests failed!")
-
-    
-
-if __name__ == "__main__":
-    run_scorer_performance_test()
-    # test_scorers()
\ No newline at end of file