diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py
index 9a8d621c..d02e6c55 100644
--- a/crawl4ai/async_crawler_strategy.py
+++ b/crawl4ai/async_crawler_strategy.py
@@ -744,18 +744,49 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
                     )
                     redirected_url = page.url
                 except Error as e:
-                    raise RuntimeError(f"Failed on navigating ACS-GOTO:\n{str(e)}")
+                    # Allow navigation to be aborted when downloading files
+                    # This is expected behavior for downloads in some browser engines
+                    if 'net::ERR_ABORTED' in str(e) and self.browser_config.accept_downloads:
+                        self.logger.info(
+                            message=f"Navigation aborted, likely due to file download: {url}",
+                            tag="GOTO",
+                            params={"url": url},
+                        )
+                        response = None
+                    else:
+                        raise RuntimeError(f"Failed on navigating ACS-GOTO:\n{str(e)}")

                 await self.execute_hook(
                     "after_goto", page, context=context, url=url, response=response, config=config
                 )

+                # ──────────────────────────────────────────────────────────────
+                # Walk the redirect chain. Playwright returns only the last
+                # hop, so we trace the `request.redirected_from` links until the
+                # first response that differs from the final one and surface its
+                # status-code.
+                # ──────────────────────────────────────────────────────────────
                 if response is None:
                     status_code = 200
                     response_headers = {}
                 else:
-                    status_code = response.status
-                    response_headers = response.headers
+                    first_resp = response
+                    req = response.request
+                    while req and req.redirected_from:
+                        prev_req = req.redirected_from
+                        prev_resp = await prev_req.response()
+                        if prev_resp:  # keep earliest
+                            first_resp = prev_resp
+                        req = prev_req
+
+                    status_code = first_resp.status
+                    response_headers = first_resp.headers
+                    # if response is None:
+                    #     status_code = 200
+                    #     response_headers = {}
+                    # else:
+                    #     status_code = response.status
+                    #     response_headers = response.headers

             else:
                 status_code = 200
@@ -1435,12 +1466,32 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
             num_segments = (page_height // viewport_height) + 1
             for i in range(num_segments):
                 y_offset = i * viewport_height
+                # Special handling for the last segment
+                if i == num_segments - 1:
+                    last_part_height = page_height % viewport_height
+
+                    # If page_height is an exact multiple of viewport_height,
+                    # we don't need an extra segment
+                    if last_part_height == 0:
+                        # Skip last segment if page height is exact multiple of viewport
+                        break
+
+                    # Adjust viewport to exactly match the remaining content height
+                    await page.set_viewport_size({"width": page_width, "height": last_part_height})
+
                 await page.evaluate(f"window.scrollTo(0, {y_offset})")
                 await asyncio.sleep(0.01)  # wait for render
-                seg_shot = await page.screenshot(full_page=False)
+
+                # Capture the current segment
+                # Note: Using compression options (format, quality) would go here
+                seg_shot = await page.screenshot(full_page=False, type="jpeg", quality=85)
+                # seg_shot = await page.screenshot(full_page=False)
                 img = Image.open(BytesIO(seg_shot)).convert("RGB")
                 segments.append(img)

+            # Reset viewport to original size after capturing segments
+            await page.set_viewport_size({"width": page_width, "height": viewport_height})
+
             total_height = sum(img.height for img in segments)
             stitched = Image.new("RGB", (segments[0].width, total_height))
             offset = 0
diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py
index 19b98522..cb221b72 100644
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -360,7 +360,7 @@ class AsyncWebCrawler:
                 pdf_data=pdf_data,
                 verbose=config.verbose,
                 is_raw_html=True if url.startswith("raw:") else False,
-                redirected_url=async_response.redirected_url,
+                redirected_url=async_response.redirected_url,
                 **kwargs,
             )

@@ -503,7 +503,7 @@ class AsyncWebCrawler:
             tables = media.pop("tables", [])
             links = result.links.model_dump()
             metadata = result.metadata
-            
+
             fit_html = preprocess_html_for_schema(html_content=html, text_threshold= 500, max_size= 300_000)

             ################################
@@ -585,11 +585,13 @@ class AsyncWebCrawler:
                 # Choose content based on input_format
                 content_format = config.extraction_strategy.input_format
                 if content_format == "fit_markdown" and not markdown_result.fit_markdown:
-                    self.logger.warning(
-                        message="Fit markdown requested but not available. Falling back to raw markdown.",
-                        tag="EXTRACT",
-                        params={"url": _url},
-                    )
+
+                    self.logger.url_status(
+                        url=_url,
+                        success=bool(html),
+                        timing=time.perf_counter() - t1,
+                        tag="EXTRACT",
+                    )
                     content_format = "markdown"

                 content = {
@@ -613,11 +615,12 @@ class AsyncWebCrawler:
                 )

             # Log extraction completion
-            self.logger.info(
-                message="Completed for {url:.50}... | Time: {timing}s",
-                tag="EXTRACT",
-                params={"url": _url, "timing": time.perf_counter() - t1},
-            )
+            self.logger.url_status(
+                url=_url,
+                success=bool(html),
+                timing=time.perf_counter() - t1,
+                tag="EXTRACT",
+            )

             # Apply HTML formatting if requested
             if config.prettiify:
diff --git a/crawl4ai/browser_profiler.py b/crawl4ai/browser_profiler.py
index 41efd4b0..4607e4b5 100644
--- a/crawl4ai/browser_profiler.py
+++ b/crawl4ai/browser_profiler.py
@@ -615,9 +615,18 @@ class BrowserProfiler:
         self.logger.info(f"Debugging port: {debugging_port}", tag="CDP")
         self.logger.info(f"Headless mode: {headless}", tag="CDP")

+        # create browser config
+        browser_config = BrowserConfig(
+            browser_type=browser_type,
+            headless=headless,
+            user_data_dir=profile_path,
+            debugging_port=debugging_port,
+            verbose=True
+        )
+
         # Create managed browser instance
         managed_browser = ManagedBrowser(
-            browser_type=browser_type,
+            browser_config=browser_config,
             user_data_dir=profile_path,
             headless=headless,
             logger=self.logger,
diff --git a/crawl4ai/content_scraping_strategy.py b/crawl4ai/content_scraping_strategy.py
index 1dfbce84..d11e02d0 100644
--- a/crawl4ai/content_scraping_strategy.py
+++ b/crawl4ai/content_scraping_strategy.py
@@ -718,13 +718,18 @@ class WebScrapingStrategy(ContentScrapingStrategy):

         # Check flag if we should remove external images
         if kwargs.get("exclude_external_images", False):
-            element.decompose()
-            return False
-            # src_url_base = src.split('/')[2]
-            # url_base = url.split('/')[2]
-            # if url_base not in src_url_base:
-            #     element.decompose()
-            #     return False
+            # Handle relative URLs (which are always from the same domain)
+            if not src.startswith('http') and not src.startswith('//'):
+                return True  # Keep relative URLs
+
+            # For absolute URLs, compare the base domains using the existing function
+            src_base_domain = get_base_domain(src)
+            url_base_domain = get_base_domain(url)
+
+            # If the domains don't match and both are valid, the image is external
+            if src_base_domain and url_base_domain and src_base_domain != url_base_domain:
+                element.decompose()
+                return False

         # if kwargs.get('exclude_social_media_links', False):
         #     if image_src_base_domain in exclude_social_media_domains:
diff --git a/crawl4ai/deep_crawling/bff_strategy.py b/crawl4ai/deep_crawling/bff_strategy.py
index 65d4e819..7779c9f4 100644
--- a/crawl4ai/deep_crawling/bff_strategy.py
+++ b/crawl4ai/deep_crawling/bff_strategy.py
@@ -150,6 +150,14 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
                     self.logger.info(f"Max pages limit ({self.max_pages}) reached, stopping crawl")
                     break

+                # Calculate how many more URLs we can process in this batch
+                remaining = self.max_pages - self._pages_crawled
+                batch_size = min(BATCH_SIZE, remaining)
+                if batch_size <= 0:
+                    # No more pages to crawl
+                    self.logger.info(f"Max pages limit ({self.max_pages}) reached, stopping crawl")
+                    break
+
                 batch: List[Tuple[float, int, str, Optional[str]]] = []
                 # Retrieve up to BATCH_SIZE items from the priority queue.
                 for _ in range(BATCH_SIZE):
@@ -184,6 +192,10 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
                     # Count only successful crawls toward max_pages limit
                     if result.success:
                         self._pages_crawled += 1
+                        # Check if we've reached the limit during batch processing
+                        if self._pages_crawled >= self.max_pages:
+                            self.logger.info(f"Max pages limit ({self.max_pages}) reached during batch, stopping crawl")
+                            break  # Exit the generator

                     yield result

diff --git a/crawl4ai/deep_crawling/bfs_strategy.py b/crawl4ai/deep_crawling/bfs_strategy.py
index 48c116dd..950c3980 100644
--- a/crawl4ai/deep_crawling/bfs_strategy.py
+++ b/crawl4ai/deep_crawling/bfs_strategy.py
@@ -157,6 +157,11 @@ class BFSDeepCrawlStrategy(DeepCrawlStrategy):
         results: List[CrawlResult] = []

         while current_level and not self._cancel_event.is_set():
+            # Check if we've already reached max_pages before starting a new level
+            if self._pages_crawled >= self.max_pages:
+                self.logger.info(f"Max pages limit ({self.max_pages}) reached, stopping crawl")
+                break
+
             next_level: List[Tuple[str, Optional[str]]] = []
             urls = [url for url, _ in current_level]

@@ -221,6 +226,10 @@ class BFSDeepCrawlStrategy(DeepCrawlStrategy):
                     # Count only successful crawls
                     if result.success:
                         self._pages_crawled += 1
+                        # Check if we've reached the limit during batch processing
+                        if self._pages_crawled >= self.max_pages:
+                            self.logger.info(f"Max pages limit ({self.max_pages}) reached during batch, stopping crawl")
+                            break  # Exit the generator
                     results_count += 1
                     yield result

diff --git a/crawl4ai/deep_crawling/dfs_strategy.py b/crawl4ai/deep_crawling/dfs_strategy.py
index f79f9628..0eca58e3 100644
--- a/crawl4ai/deep_crawling/dfs_strategy.py
+++ b/crawl4ai/deep_crawling/dfs_strategy.py
@@ -49,6 +49,10 @@ class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
             # Count only successful crawls toward max_pages limit
             if result.success:
                 self._pages_crawled += 1
+                # Check if we've reached the limit during batch processing
+                if self._pages_crawled >= self.max_pages:
+                    self.logger.info(f"Max pages limit ({self.max_pages}) reached during batch, stopping crawl")
+                    break  # Exit the generator

             # Only discover links from successful crawls
             new_links: List[Tuple[str, Optional[str]]] = []
@@ -94,6 +98,10 @@ class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
                     # and only discover links from successful crawls
                     if result.success:
                         self._pages_crawled += 1
+                        # Check if we've reached the limit during batch processing
+                        if self._pages_crawled >= self.max_pages:
+                            self.logger.info(f"Max pages limit ({self.max_pages}) reached during batch, stopping crawl")
+                            break  # Exit the generator
                         new_links: List[Tuple[str, Optional[str]]] = []
                         await self.link_discovery(result, url, depth, visited, new_links, depths)
diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py
index 46207ca7..a50b234d 100644
--- a/crawl4ai/utils.py
+++ b/crawl4ai/utils.py
@@ -42,6 +42,29 @@ from itertools import chain
 from collections import deque
 from typing import Generator, Iterable

+# Monkey patch to fix wildcard handling in urllib.robotparser
+from urllib.robotparser import RuleLine
+import re
+
+original_applies_to = RuleLine.applies_to
+
+def patched_applies_to(self, filename):
+    # Handle wildcards in paths
+    if '*' in self.path or '%2A' in self.path or self.path in ("*", "%2A"):
+        pattern = self.path.replace('%2A', '*')
+        pattern = re.escape(pattern).replace('\\*', '.*')
+        pattern = '^' + pattern
+        if pattern.endswith('\\$'):
+            pattern = pattern[:-2] + '$'
+        try:
+            return bool(re.match(pattern, filename))
+        except re.error:
+            return original_applies_to(self, filename)
+    return original_applies_to(self, filename)
+
+RuleLine.applies_to = patched_applies_to
+# Monkey patch ends
+
 def chunk_documents(
     documents: Iterable[str],
     chunk_token_threshold: int,
@@ -303,7 +326,7 @@ class RobotsParser:
             robots_url = f"{scheme}://{domain}/robots.txt"

             async with aiohttp.ClientSession() as session:
-                async with session.get(robots_url, timeout=2) as response:
+                async with session.get(robots_url, timeout=2, ssl=False) as response:
                     if response.status == 200:
                         rules = await response.text()
                         self._cache_rules(domain, rules)
diff --git a/deploy/docker/c4ai-doc-context.md b/deploy/docker/c4ai-doc-context.md
index 5b5a81bb..6591c265 100644
--- a/deploy/docker/c4ai-doc-context.md
+++ b/deploy/docker/c4ai-doc-context.md
@@ -403,7 +403,7 @@ async def main():
     md_generator = DefaultMarkdownGenerator(
         content_filter=filter,
-        options={"ignore_links": True}
+        options={"ignore_links": True})

     # 4) Crawler run config: skip cache, use extraction
     run_conf = CrawlerRunConfig(
@@ -4152,7 +4152,7 @@ prune_filter = PruningContentFilter(
 For intelligent content filtering and high-quality markdown generation, you can use the **LLMContentFilter**. This filter leverages LLMs to generate relevant markdown while preserving the original content's meaning and structure:

 ```python
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig, DefaultMarkdownGenerator
 from crawl4ai.content_filter_strategy import LLMContentFilter

 async def main():
@@ -4175,8 +4175,13 @@ async def main():
         verbose=True
     )

+    md_generator = DefaultMarkdownGenerator(
+        content_filter=filter,
+        options={"ignore_links": True}
+    )
+
     config = CrawlerRunConfig(
-        content_filter=filter
+        markdown_generator=md_generator
     )

     async with AsyncWebCrawler() as crawler:
diff --git a/docs/md_v2/core/browser-crawler-config.md b/docs/md_v2/core/browser-crawler-config.md
index 9ea8f2a1..c7c8c166 100644
--- a/docs/md_v2/core/browser-crawler-config.md
+++ b/docs/md_v2/core/browser-crawler-config.md
@@ -273,7 +273,7 @@ In a typical scenario, you define **one** `BrowserConfig` for your crawler sessi

 ```python
 import asyncio
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, LLMConfig, LLMContentFilter, DefaultMarkdownGenerator
 from crawl4ai.extraction_strategy import JsonCssExtractionStrategy

 async def main():
@@ -298,7 +298,7 @@ async def main():

     # 3) Example LLM content filtering
     gemini_config = LLMConfig(
-        provider="gemini/gemini-1.5-pro"
+        provider="gemini/gemini-1.5-pro",
         api_token = "env:GEMINI_API_TOKEN"
     )

@@ -322,8 +322,9 @@ async def main():
     )

     md_generator = DefaultMarkdownGenerator(
-        content_filter=filter,
-        options={"ignore_links": True}
+        content_filter=filter,
+        options={"ignore_links": True}
+    )

     # 4) Crawler run config: skip cache, use extraction
     run_conf = CrawlerRunConfig(
diff --git a/docs/md_v2/core/cli.md b/docs/md_v2/core/cli.md
index ff4bf658..ded35f2f 100644
--- a/docs/md_v2/core/cli.md
+++ b/docs/md_v2/core/cli.md
@@ -17,6 +17,9 @@
 - [Configuration Reference](#configuration-reference)
 - [Best Practices & Tips](#best-practices--tips)

+## Installation
+The Crawl4AI CLI will be installed automatically when you install the library.
+
 ## Basic Usage

 The Crawl4AI CLI (`crwl`) provides a simple interface to the Crawl4AI library:
diff --git a/docs/md_v2/core/markdown-generation.md b/docs/md_v2/core/markdown-generation.md
index e897b2bb..5eac0ab5 100644
--- a/docs/md_v2/core/markdown-generation.md
+++ b/docs/md_v2/core/markdown-generation.md
@@ -233,7 +233,7 @@ prune_filter = PruningContentFilter(
 For intelligent content filtering and high-quality markdown generation, you can use the **LLMContentFilter**. This filter leverages LLMs to generate relevant markdown while preserving the original content's meaning and structure:

 ```python
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, LLMConfig, DefaultMarkdownGenerator
 from crawl4ai.content_filter_strategy import LLMContentFilter

 async def main():
@@ -255,9 +255,12 @@ async def main():
         chunk_token_threshold=4096,  # Adjust based on your needs
         verbose=True
     )
-    
+    md_generator = DefaultMarkdownGenerator(
+        content_filter=filter,
+        options={"ignore_links": True}
+    )
     config = CrawlerRunConfig(
-        content_filter=filter
+        markdown_generator=md_generator,
     )

     async with AsyncWebCrawler() as crawler:
diff --git a/pyproject.toml b/pyproject.toml
index be44397e..a208d5d8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ dependencies = [
     "lxml~=5.3",
     "litellm>=1.53.1",
     "numpy>=1.26.0,<3",
-    "pillow~=10.4",
+    "pillow>=10.4",
     "playwright>=1.49.0",
     "python-dotenv~=1.0",
     "requests~=2.26",
@@ -33,7 +33,6 @@ dependencies = [
     "psutil>=6.1.1",
     "nltk>=3.9.1",
     "playwright",
-    "aiofiles",
     "rich>=13.9.4",
     "cssselect>=1.2.0",
     "httpx>=0.27.2",
diff --git a/requirements.txt b/requirements.txt
index 0bb596d1..b62575d8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ aiosqlite~=0.20
 lxml~=5.3
 litellm>=1.53.1
 numpy>=1.26.0,<3
-pillow~=10.4
+pillow>=10.4
 playwright>=1.49.0
 python-dotenv~=1.0
 requests~=2.26
@@ -22,4 +22,7 @@ nltk>=3.9.1
 rich>=13.9.4
 cssselect>=1.2.0
 chardet>=5.2.0
-brotli>=1.1.0
\ No newline at end of file
+brotli>=1.1.0
+fake-useragent>=2.2.0
+pdf2image>=1.17.0
+PyPDF2>=3.0.1
\ No newline at end of file
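
Note (illustrative, not part of the diff): the `RuleLine.applies_to` monkey patch added to `crawl4ai/utils.py` above changes how wildcard rules in robots.txt are evaluated. The sketch below shows the difference on a hypothetical rule and URL; it assumes the patch is installed as a side effect of importing `crawl4ai.utils`, as the hunk suggests. With the stock `urllib.robotparser`, the `*` is percent-quoted and matched literally as a prefix, so the URL is allowed; with the patched matcher the wildcard becomes a regex and the URL is disallowed.

# Sketch: effect of the RuleLine.applies_to patch (hypothetical rule and URL).
import crawl4ai.utils  # noqa: F401 -- importing installs the monkey patch (assumption)
from urllib.robotparser import RobotFileParser

robots_txt = """User-agent: *
Disallow: /private/*/data
"""

parser = RobotFileParser()
parser.parse(robots_txt.splitlines())

# Stock robotparser: prints True (the "*" is treated literally, so the rule never matches).
# With the patch: prints False (the wildcard matches "abc", so the path is disallowed).
print(parser.can_fetch("MyCrawler", "https://example.com/private/abc/data"))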