diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6ab493ff..9c5d35bb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,52 @@
 # Changelog
 
+## [v0.3.73] - 2024-10-24
+
+### Added
+- Smart overlay removal system in AsyncPlaywrightCrawlerStrategy:
+  - Automatic removal of popups, modals, and cookie notices
+  - Detection and removal of fixed/sticky position elements
+  - Cleaning of empty block elements
+  - Configurable via `remove_overlay_elements` parameter
+- Enhanced screenshot capabilities:
+  - Added `screenshot_wait_for` parameter to control timing
+  - Improved screenshot handling with existing page context
+  - Better error handling with fallback error images
+- New URL normalization utilities:
+  - `normalize_url` function for consistent URL formatting
+  - `is_external_url` function for better link classification
+- Custom base directory support for cache storage:
+  - New `base_directory` parameter in AsyncWebCrawler
+  - Allows specifying alternative locations for the `.crawl4ai` folder
+
+### Enhanced
+- Link handling improvements:
+  - Better duplicate link detection
+  - Enhanced internal/external link classification
+  - Improved handling of special URL protocols
+  - Support for anchor links and protocol-relative URLs
+- Configuration refinements:
+  - Streamlined social media domain list
+  - More focused external content filtering
+- LLM extraction strategy:
+  - Added support for a separate API base URL via the `api_base` parameter
+  - Better handling of base URLs in configuration
+
+### Fixed
+- Screenshot functionality:
+  - Resolved issues with screenshot timing and context
+  - Improved error handling and recovery
+- Link processing:
+  - Fixed URL normalization edge cases
+  - Better handling of invalid URLs
+  - Improved error messages for link processing failures
+
+### Developer Notes
+- The overlay removal system uses in-page JavaScript injection for better compatibility
+- URL normalization handles special cases such as mailto:, tel:, and protocol-relative URLs
+- The screenshot system now reuses the existing page context for better performance
+- Link processing maintains separate dictionaries for internal and external links to ensure uniqueness
+
 ## [v0.3.72] - 2024-10-22
 
 ### Added
diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py
index 1ddb32da..fe0b6767 100644
--- a/crawl4ai/async_crawler_strategy.py
+++ b/crawl4ai/async_crawler_strategy.py
@@ -51,7 +51,7 @@ class AsyncCrawlerStrategy(ABC):
         pass
 
     @abstractmethod
-    async def take_screenshot(self, url: str) -> str:
+    async def take_screenshot(self, **kwargs) -> str:
        pass
 
     @abstractmethod
@@ -502,13 +502,21 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
             if delay_before_return_html:
                 await asyncio.sleep(delay_before_return_html)
 
+            # Remove popups, modals, and other overlays before capturing the final HTML
+            if kwargs.get("remove_overlay_elements", False):
+                await self.remove_overlay_elements(page)
+
             html = await page.content()
             await self.execute_hook('before_return_html', page, html)
 
             # Check if kwargs has screenshot=True then take screenshot
             screenshot_data = None
             if kwargs.get("screenshot"):
-                screenshot_data = await self.take_screenshot(url)
+                # If screenshot_wait_for is set, wait that many seconds before capturing
+                screenshot_wait_for = kwargs.get("screenshot_wait_for")
+                if screenshot_wait_for:
+                    await asyncio.sleep(screenshot_wait_for)
+                screenshot_data = await self.take_screenshot(page)
 
             if self.verbose:
                 print(f"[LOG] ✅ Crawled {url} successfully!")
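Note: a minimal usage sketch for the crawl-time options introduced above (`remove_overlay_elements`, `screenshot`, `screenshot_wait_for`). The `AsyncWebCrawler`/`arun` call shape and the `result.screenshot` field follow the existing crawl4ai API; the URL and output filename are placeholders.

```python
import asyncio
import base64

from crawl4ai import AsyncWebCrawler


async def main():
    async with AsyncWebCrawler(verbose=True) as crawler:
        result = await crawler.arun(
            url="https://example.com",
            remove_overlay_elements=True,  # strip popups/modals before the HTML is captured
            screenshot=True,               # ask the strategy for a full-page screenshot
            screenshot_wait_for=2,         # wait 2 seconds before capturing (new parameter)
            bypass_cache=True,
        )
        if result.screenshot:
            # The strategy returns the screenshot as a base64-encoded string
            with open("screenshot.png", "wb") as f:
                f.write(base64.b64decode(result.screenshot))


asyncio.run(main())
```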
@@ -559,28 +567,156 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
         results = await asyncio.gather(*tasks, return_exceptions=True)
         return [result if not isinstance(result, Exception) else str(result) for result in results]
 
-    async def take_screenshot(self, url: str, wait_time=1000) -> str:
-        async with await self.browser.new_context(user_agent=self.user_agent) as context:
-            page = await context.new_page()
-            try:
-                await page.goto(url, wait_until="domcontentloaded", timeout=30000)
-                # Wait for a specified time (default is 1 second)
-                await page.wait_for_timeout(wait_time)
-                screenshot = await page.screenshot(full_page=True)
-                return base64.b64encode(screenshot).decode('utf-8')
-            except Exception as e:
-                error_message = f"Failed to take screenshot: {str(e)}"
-                print(error_message)
+    async def remove_overlay_elements(self, page: Page) -> None:
+        """
+        Removes popup overlays, modals, cookie notices, and other intrusive elements from the page.
+
+        Args:
+            page (Page): The Playwright page instance
+        """
+        remove_overlays_js = """
+        async () => {
+            // Function to check if element is visible
+            const isVisible = (elem) => {
+                const style = window.getComputedStyle(elem);
+                return style.display !== 'none' &&
+                       style.visibility !== 'hidden' &&
+                       style.opacity !== '0';
+            };
 
-                # Generate an error image
-                img = Image.new('RGB', (800, 600), color='black')
-                draw = ImageDraw.Draw(img)
-                font = ImageFont.load_default()
-                draw.text((10, 10), error_message, fill=(255, 255, 255), font=font)
+            // Common selectors for popups and overlays
+            const commonSelectors = [
+                // Close buttons first
+                'button[class*="close" i]', 'button[class*="dismiss" i]',
+                'button[aria-label*="close" i]', 'button[title*="close" i]',
+                'a[class*="close" i]', 'span[class*="close" i]',
 
-                buffered = BytesIO()
-                img.save(buffered, format="JPEG")
-                return base64.b64encode(buffered.getvalue()).decode('utf-8')
-            finally:
-                await page.close()
+                // Cookie notices
+                '[class*="cookie-banner" i]', '[id*="cookie-banner" i]',
+                '[class*="cookie-consent" i]', '[id*="cookie-consent" i]',
+
+                // Newsletter/subscription dialogs
+                '[class*="newsletter" i]', '[class*="subscribe" i]',
+
+                // Generic popups/modals
+                '[class*="popup" i]', '[class*="modal" i]',
+                '[class*="overlay" i]', '[class*="dialog" i]',
+                '[role="dialog"]', '[role="alertdialog"]'
+            ];
+
+            // Try to click close buttons first
+            for (const selector of commonSelectors.slice(0, 6)) {
+                const closeButtons = document.querySelectorAll(selector);
+                for (const button of closeButtons) {
+                    if (isVisible(button)) {
+                        try {
+                            button.click();
+                            await new Promise(resolve => setTimeout(resolve, 100));
+                        } catch (e) {
+                            console.log('Error clicking button:', e);
+                        }
+                    }
+                }
+            }
+
+            // Remove remaining overlay elements
+            const removeOverlays = () => {
+                // Find elements with high z-index
+                const allElements = document.querySelectorAll('*');
+                for (const elem of allElements) {
+                    const style = window.getComputedStyle(elem);
+                    const zIndex = parseInt(style.zIndex);
+                    const position = style.position;
+
+                    if (
+                        isVisible(elem) &&
+                        (zIndex > 999 || position === 'fixed' || position === 'absolute') &&
+                        (
+                            elem.offsetWidth > window.innerWidth * 0.5 ||
+                            elem.offsetHeight > window.innerHeight * 0.5 ||
+                            style.backgroundColor.includes('rgba') ||
+                            parseFloat(style.opacity) < 1
+                        )
+                    ) {
+                        elem.remove();
+                    }
+                }
+
+                // Remove elements matching common selectors
+                for (const selector of commonSelectors) {
+                    const elements = document.querySelectorAll(selector);
+                    elements.forEach(elem => {
+                        if (isVisible(elem)) {
+                            elem.remove();
+                        }
+                    });
+                }
+            };
+
+            // Remove overlay elements
+            removeOverlays();
+
+            // Remove any fixed/sticky position elements at the top/bottom
+            const removeFixedElements = () => {
+                const elements = document.querySelectorAll('*');
+                elements.forEach(elem => {
+                    const style = window.getComputedStyle(elem);
+                    if (
+                        (style.position === 'fixed' || style.position === 'sticky') &&
+                        isVisible(elem)
+                    ) {
+                        elem.remove();
+                    }
+                });
+            };
+
+            removeFixedElements();
+
+            // Remove empty block elements such as div, p, span, etc.
+            const removeEmptyBlockElements = () => {
+                const blockElements = document.querySelectorAll('div, p, span, section, article, header, footer, aside, nav, main, ul, ol, li, dl, dt, dd, h1, h2, h3, h4, h5, h6');
+                blockElements.forEach(elem => {
+                    if (elem.innerText.trim() === '') {
+                        elem.remove();
+                    }
+                });
+            };
+
+            // Remove margin-right and padding-right from body (often added by modal scripts)
+            document.body.style.marginRight = '0px';
+            document.body.style.paddingRight = '0px';
+            document.body.style.overflow = 'auto';
+
+            // Wait a bit for any animations to complete
+            await new Promise(resolve => setTimeout(resolve, 100));
+        }
+        """
+
+        try:
+            await page.evaluate(remove_overlays_js)
+            await page.wait_for_timeout(500)  # Wait for any animations to complete
+        except Exception as e:
+            if self.verbose:
+                print(f"Warning: Failed to remove overlay elements: {str(e)}")
+
+    async def take_screenshot(self, page: Page) -> str:
+        try:
+            # The page is already loaded, so just take the screenshot
+            screenshot = await page.screenshot(full_page=True)
+            return base64.b64encode(screenshot).decode('utf-8')
+        except Exception as e:
+            error_message = f"Failed to take screenshot: {str(e)}"
+            print(error_message)
+
+            # Generate an error image
+            img = Image.new('RGB', (800, 600), color='black')
+            draw = ImageDraw.Draw(img)
+            font = ImageFont.load_default()
+            draw.text((10, 10), error_message, fill=(255, 255, 255), font=font)
+
+            buffered = BytesIO()
+            img.save(buffered, format="JPEG")
+            return base64.b64encode(buffered.getvalue()).decode('utf-8')
+        finally:
+            await page.close()
diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py
index 005523eb..b66173a7 100644
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -23,13 +23,15 @@ class AsyncWebCrawler:
         self,
         crawler_strategy: Optional[AsyncCrawlerStrategy] = None,
         always_by_pass_cache: bool = False,
+        base_directory: str = str(Path.home()),
         **kwargs,
     ):
         self.crawler_strategy = crawler_strategy or AsyncPlaywrightCrawlerStrategy(
             **kwargs
         )
         self.always_by_pass_cache = always_by_pass_cache
-        self.crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai")
+        # self.crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai")
+        self.crawl4ai_folder = os.path.join(base_directory, ".crawl4ai")
         os.makedirs(self.crawl4ai_folder, exist_ok=True)
         os.makedirs(f"{self.crawl4ai_folder}/cache", exist_ok=True)
         self.ready = False
diff --git a/crawl4ai/config.py b/crawl4ai/config.py
index 2f33a8a9..a07ca977 100644
--- a/crawl4ai/config.py
+++ b/crawl4ai/config.py
@@ -37,16 +37,9 @@ SOCIAL_MEDIA_DOMAINS = [
     'linkedin.com',
     'instagram.com',
     'pinterest.com',
-    'youtube.com',
     'tiktok.com',
     'snapchat.com',
-    'whatsapp.com',
-    'messenger.com',
     'reddit.com',
-    'tumblr.com',
-    'buffer.com',
-    'xing.com',
-    'flipboard.com',
 ]
 
 # Threshold for the Image extraction - Range is 1 to 6
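Note: a small sketch of the new `base_directory` parameter shown above; with it, the `.crawl4ai` cache folder is created under the given path instead of the home directory. The `/tmp/crawl4ai-cache` path is only an example.

```python
import asyncio

from crawl4ai import AsyncWebCrawler


async def main():
    # Cache files go under /tmp/crawl4ai-cache/.crawl4ai instead of ~/.crawl4ai
    async with AsyncWebCrawler(base_directory="/tmp/crawl4ai-cache") as crawler:
        result = await crawler.arun(url="https://example.com")
        print(result.success)


asyncio.run(main())
```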
diff --git a/crawl4ai/content_scrapping_strategy.py b/crawl4ai/content_scrapping_strategy.py
index 0c472f0d..7799de66 100644
--- a/crawl4ai/content_scrapping_strategy.py
+++ b/crawl4ai/content_scrapping_strategy.py
@@ -14,7 +14,10 @@ from .utils import (
     sanitize_html,
     extract_metadata,
     InvalidCSSSelectorError,
-    CustomHTML2Text
+    CustomHTML2Text,
+    normalize_url,
+    is_external_url
+
 )
 
 class ContentScrappingStrategy(ABC):
@@ -67,6 +70,8 @@ class WebScrappingStrategy(ContentScrappingStrategy):
 
         links = {'internal': [], 'external': []}
         media = {'images': [], 'videos': [], 'audios': []}
+        internal_links_dict = {}
+        external_links_dict = {}
 
         # Extract meaningful text for media files from closest parent
         def find_closest_parent_with_useful_text(tag):
@@ -205,30 +210,55 @@ class WebScrappingStrategy(ContentScrappingStrategy):
             social_media_domains = SOCIAL_MEDIA_DOMAINS + kwargs.get('social_media_domains', [])
             social_media_domains = list(set(social_media_domains))
+
             try:
                 if element.name == 'a' and element.get('href'):
-                    href = element['href']
+                    href = element.get('href', '').strip()
+                    if not href:  # Skip empty hrefs
+                        return False
+
                     url_base = url.split('/')[2]
-                    link_data = {'href': href, 'text': element.get_text()}
-                    if href.startswith('http') and url_base not in href:
-                        links['external'].append(link_data)
+
+                    # Normalize the URL
+                    try:
+                        normalized_href = normalize_url(href, url)
+                    except ValueError as e:
+                        # logging.warning(f"Invalid URL format: {href}, Error: {str(e)}")
+                        return False
+
+                    link_data = {
+                        'href': normalized_href,
+                        'text': element.get_text().strip(),
+                        'title': element.get('title', '').strip()
+                    }
+
+                    # Check for duplicates and add to appropriate dictionary
+                    is_external = is_external_url(normalized_href, url_base)
+                    if is_external:
+                        if normalized_href not in external_links_dict:
+                            external_links_dict[normalized_href] = link_data
                     else:
-                        links['internal'].append(link_data)
+                        if normalized_href not in internal_links_dict:
+                            internal_links_dict[normalized_href] = link_data
+
                     keep_element = True
-                    if kwargs.get('exclude_external_links', False):
-                        href_parts = href.split('/')
-                        href_url_base = href_parts[2] if len(href_parts) > 2 else href
-                        if url_base not in href_url_base:
-                            element.decompose()
-                            return False
-
-                    if not kwargs.get('exclude_external_links', False) and kwargs.get('exclude_social_media_links', True):
-                        if any(domain in href for domain in social_media_domains):
+                    # Handle external link exclusions
+                    if is_external:
+                        if kwargs.get('exclude_external_links', False):
                             element.decompose()
                             return False
+                        elif kwargs.get('exclude_social_media_links', False):
+                            if any(domain in normalized_href.lower() for domain in social_media_domains):
+                                element.decompose()
+                                return False
+                        elif kwargs.get('exclude_domains', []):
+                            if any(domain in normalized_href.lower() for domain in kwargs.get('exclude_domains', [])):
+                                element.decompose()
+                                return False
+
             except Exception as e:
-                raise "Error processing links"
+                raise Exception(f"Error processing links: {str(e)}")
 
             try:
                 if element.name == 'img':
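Note: a hedged sketch of how the link-filtering options used in this hunk (`exclude_external_links`, `exclude_social_media_links`, `exclude_domains`) might be passed at crawl time, assuming, as elsewhere in the library, that extra keyword arguments to `arun` are forwarded to the scraping strategy. The domain list is illustrative.

```python
import asyncio

from crawl4ai import AsyncWebCrawler


async def main():
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(
            url="https://example.com",
            exclude_external_links=False,         # keep external links in result.links['external']
            exclude_social_media_links=True,      # drop links matching SOCIAL_MEDIA_DOMAINS
            exclude_domains=["ads.example.com"],  # hypothetical extra domains to drop
        )
        # Each entry is unique by normalized href and carries href/text/title
        for link in result.links["internal"]:
            print(link["href"], "-", link["text"])


asyncio.run(main())
```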
@@ -252,12 +282,18 @@ class WebScrappingStrategy(ContentScrappingStrategy):
                             element.decompose()
                             return False
 
-                    if not kwargs.get('exclude_external_images', False) and kwargs.get('exclude_social_media_links', True):
+                    if not kwargs.get('exclude_external_images', False) and kwargs.get('exclude_social_media_links', False):
                         src_url_base = src.split('/')[2]
                         url_base = url.split('/')[2]
                         if any(domain in src for domain in social_media_domains):
                             element.decompose()
                             return False
+
+                    # Handle exclude domains
+                    if kwargs.get('exclude_domains', []):
+                        if any(domain in src for domain in kwargs.get('exclude_domains', [])):
+                            element.decompose()
+                            return False
 
                     return True  # Always keep image elements
             except Exception as e:
@@ -328,6 +364,11 @@ class WebScrappingStrategy(ContentScrappingStrategy):
         # ]
 
         process_element(body)
+
+        # Update the links dictionary with unique links
+        links['internal'] = list(internal_links_dict.values())
+        links['external'] = list(external_links_dict.values())
+
         # # Process images using ThreadPoolExecutor
         imgs = body.find_all('img')
diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py
index 046067d8..b79e0c43 100644
--- a/crawl4ai/extraction_strategy.py
+++ b/crawl4ai/extraction_strategy.py
@@ -80,6 +80,7 @@ class LLMExtractionStrategy(ExtractionStrategy):
         self.word_token_rate = kwargs.get("word_token_rate", WORD_TOKEN_RATE)
         self.apply_chunking = kwargs.get("apply_chunking", True)
         self.base_url = kwargs.get("base_url", None)
+        self.api_base = kwargs.get("api_base", kwargs.get("base_url", None))
         self.extra_args = kwargs.get("extra_args", {})
         if not self.apply_chunking:
             self.chunk_token_threshold = 1e9
@@ -116,7 +117,7 @@ class LLMExtractionStrategy(ExtractionStrategy):
             self.provider,
             prompt_with_variables,
             self.api_token,
-            base_url=self.base_url,
+            base_url=self.api_base or self.base_url,
             extra_args = self.extra_args
         )  # , json_response=self.extract_type == "schema")
         try:
diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py
index 34ab219b..baa08a0f 100644
--- a/crawl4ai/utils.py
+++ b/crawl4ai/utils.py
@@ -980,4 +980,53 @@ def format_html(html_string):
     soup = BeautifulSoup(html_string, 'html.parser')
     return soup.prettify()
 
+def normalize_url(href, base_url):
+    """Normalize URLs to ensure consistent format"""
+    # Extract protocol and domain from base URL
+    try:
+        base_parts = base_url.split('/')
+        protocol = base_parts[0]
+        domain = base_parts[2]
+    except IndexError:
+        raise ValueError(f"Invalid base URL format: {base_url}")
+
+    # Handle special protocols
+    special_protocols = {'mailto:', 'tel:', 'ftp:', 'file:', 'data:', 'javascript:'}
+    if any(href.lower().startswith(proto) for proto in special_protocols):
+        return href.strip()
+
+    # Handle anchor links
+    if href.startswith('#'):
+        return f"{base_url}{href}"
+
+    # Handle protocol-relative URLs
+    if href.startswith('//'):
+        return f"{protocol}{href}"
+
+    # Handle root-relative URLs
+    if href.startswith('/'):
+        return f"{protocol}//{domain}{href}"
+
+    # Handle relative URLs
+    if not href.startswith(('http://', 'https://')):
+        # Strip a leading './' prefix if present
+        href = href[2:] if href.startswith('./') else href
+        return f"{protocol}//{domain}/{href}"
+
+    return href.strip()
+def is_external_url(url, base_domain):
+    """Determine if a URL is external"""
+    special_protocols = {'mailto:', 'tel:', 'ftp:', 'file:', 'data:', 'javascript:'}
+    if any(url.lower().startswith(proto) for proto in special_protocols):
+        return True
+
+    try:
+        # Handle URLs with protocol
+        if url.startswith(('http://', 'https://')):
+            url_domain = url.split('/')[2]
+            return base_domain.lower() not in url_domain.lower()
+    except IndexError:
+        return False
+
+    return False
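Note: expected behaviour of the two helpers added to `crawl4ai/utils.py`, traced from the implementation above; the URLs are illustrative.

```python
from crawl4ai.utils import normalize_url, is_external_url

base = "https://example.com/blog/post"

print(normalize_url("/docs/intro", base))                # https://example.com/docs/intro
print(normalize_url("//cdn.example.com/app.js", base))   # https://cdn.example.com/app.js
print(normalize_url("#comments", base))                  # https://example.com/blog/post#comments
print(normalize_url("mailto:team@example.com", base))    # mailto:team@example.com (returned as-is)

print(is_external_url("https://twitter.com/example", "example.com"))  # True
print(is_external_url("https://example.com/about", "example.com"))    # False
print(is_external_url("/about", "example.com"))                       # False (relative URLs count as internal)
```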