feat: update documentation for preserve_https_for_internal_links. ref #1410

feat: add preserve_https_for_internal_links flag to maintain HTTPS during crawling. Ref #1410
Added a new `preserve_https_for_internal_links` configuration flag that preserves the original HTTPS scheme for same-domain links even when the server redirects to HTTP.
2025-08-28 17:48:12 +08:00 · 2025-08-28 17:38:40 +08:00
19 changed files with 302 additions and 630 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,16 @@ All notable changes to Crawl4AI will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [Unreleased]
+
+### Added
+- **🔒 HTTPS Preservation for Internal Links**: New `preserve_https_for_internal_links` configuration flag
+  - Maintains HTTPS scheme for internal links even when servers redirect to HTTP
+  - Prevents security downgrades during deep crawling
+  - Useful for security-conscious crawling and sites supporting both protocols
+  - Fully backward compatible: the flag is opt-in (default: `False`)
+  - Fixes issue #1410 where HTTPS URLs were being downgraded to HTTP
+
 ## [0.7.3] - 2025-08-09

 ### Added
--- a/README.md
+++ b/README.md
@@ -304,9 +304,9 @@ The new Docker implementation includes:
 ### Getting Started

 ```bash
-# Pull and run the latest release
-docker pull unclecode/crawl4ai:latest
-docker run -d -p 11235:11235 --name crawl4ai --shm-size=1g unclecode/crawl4ai:latest
+# Pull and run the latest release candidate
+docker pull unclecode/crawl4ai:0.7.0
+docker run -d -p 11235:11235 --name crawl4ai --shm-size=1g unclecode/crawl4ai:0.7.0

 # Visit the playground at http://localhost:11235/playground
 ```
--- a/crawl4ai/async_configs.py
+++ b/crawl4ai/async_configs.py
@@ -1121,6 +1121,7 @@ class CrawlerRunConfig():
        exclude_domains: list = None,
        exclude_internal_links: bool = False,
        score_links: bool = False,
+        preserve_https_for_internal_links: bool = False,
        # Debugging and Logging Parameters
        verbose: bool = True,
        log_console: bool = False,
@@ -1244,6 +1245,7 @@ class CrawlerRunConfig():
        self.exclude_domains = exclude_domains or []
        self.exclude_internal_links = exclude_internal_links
        self.score_links = score_links
+        self.preserve_https_for_internal_links = preserve_https_for_internal_links

        # Debugging and Logging Parameters
        self.verbose = verbose
@@ -1517,6 +1519,7 @@ class CrawlerRunConfig():
            exclude_domains=kwargs.get("exclude_domains", []),
            exclude_internal_links=kwargs.get("exclude_internal_links", False),
            score_links=kwargs.get("score_links", False),
+            preserve_https_for_internal_links=kwargs.get("preserve_https_for_internal_links", False),
            # Debugging and Logging Parameters
            verbose=kwargs.get("verbose", True),
            log_console=kwargs.get("log_console", False),
@@ -1623,6 +1626,7 @@ class CrawlerRunConfig():
            "exclude_domains": self.exclude_domains,
            "exclude_internal_links": self.exclude_internal_links,
            "score_links": self.score_links,
+            "preserve_https_for_internal_links": self.preserve_https_for_internal_links,
            "verbose": self.verbose,
            "log_console": self.log_console,
            "capture_network_requests": self.capture_network_requests,
--- a/crawl4ai/async_webcrawler.py
+++ b/crawl4ai/async_webcrawler.py
@@ -354,6 +354,7 @@ class AsyncWebCrawler:
                    ###############################################################
                    # Process the HTML content, Call CrawlerStrategy.process_html #
                    ###############################################################
+                    from urllib.parse import urlparse
                    crawl_result: CrawlResult = await self.aprocess_html(
                        url=url,
                        html=html,
@@ -364,6 +365,7 @@ class AsyncWebCrawler:
                        verbose=config.verbose,
                        is_raw_html=True if url.startswith("raw:") else False,
                        redirected_url=async_response.redirected_url,
+                        original_scheme=urlparse(url).scheme,
                        **kwargs,
                    )

--- a/crawl4ai/content_scraping_strategy.py
+++ b/crawl4ai/content_scraping_strategy.py
@@ -258,7 +258,11 @@ class LXMLWebScrapingStrategy(ContentScrapingStrategy):
                continue

            try:
-                normalized_href = normalize_url(href, url)
+                normalized_href = normalize_url(
+                    href, url,
+                    preserve_https=kwargs.get('preserve_https_for_internal_links', False),
+                    original_scheme=kwargs.get('original_scheme')
+                )
                link_data = {
                    "href": normalized_href,
                    "text": link.text_content().strip(),
--- a/crawl4ai/utils.py
+++ b/crawl4ai/utils.py
@@ -2146,7 +2146,9 @@ def normalize_url(
    drop_query_tracking=True,
    sort_query=True,
    keep_fragment=False,
-    extra_drop_params=None
+    extra_drop_params=None,
+    preserve_https=False,
+    original_scheme=None
 ):
    """
    Extended URL normalizer
@@ -2176,6 +2178,17 @@ def normalize_url(

    # Resolve relative paths first
    full_url = urljoin(base_url, href.strip())
+    
+    # Preserve HTTPS if requested and original scheme was HTTPS
+    if preserve_https and original_scheme == 'https':
+        parsed_full = urlparse(full_url)
+        parsed_base = urlparse(base_url)
+        # Only preserve HTTPS for same-domain links (not protocol-relative URLs)
+        # Protocol-relative URLs (//example.com) should follow the base URL's scheme
+        if (parsed_full.scheme == 'http' and 
+            parsed_full.netloc == parsed_base.netloc and
+            not href.strip().startswith('//')):
+            full_url = full_url.replace('http://', 'https://', 1)

    # Parse once, edit parts, then rebuild
    parsed = urlparse(full_url)
@@ -2184,10 +2197,8 @@ def normalize_url(
    netloc = parsed.netloc.lower()

    # ── path ──
-    # Strip duplicate slashes and trailing "/" (except root)
-    # IMPORTANT: Don't use quote(unquote()) as it mangles + signs in URLs
-    # The path from urlparse is already properly encoded
-    path = parsed.path
+    # Strip duplicate slashes and trailing "/" (except root)
+    path = quote(unquote(parsed.path))
    if path.endswith('/') and path != '/':
        path = path.rstrip('/')

@@ -2227,7 +2238,7 @@ def normalize_url(
    return normalized


-def normalize_url_for_deep_crawl(href, base_url):
+def normalize_url_for_deep_crawl(href, base_url, preserve_https=False, original_scheme=None):
    """Normalize URLs to ensure consistent format"""
    from urllib.parse import urljoin, urlparse, urlunparse, parse_qs, urlencode

@@ -2238,6 +2249,17 @@ def normalize_url_for_deep_crawl(href, base_url):
    # Use urljoin to handle relative URLs
    full_url = urljoin(base_url, href.strip())
    
+    # Preserve HTTPS if requested and original scheme was HTTPS
+    if preserve_https and original_scheme == 'https':
+        parsed_full = urlparse(full_url)
+        parsed_base = urlparse(base_url)
+        # Only preserve HTTPS for same-domain links (not protocol-relative URLs)
+        # Protocol-relative URLs (//example.com) should follow the base URL's scheme
+        if (parsed_full.scheme == 'http' and 
+            parsed_full.netloc == parsed_base.netloc and
+            not href.strip().startswith('//')):
+            full_url = full_url.replace('http://', 'https://', 1)
+    
    # Parse the URL for normalization
    parsed = urlparse(full_url)
    
@@ -2275,7 +2297,7 @@ def normalize_url_for_deep_crawl(href, base_url):
    return normalized

@lru_cache(maxsize=10000)
-def efficient_normalize_url_for_deep_crawl(href, base_url):
+def efficient_normalize_url_for_deep_crawl(href, base_url, preserve_https=False, original_scheme=None):
    """Efficient URL normalization with proper parsing"""
    from urllib.parse import urljoin
    
@@ -2285,6 +2307,17 @@ def efficient_normalize_url_for_deep_crawl(href, base_url):
    # Resolve relative URLs
    full_url = urljoin(base_url, href.strip())
    
+    # Preserve HTTPS if requested and original scheme was HTTPS
+    if preserve_https and original_scheme == 'https':
+        parsed_full = urlparse(full_url)
+        parsed_base = urlparse(base_url)
+        # Only preserve HTTPS for same-domain links (not protocol-relative URLs)
+        # Protocol-relative URLs (//example.com) should follow the base URL's scheme
+        if (parsed_full.scheme == 'http' and 
+            parsed_full.netloc == parsed_base.netloc and
+            not href.strip().startswith('//')):
+            full_url = full_url.replace('http://', 'https://', 1)
+    
    # Use proper URL parsing
    parsed = urlparse(full_url)
    
--- a/deploy/docker/.llm.env.example
+++ b/deploy/docker/.llm.env.example
@@ -10,23 +10,4 @@ GEMINI_API_TOKEN=your_gemini_key_here
 # Optional: Override the default LLM provider
 # Examples: "openai/gpt-4", "anthropic/claude-3-opus", "deepseek/chat", etc.
 # If not set, uses the provider specified in config.yml (default: openai/gpt-4o-mini)
-# LLM_PROVIDER=anthropic/claude-3-opus
-
-# Optional: Global LLM temperature setting (0.0-2.0)
-# Controls randomness in responses. Lower = more focused, Higher = more creative
-# LLM_TEMPERATURE=0.7
-
-# Optional: Global custom API base URL
-# Use this to point to custom endpoints or proxy servers
-# LLM_BASE_URL=https://api.custom.com/v1
-
-# Optional: Provider-specific temperature overrides
-# These take precedence over the global LLM_TEMPERATURE
-# OPENAI_TEMPERATURE=0.5
-# ANTHROPIC_TEMPERATURE=0.3
-# GROQ_TEMPERATURE=0.8
-
-# Optional: Provider-specific base URL overrides
-# Use for provider-specific proxy endpoints
-# OPENAI_BASE_URL=https://custom-openai.company.com/v1
-# GROQ_BASE_URL=https://custom-groq.company.com/v1
+# LLM_PROVIDER=anthropic/claude-3-opus
--- a/deploy/docker/README.md
+++ b/deploy/docker/README.md
@@ -692,7 +692,8 @@ app:
 # Default LLM Configuration
 llm:
  provider: "openai/gpt-4o-mini"  # Can be overridden by LLM_PROVIDER env var
-  # api_key: sk-...  # If you pass the API key directly (not recommended)
+  api_key_env: "OPENAI_API_KEY"
+  # api_key: sk-...  # If you pass the API key directly then api_key_env will be ignored

 # Redis Configuration (Used by internal Redis server managed by supervisord)
 redis:
--- a/deploy/docker/api.py
+++ b/deploy/docker/api.py
@@ -42,9 +42,7 @@ from utils import (
    should_cleanup_task,
    decode_redis_hash,
    get_llm_api_key,
-    validate_llm_provider,
-    get_llm_temperature,
-    get_llm_base_url
+    validate_llm_provider
 )

 import psutil, time
@@ -98,9 +96,7 @@ async def handle_llm_qa(
        response = perform_completion_with_backoff(
            provider=config["llm"]["provider"],
            prompt_with_variables=prompt,
-            api_token=get_llm_api_key(config),  # Returns None to let litellm handle it
-            temperature=get_llm_temperature(config),
-            base_url=get_llm_base_url(config)
+            api_token=get_llm_api_key(config)
        )

        return response.choices[0].message.content
@@ -119,9 +115,7 @@ async def process_llm_extraction(
    instruction: str,
    schema: Optional[str] = None,
    cache: str = "0",
-    provider: Optional[str] = None,
-    temperature: Optional[float] = None,
-    base_url: Optional[str] = None
+    provider: Optional[str] = None
 ) -> None:
    """Process LLM extraction in background."""
    try:
@@ -133,13 +127,11 @@ async def process_llm_extraction(
                "error": error_msg
            })
            return
-        api_key = get_llm_api_key(config, provider)  # Returns None to let litellm handle it
+        api_key = get_llm_api_key(config, provider)
        llm_strategy = LLMExtractionStrategy(
            llm_config=LLMConfig(
                provider=provider or config["llm"]["provider"],
-                api_token=api_key,
-                temperature=temperature or get_llm_temperature(config, provider),
-                base_url=base_url or get_llm_base_url(config, provider)
+                api_token=api_key
            ),
            instruction=instruction,
            schema=json.loads(schema) if schema else None,
@@ -186,9 +178,7 @@ async def handle_markdown_request(
    query: Optional[str] = None,
    cache: str = "0",
    config: Optional[dict] = None,
-    provider: Optional[str] = None,
-    temperature: Optional[float] = None,
-    base_url: Optional[str] = None
+    provider: Optional[str] = None
 ) -> str:
    """Handle markdown generation requests."""
    try:
@@ -213,9 +203,7 @@ async def handle_markdown_request(
                FilterType.LLM: LLMContentFilter(
                    llm_config=LLMConfig(
                        provider=provider or config["llm"]["provider"],
-                        api_token=get_llm_api_key(config, provider),  # Returns None to let litellm handle it
-                        temperature=temperature or get_llm_temperature(config, provider),
-                        base_url=base_url or get_llm_base_url(config, provider)
+                        api_token=get_llm_api_key(config, provider),
                    ),
                    instruction=query or "Extract main content"
                )
@@ -260,9 +248,7 @@ async def handle_llm_request(
    schema: Optional[str] = None,
    cache: str = "0",
    config: Optional[dict] = None,
-    provider: Optional[str] = None,
-    temperature: Optional[float] = None,
-    api_base_url: Optional[str] = None
+    provider: Optional[str] = None
 ) -> JSONResponse:
    """Handle LLM extraction requests."""
    base_url = get_base_url(request)
@@ -293,9 +279,7 @@ async def handle_llm_request(
            cache,
            base_url,
            config,
-            provider,
-            temperature,
-            api_base_url
+            provider
        )

    except Exception as e:
@@ -340,9 +324,7 @@ async def create_new_task(
    cache: str,
    base_url: str,
    config: dict,
-    provider: Optional[str] = None,
-    temperature: Optional[float] = None,
-    api_base_url: Optional[str] = None
+    provider: Optional[str] = None
 ) -> JSONResponse:
    """Create and initialize a new task."""
    decoded_url = unquote(input_path)
@@ -367,9 +349,7 @@ async def create_new_task(
        query,
        schema,
        cache,
-        provider,
-        temperature,
-        api_base_url
+        provider
    )

    return JSONResponse({
--- a/deploy/docker/config.yml
+++ b/deploy/docker/config.yml
@@ -11,7 +11,8 @@ app:
 # Default LLM Configuration
 llm:
  provider: "openai/gpt-4o-mini"
-  # api_key: sk-...  # If you pass the API key directly (not recommended)
+  api_key_env: "OPENAI_API_KEY"
+  # api_key: sk-...  # If you pass the API key directly then api_key_env will be ignored

 # Redis Configuration
 redis:
--- a/deploy/docker/job.py
+++ b/deploy/docker/job.py
@@ -37,8 +37,6 @@ class LlmJobPayload(BaseModel):
    schema: Optional[str] = None
    cache:  bool = False
    provider: Optional[str] = None
-    temperature: Optional[float] = None
-    base_url: Optional[str] = None


 class CrawlJobPayload(BaseModel):
@@ -65,8 +63,6 @@ async def llm_job_enqueue(
        cache=payload.cache,
        config=_config,
        provider=payload.provider,
-        temperature=payload.temperature,
-        api_base_url=payload.base_url,
    )


@@ -76,7 +72,7 @@ async def llm_job_status(
    task_id: str,
    _td: Dict = Depends(lambda: _token_dep())
 ):
-    return await handle_task_status(_redis, task_id, base_url=str(request.base_url))
+    return await handle_task_status(_redis, task_id)


 # ---------- CRAWL job -------------------------------------------------------
--- a/deploy/docker/schemas.py
+++ b/deploy/docker/schemas.py
@@ -16,8 +16,6 @@ class MarkdownRequest(BaseModel):
    q:   Optional[str] = Field(None,  description="Query string used by BM25/LLM filters")
    c:   Optional[str] = Field("0",   description="Cache‑bust / revision counter")
    provider: Optional[str] = Field(None, description="LLM provider override (e.g., 'anthropic/claude-3-opus')")
-    temperature: Optional[float] = Field(None, description="LLM temperature override (0.0-2.0)")
-    base_url: Optional[str] = Field(None, description="LLM API base URL override")


 class RawCode(BaseModel):
--- a/deploy/docker/server.py
+++ b/deploy/docker/server.py
@@ -241,8 +241,7 @@ async def get_markdown(
        raise HTTPException(
            400, "Invalid URL format. Must start with http://, https://, or for raw HTML (raw:, raw://)")
    markdown = await handle_markdown_request(
-        body.url, body.f, body.q, body.c, config, body.provider,
-        body.temperature, body.base_url
+        body.url, body.f, body.q, body.c, config, body.provider
    )
    return JSONResponse({
        "url": body.url,
--- a/deploy/docker/utils.py
+++ b/deploy/docker/utils.py
@@ -71,7 +71,7 @@ def decode_redis_hash(hash_data: Dict[bytes, bytes]) -> Dict[str, str]:



-def get_llm_api_key(config: Dict, provider: Optional[str] = None) -> Optional[str]:
+def get_llm_api_key(config: Dict, provider: Optional[str] = None) -> str:
    """Get the appropriate API key based on the LLM provider.
    
    Args:
@@ -79,14 +79,19 @@ def get_llm_api_key(config: Dict, provider: Optional[str] = None) -> Optional[st
        provider: Optional provider override (e.g., "openai/gpt-4")
    
    Returns:
-        The API key if directly configured, otherwise None to let litellm handle it
+        The API key for the provider, or empty string if not found
    """
-    # Check if direct API key is configured (for backward compatibility)
+        
+    # Use provided provider or fall back to config
+    if not provider:
+        provider = config["llm"]["provider"]
+    
+    # Check if direct API key is configured
    if "api_key" in config["llm"]:
        return config["llm"]["api_key"]
    
-    # Return None - litellm will automatically find the right environment variable
-    return None
+    # No direct key configured: read the environment variable named by api_key_env
+    return os.environ.get(config["llm"].get("api_key_env", ""), "")


 def validate_llm_provider(config: Dict, provider: Optional[str] = None) -> tuple[bool, str]:
@@ -99,78 +104,19 @@ def validate_llm_provider(config: Dict, provider: Optional[str] = None) -> tuple
    Returns:
        Tuple of (is_valid, error_message)
    """
-    # If a direct API key is configured, validation passes
-    if "api_key" in config["llm"]:
-        return True, ""
+    # Use provided provider or fall back to config
+    if not provider:
+        provider = config["llm"]["provider"]
+    
+    # Get the API key for this provider
+    api_key = get_llm_api_key(config, provider)
+    
+    if not api_key:
+        return False, f"No API key found for provider '{provider}'. Please set the appropriate environment variable."
    
-    # Otherwise, trust that litellm will find the appropriate environment variable
-    # We can't easily validate this without reimplementing litellm's logic
    return True, ""


-def get_llm_temperature(config: Dict, provider: Optional[str] = None) -> Optional[float]:
-    """Get temperature setting based on the LLM provider.
-    
-    Priority order:
-    1. Provider-specific environment variable (e.g., OPENAI_TEMPERATURE)
-    2. Global LLM_TEMPERATURE environment variable
-    3. None (to use litellm/provider defaults)
-    
-    Args:
-        config: The application configuration dictionary
-        provider: Optional provider override (e.g., "openai/gpt-4")
-    
-    Returns:
-        The temperature setting if configured, otherwise None
-    """
-    # Check provider-specific temperature first
-    if provider:
-        provider_name = provider.split('/')[0].upper()
-        provider_temp = os.environ.get(f"{provider_name}_TEMPERATURE")
-        if provider_temp:
-            try:
-                return float(provider_temp)
-            except ValueError:
-                logging.warning(f"Invalid temperature value for {provider_name}: {provider_temp}")
-    
-    # Check global LLM_TEMPERATURE
-    global_temp = os.environ.get("LLM_TEMPERATURE")
-    if global_temp:
-        try:
-            return float(global_temp)
-        except ValueError:
-            logging.warning(f"Invalid global temperature value: {global_temp}")
-    
-    # Return None to use litellm/provider defaults
-    return None
-
-
-def get_llm_base_url(config: Dict, provider: Optional[str] = None) -> Optional[str]:
-    """Get base URL setting based on the LLM provider.
-    
-    Priority order:
-    1. Provider-specific environment variable (e.g., OPENAI_BASE_URL)
-    2. Global LLM_BASE_URL environment variable
-    3. None (to use default endpoints)
-    
-    Args:
-        config: The application configuration dictionary
-        provider: Optional provider override (e.g., "openai/gpt-4")
-    
-    Returns:
-        The base URL if configured, otherwise None
-    """
-    # Check provider-specific base URL first
-    if provider:
-        provider_name = provider.split('/')[0].upper()
-        provider_url = os.environ.get(f"{provider_name}_BASE_URL")
-        if provider_url:
-            return provider_url
-    
-    # Check global LLM_BASE_URL
-    return os.environ.get("LLM_BASE_URL")
-
-
 def verify_email_domain(email: str) -> bool:
    try:
        domain = email.split('@')[1]
--- a/docs/md_v2/api/parameters.md
+++ b/docs/md_v2/api/parameters.md
@@ -155,6 +155,7 @@ If your page is a single-page app with repeated JS updates, set `js_only=True` i
 | **`exclude_external_links`** | `bool` (False)          | Removes all links pointing outside the current domain.                                                                      |
 | **`exclude_social_media_links`** | `bool` (False)      | Strips links specifically to social sites (like Facebook or Twitter).                                                      |
 | **`exclude_domains`**        | `list` ([])             | Provide a custom list of domains to exclude (like `["ads.com", "trackers.io"]`).                                            |
+| **`preserve_https_for_internal_links`** | `bool` (False) | If `True`, preserves HTTPS scheme for internal links even when the server redirects to HTTP. Useful for security-conscious crawling. |

 Use these for link-level content filtering (often to keep crawls “internal” or to remove spammy domains).

--- a/docs/md_v2/core/deep-crawling.md
+++ b/docs/md_v2/core/deep-crawling.md
@@ -472,6 +472,17 @@ Note that for BestFirstCrawlingStrategy, score_threshold is not needed since pag

 5.**Balance breadth vs. depth.** Choose your strategy wisely - BFS for comprehensive coverage, DFS for deep exploration, BestFirst for focused relevance-based crawling.

+6.**Preserve HTTPS for security.** If crawling HTTPS sites that redirect to HTTP, use `preserve_https_for_internal_links=True` to maintain secure connections:
+
+```python
+config = CrawlerRunConfig(
+    deep_crawl_strategy=BFSDeepCrawlStrategy(max_depth=2),
+    preserve_https_for_internal_links=True  # Keep HTTPS even if server redirects to HTTP
+)
+```
+
+This is especially useful for security-conscious crawling or when dealing with sites that support both protocols.
+
 ---

 ## 10. Summary & Next Steps
--- a/docs/md_v2/core/docker-deployment.md
+++ b/docs/md_v2/core/docker-deployment.md
@@ -89,16 +89,6 @@ ANTHROPIC_API_KEY=your-anthropic-key
 # TOGETHER_API_KEY=your-together-key
 # MISTRAL_API_KEY=your-mistral-key
 # GEMINI_API_TOKEN=your-gemini-token
-
-# Optional: Global LLM settings
-# LLM_PROVIDER=openai/gpt-4o-mini
-# LLM_TEMPERATURE=0.7
-# LLM_BASE_URL=https://api.custom.com/v1
-
-# Optional: Provider-specific overrides
-# OPENAI_TEMPERATURE=0.5
-# OPENAI_BASE_URL=https://custom-openai.com/v1
-# ANTHROPIC_TEMPERATURE=0.3
 EOL
 ```
 > 🔑 **Note**: Keep your API keys secure! Never commit `.llm.env` to version control.
@@ -166,43 +156,27 @@ cp deploy/docker/.llm.env.example .llm.env

 **Flexible LLM Provider Configuration:**

-The Docker setup now supports flexible LLM provider configuration through a hierarchical system:
+The Docker setup now supports flexible LLM provider configuration through three methods:

-1. **API Request Parameters** (Highest Priority): Specify per request
+1. **Environment Variable** (Highest Priority): Set `LLM_PROVIDER` to override the default
+   ```bash
+   export LLM_PROVIDER="anthropic/claude-3-opus"
+   # Or in your .llm.env file:
+   # LLM_PROVIDER=anthropic/claude-3-opus
+   ```
+
+2. **API Request Parameter**: Specify provider per request
   ```json
   {
     "url": "https://example.com",
     "f": "llm",
-     "provider": "groq/mixtral-8x7b",
-     "temperature": 0.7,
-     "base_url": "https://api.custom.com/v1"
+     "provider": "groq/mixtral-8x7b"
   }
   ```

-2. **Provider-Specific Environment Variables**: Override for specific providers
-   ```bash
-   # In your .llm.env file:
-   OPENAI_TEMPERATURE=0.5
-   OPENAI_BASE_URL=https://custom-openai.com/v1
-   ANTHROPIC_TEMPERATURE=0.3
-   ```
+3. **Config File Default**: Falls back to `config.yml` (default: `openai/gpt-4o-mini`)

-3. **Global Environment Variables**: Set defaults for all providers
-   ```bash
-   # In your .llm.env file:
-   LLM_PROVIDER=anthropic/claude-3-opus
-   LLM_TEMPERATURE=0.7
-   LLM_BASE_URL=https://api.proxy.com/v1
-   ```
-
-4. **Config File Default**: Falls back to `config.yml` (default: `openai/gpt-4o-mini`)
-
-The system automatically selects the appropriate API key based on the provider. LiteLLM handles finding the correct environment variable for each provider (e.g., OPENAI_API_KEY for OpenAI, GEMINI_API_TOKEN for Google Gemini, etc.).
-
-**Supported LLM Parameters:**
- `provider`: LLM provider and model (e.g., "openai/gpt-4", "anthropic/claude-3-opus")
- `temperature`: Controls randomness (0.0-2.0, lower = more focused, higher = more creative)
- `base_url`: Custom API endpoint for proxy servers or alternative endpoints
+The API key is read from the environment variable named by `api_key_env` in the config file, unless `api_key` is set directly in the config.

 #### 3. Build and Run with Compose

@@ -581,101 +555,6 @@ Crucially, when sending configurations directly via JSON, they **must** follow t
 **LLM Extraction Strategy** *(Keep example, ensure schema uses type/value wrapper)*
 *(Keep Deep Crawler Example)*

-### LLM Configuration Examples
-
-The Docker API supports dynamic LLM configuration through multiple levels:
-
-#### Temperature Control
-
-Temperature affects the randomness of LLM responses (0.0 = deterministic, 2.0 = very creative):
-
-```python
-import requests
-
-# Low temperature for factual extraction
-response = requests.post(
-    "http://localhost:11235/md",
-    json={
-        "url": "https://example.com",
-        "f": "llm",
-        "q": "Extract all dates and numbers from this page",
-        "temperature": 0.2  # Very focused, deterministic
-    }
-)
-
-# High temperature for creative tasks
-response = requests.post(
-    "http://localhost:11235/md",
-    json={
-        "url": "https://example.com", 
-        "f": "llm",
-        "q": "Write a creative summary of this content",
-        "temperature": 1.2  # More creative, varied responses
-    }
-)
-```
-
-#### Custom API Endpoints
-
-Use custom base URLs for proxy servers or alternative API endpoints:
-
-```python
-
-# Using a local LLM server
-response = requests.post(
-    "http://localhost:11235/md",
-    json={
-        "url": "https://example.com",
-        "f": "llm",
-        "q": "Extract key information",
-        "provider": "ollama/llama2",
-        "base_url": "http://localhost:11434/v1"
-    }
-)
-```
-
-#### Dynamic Provider Selection
-
-Switch between providers based on task requirements:
-
-```python
-async def smart_extraction(url: str, content_type: str):
-    """Select provider and temperature based on content type"""
-    
-    configs = {
-        "technical": {
-            "provider": "openai/gpt-4",
-            "temperature": 0.3,
-            "query": "Extract technical specifications and code examples"
-        },
-        "creative": {
-            "provider": "anthropic/claude-3-opus",
-            "temperature": 0.9,
-            "query": "Create an engaging narrative summary"
-        },
-        "quick": {
-            "provider": "groq/mixtral-8x7b",
-            "temperature": 0.5,
-            "query": "Quick summary in bullet points"
-        }
-    }
-    
-    config = configs.get(content_type, configs["quick"])
-    
-    response = await httpx.post(
-        "http://localhost:11235/md",
-        json={
-            "url": url,
-            "f": "llm",
-            "q": config["query"],
-            "provider": config["provider"],
-            "temperature": config["temperature"]
-        }
-    )
-    
-    return response.json()
-```
-
 ### REST API Examples

 Update URLs to use port `11235`.
@@ -814,8 +693,8 @@ app:
 # Default LLM Configuration
 llm:
  provider: "openai/gpt-4o-mini"  # Can be overridden by LLM_PROVIDER env var
-  # api_key: sk-...  # If you pass the API key directly (not recommended)
-  # temperature and base_url are controlled via environment variables or request parameters
+  api_key_env: "OPENAI_API_KEY"
+  # api_key: sk-...  # If you pass the API key directly then api_key_env will be ignored

 # Redis Configuration (Used by internal Redis server managed by supervisord)
 redis:
--- a/tests/docker/test_llm_params.py
+++ b/tests/docker/test_llm_params.py
@@ -1,349 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for LLM temperature and base_url parameters in Crawl4AI Docker API.
-This demonstrates the new hierarchical configuration system:
-1. Request-level parameters (highest priority)
-2. Provider-specific environment variables
-3. Global environment variables
-4. System defaults (lowest priority)
-"""
-
-import asyncio
-import httpx
-import json
-import os
-from rich.console import Console
-from rich.panel import Panel
-from rich.syntax import Syntax
-from rich.table import Table
-
-
-console = Console()
-
-# Configuration
-BASE_URL = "http://localhost:11235"  # Docker API endpoint
-TEST_URL = "https://httpbin.org/html"     # Simple test page
-
-# --- Helper Functions ---
-
-async def check_server_health(client: httpx.AsyncClient) -> bool:
-    """Check if the server is healthy."""
-    console.print("[bold cyan]Checking server health...[/]", end="")
-    try:
-        response = await client.get("/health", timeout=10.0)
-        response.raise_for_status()
-        console.print(" [bold green]✓ Server is healthy![/]")
-        return True
-    except Exception as e:
-        console.print(f"\n[bold red]✗ Server health check failed: {e}[/]")
-        console.print(f"Is the server running at {BASE_URL}?")
-        return False
-
-def print_request(endpoint: str, payload: dict, title: str = "Request"):
-    """Pretty print the request."""
-    syntax = Syntax(json.dumps(payload, indent=2), "json", theme="monokai")
-    console.print(Panel.fit(
-        f"[cyan]POST {endpoint}[/cyan]\n{syntax}",
-        title=f"[bold blue]{title}[/]",
-        border_style="blue"
-    ))
-
-def print_response(response: dict, title: str = "Response"):
-    """Pretty print relevant parts of the response."""
-    # Extract only the relevant parts
-    relevant = {}
-    if "markdown" in response:
-        relevant["markdown"] = response["markdown"][:200] + "..." if len(response.get("markdown", "")) > 200 else response.get("markdown", "")
-    if "success" in response:
-        relevant["success"] = response["success"]
-    if "url" in response:
-        relevant["url"] = response["url"]
-    if "filter" in response:
-        relevant["filter"] = response["filter"]
-    
-    console.print(Panel.fit(
-        Syntax(json.dumps(relevant, indent=2), "json", theme="monokai"),
-        title=f"[bold green]{title}[/]",
-        border_style="green"
-    ))
-
-# --- Test Functions ---
-
-async def test_default_no_params(client: httpx.AsyncClient):
-    """Test 1: No temperature or base_url specified - uses defaults"""
-    console.rule("[bold yellow]Test 1: Default Configuration (No Parameters)[/]")
-    
-    payload = {
-        "url": TEST_URL,
-        "f": "llm",
-        "q": "What is the main heading of this page? Answer in exactly 5 words."
-    }
-    
-    print_request("/md", payload, "Request without temperature/base_url")
-    
-    try:
-        response = await client.post("/md", json=payload, timeout=30.0)
-        response.raise_for_status()
-        data = response.json()
-        print_response(data, "Response (using system defaults)")
-        console.print("[dim]→ This used system defaults or environment variables if set[/]")
-    except Exception as e:
-        console.print(f"[red]Error: {e}[/]")
-
-async def test_request_temperature(client: httpx.AsyncClient):
-    """Test 2: Request-level temperature (highest priority)"""
-    console.rule("[bold yellow]Test 2: Request-Level Temperature[/]")
-    
-    # Test with low temperature (more focused)
-    payload_low = {
-        "url": TEST_URL,
-        "f": "llm",
-        "q": "What is the main heading? Be creative and poetic.",
-        "temperature": 0.1  # Very low - should be less creative
-    }
-    
-    print_request("/md", payload_low, "Low Temperature (0.1)")
-    
-    try:
-        response = await client.post("/md", json=payload_low, timeout=30.0)
-        response.raise_for_status()
-        data_low = response.json()
-        print_response(data_low, "Response with Low Temperature")
-        console.print("[dim]→ Low temperature (0.1) should produce focused, less creative output[/]")
-    except Exception as e:
-        console.print(f"[red]Error: {e}[/]")
-    
-    console.print()
-    
-    # Test with high temperature (more creative)
-    payload_high = {
-        "url": TEST_URL,
-        "f": "llm",
-        "q": "What is the main heading? Be creative and poetic.",
-        "temperature": 1.5  # High - should be more creative
-    }
-    
-    print_request("/md", payload_high, "High Temperature (1.5)")
-    
-    try:
-        response = await client.post("/md", json=payload_high, timeout=30.0)
-        response.raise_for_status()
-        data_high = response.json()
-        print_response(data_high, "Response with High Temperature")
-        console.print("[dim]→ High temperature (1.5) should produce more creative, varied output[/]")
-    except Exception as e:
-        console.print(f"[red]Error: {e}[/]")
-
-async def test_provider_override(client: httpx.AsyncClient):
-    """Test 3: Provider override with temperature"""
-    console.rule("[bold yellow]Test 3: Provider Override with Temperature[/]")
-    
-    provider = "gemini/gemini-2.5-flash-lite"
-    payload = {
-        "url": TEST_URL,
-        "f": "llm",
-        "q": "Summarize this page in one sentence.",
-        "provider": provider,  # Explicitly set provider
-        "temperature": 0.7
-    }
-    
-    print_request("/md", payload, "Provider + Temperature Override")
-    
-    try:
-        response = await client.post("/md", json=payload, timeout=30.0)
-        response.raise_for_status()
-        data = response.json()
-        print_response(data, "Response with Provider Override")
-        console.print(f"[dim]→ This explicitly uses {provider} with temperature 0.7[/]")
-    except Exception as e:
-        console.print(f"[red]Error: {e}[/]")
-
-async def test_base_url_custom(client: httpx.AsyncClient):
-    """Test 4: Custom base_url (will fail unless you have a custom endpoint)"""
-    console.rule("[bold yellow]Test 4: Custom Base URL (Demo Only)[/]")
-    
-    payload = {
-        "url": TEST_URL,
-        "f": "llm",
-        "q": "What is this page about?",
-        "base_url": "https://api.custom-endpoint.com/v1",  # Custom endpoint
-        "temperature": 0.5
-    }
-    
-    print_request("/md", payload, "Custom Base URL Request")
-    console.print("[yellow]Note: This will fail unless you have a custom endpoint set up[/]")
-    
-    try:
-        response = await client.post("/md", json=payload, timeout=10.0)
-        response.raise_for_status()
-        data = response.json()
-        print_response(data, "Response from Custom Endpoint")
-    except httpx.HTTPStatusError as e:
-        console.print(f"[yellow]Expected failure (no custom endpoint): Status {e.response.status_code}[/]")
-    except Exception as e:
-        console.print(f"[yellow]Expected error: {e}[/]")
-
-async def test_llm_job_endpoint(client: httpx.AsyncClient):
-    """Test 5: Test the /llm/job endpoint with temperature and base_url"""
-    console.rule("[bold yellow]Test 5: LLM Job Endpoint with Parameters[/]")
-    
-    payload = {
-        "url": TEST_URL,
-        "q": "Extract the main title and any key information",
-        "temperature": 0.3,
-        # "base_url": "https://api.openai.com/v1"  # Optional
-    }
-    
-    print_request("/llm/job", payload, "LLM Job with Temperature")
-    
-    try:
-        # Submit the job
-        response = await client.post("/llm/job", json=payload, timeout=30.0)
-        response.raise_for_status()
-        job_data = response.json()
-        
-        if "task_id" in job_data:
-            task_id = job_data["task_id"]
-            console.print(f"[green]Job created with task_id: {task_id}[/]")
-            
-            # Poll for result (simplified - in production use proper polling)
-            await asyncio.sleep(3)
-            
-            status_response = await client.get(f"/llm/job/{task_id}")
-            status_data = status_response.json()
-            
-            if status_data.get("status") == "completed":
-                console.print("[green]Job completed successfully![/]")
-                if "result" in status_data:
-                    console.print(Panel.fit(
-                        Syntax(json.dumps(status_data["result"], indent=2), "json", theme="monokai"),
-                        title="Extraction Result",
-                        border_style="green"
-                    ))
-            else:
-                console.print(f"[yellow]Job status: {status_data.get('status', 'unknown')}[/]")
-        else:
-            console.print(f"[red]Unexpected response: {job_data}[/]")
-            
-    except Exception as e:
-        console.print(f"[red]Error: {e}[/]")
-
-
-async def test_llm_endpoint(client: httpx.AsyncClient):
-    """
-    Quick QA round-trip with /llm.
-    Asks a trivial question against SIMPLE_URL just to show wiring.
-    """
-    import time
-    import urllib.parse
-
-    page_url = "https://kidocode.com"
-    question = "What is the title of this page?"
-
-    enc = urllib.parse.quote_plus(page_url, safe="")
-    console.print(f"GET /llm/{enc}?q={question}")
-
-    try:
-        t0 = time.time()
-        resp = await client.get(f"/llm/{enc}", params={"q": question})
-        dt = time.time() - t0
-        console.print(
-            f"Response Status: [bold {'green' if resp.is_success else 'red'}]{resp.status_code}[/] (took {dt:.2f}s)")
-        resp.raise_for_status()
-        answer = resp.json().get("answer", "")
-        console.print(Panel(answer or "No answer returned",
-                      title="LLM answer", border_style="magenta", expand=False))
-    except Exception as e:
-        console.print(f"[bold red]Error hitting /llm:[/] {e}")
-
-
-async def show_environment_info():
-    """Display current environment configuration"""
-    console.rule("[bold cyan]Current Environment Configuration[/]")
-    
-    table = Table(title="LLM Environment Variables", show_header=True, header_style="bold magenta")
-    table.add_column("Variable", style="cyan", width=30)
-    table.add_column("Value", style="yellow")
-    table.add_column("Description", style="dim")
-    
-    env_vars = [
-        ("LLM_PROVIDER", "Global default provider"),
-        ("LLM_TEMPERATURE", "Global default temperature"),
-        ("LLM_BASE_URL", "Global custom API endpoint"),
-        ("OPENAI_API_KEY", "OpenAI API key"),
-        ("OPENAI_TEMPERATURE", "OpenAI-specific temperature"),
-        ("OPENAI_BASE_URL", "OpenAI-specific endpoint"),
-        ("ANTHROPIC_API_KEY", "Anthropic API key"),
-        ("ANTHROPIC_TEMPERATURE", "Anthropic-specific temperature"),
-        ("GROQ_API_KEY", "Groq API key"),
-        ("GROQ_TEMPERATURE", "Groq-specific temperature"),
-    ]
-    
-    for var, desc in env_vars:
-        value = os.environ.get(var, "[not set]")
-        if "API_KEY" in var and value != "[not set]":
-            # Mask API keys for security
-            value = value[:10] + "..." if len(value) > 10 else "***"
-        table.add_row(var, value, desc)
-    
-    console.print(table)
-    console.print()
-
-# --- Main Test Runner ---
-
-async def main():
-    """Run all tests"""
-    console.print(Panel.fit(
-        "[bold cyan]Crawl4AI LLM Parameters Test Suite[/]\n" +
-        "Testing temperature and base_url configuration hierarchy",
-        border_style="cyan"
-    ))
-    
-    # Show current environment
-    # await show_environment_info()
-    
-    # Create HTTP client
-    async with httpx.AsyncClient(base_url=BASE_URL, timeout=60.0) as client:
-        # Check server health
-        if not await check_server_health(client):
-            console.print("[red]Server is not available. Please ensure the Docker container is running.[/]")
-            return
-        
-        # Run tests
-        tests = [
-            ("Default Configuration", test_default_no_params),
-            ("Request Temperature", test_request_temperature),
-            ("Provider Override", test_provider_override),
-            ("Custom Base URL", test_base_url_custom),
-            ("LLM Job Endpoint", test_llm_job_endpoint),
-            ("LLM Endpoint", test_llm_endpoint),
-        ]
-        
-        for i, (name, test_func) in enumerate(tests, 1):
-            if i > 1:
-                console.print()  # Add spacing between tests
-            
-            try:
-                await test_func(client)
-            except Exception as e:
-                console.print(f"[red]Test '{name}' failed with error: {e}[/]")
-                console.print_exception(show_locals=False)
-        
-        console.rule("[bold green]All Tests Complete![/]", style="green")
-        
-        # Summary
-        console.print("\n[bold cyan]Configuration Hierarchy Summary:[/]")
-        console.print("1. [yellow]Request parameters[/] - Highest priority (temperature, base_url in API call)")
-        console.print("2. [yellow]Provider-specific env[/] - e.g., OPENAI_TEMPERATURE, GROQ_BASE_URL")
-        console.print("3. [yellow]Global env variables[/] - LLM_TEMPERATURE, LLM_BASE_URL")
-        console.print("4. [yellow]System defaults[/] - Lowest priority (provider/litellm defaults)")
-        console.print()
-
-if __name__ == "__main__":
-    try:
-        asyncio.run(main())
-    except KeyboardInterrupt:
-        console.print("\n[yellow]Tests interrupted by user.[/]")
-    except Exception as e:
-        console.print(f"\n[bold red]An error occurred:[/]")
-        console.print_exception(show_locals=False)
--- a/tests/test_preserve_https_for_internal_links.py
+++ b/tests/test_preserve_https_for_internal_links.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+"""
+Test and demo for the HTTPS preservation feature (Issue #1410)
+
+This demonstrates how the preserve_https_for_internal_links flag
+prevents HTTPS downgrade when servers redirect to HTTP.
+"""
+
+import sys
+import os
+from urllib.parse import urljoin, urlparse
+
+def demonstrate_issue():
+    """Show the problem: HTTPS -> HTTP redirect causes HTTP links"""
+    
+    print("=" * 60)
+    print("DEMONSTRATING THE ISSUE")
+    print("=" * 60)
+    
+    # Simulate what happens during crawling
+    original_url = "https://quotes.toscrape.com/tag/deep-thoughts"
+    redirected_url = "http://quotes.toscrape.com/tag/deep-thoughts/"  # Server redirects to HTTP
+    
+    # Extract a relative link
+    relative_link = "/author/Albert-Einstein"
+    
+    # Standard URL joining uses the redirected (HTTP) base
+    resolved_url = urljoin(redirected_url, relative_link)
+    
+    print(f"Original URL:    {original_url}")
+    print(f"Redirected to:   {redirected_url}")
+    print(f"Relative link:   {relative_link}")
+    print(f"Resolved link:   {resolved_url}")
+    print(f"\n❌ Problem: Link is now HTTP instead of HTTPS!")
+    
+    return resolved_url
+
+def demonstrate_solution():
+    """Show the solution: preserve HTTPS for internal links"""
+    
+    print("\n" + "=" * 60)
+    print("DEMONSTRATING THE SOLUTION")
+    print("=" * 60)
+    
+    # Our normalize_url with HTTPS preservation
+    def normalize_url_with_preservation(href, base_url, preserve_https=False, original_scheme=None):
+        """Normalize URL with optional HTTPS preservation"""
+        
+        # Standard resolution
+        full_url = urljoin(base_url, href.strip())
+        
+        # Preserve HTTPS if requested
+        if preserve_https and original_scheme == 'https':
+            parsed_full = urlparse(full_url)
+            parsed_base = urlparse(base_url)
+            
+            # Only for same-domain links
+            if parsed_full.scheme == 'http' and parsed_full.netloc == parsed_base.netloc:
+                full_url = full_url.replace('http://', 'https://', 1)
+                print(f"  → Preserved HTTPS for {parsed_full.netloc}")
+        
+        return full_url
+    
+    # Same scenario as before
+    original_url = "https://quotes.toscrape.com/tag/deep-thoughts"
+    redirected_url = "http://quotes.toscrape.com/tag/deep-thoughts/"
+    relative_link = "/author/Albert-Einstein"
+    
+    # Without preservation (current behavior)
+    resolved_without = normalize_url_with_preservation(
+        relative_link, redirected_url,
+        preserve_https=False, original_scheme='https'
+    )
+    
+    print(f"\nWithout preservation:")
+    print(f"  Result: {resolved_without}")
+    
+    # With preservation (new feature)
+    resolved_with = normalize_url_with_preservation(
+        relative_link, redirected_url,
+        preserve_https=True, original_scheme='https'
+    )
+    
+    print(f"\nWith preservation (preserve_https_for_internal_links=True):")
+    print(f"  Result: {resolved_with}")
+    print(f"\n✅ Solution: Internal link stays HTTPS!")
+    
+    return resolved_with
+
+def test_edge_cases():
+    """Print a pass/fail line for each edge case (reports only; nothing is asserted)"""
+    
+    print("\n" + "=" * 60)
+    print("EDGE CASES")
+    print("=" * 60)
+    
+    from urllib.parse import urljoin, urlparse
+    
+    def preserve_https(href, base_url, original_scheme):
+        """Helper to test preservation logic"""
+        full_url = urljoin(base_url, href)
+        
+        if original_scheme == 'https':
+            parsed_full = urlparse(full_url)
+            parsed_base = urlparse(base_url)
+            # Leave protocol-relative hrefs ("//host/path") untouched: they keep the base URL's scheme
+            if (parsed_full.scheme == 'http' and 
+                parsed_full.netloc == parsed_base.netloc and
+                not href.strip().startswith('//')):
+                full_url = full_url.replace('http://', 'https://', 1)
+        
+        return full_url
+    
+    test_cases = [
+        # (description, href, base_url, original_scheme, should_be_https)
+        ("External link", "http://other.com/page", "http://example.com", "https", False),
+        ("Already HTTPS", "/page", "https://example.com", "https", True),
+        ("No original HTTPS", "/page", "http://example.com", "http", False),
+        ("Subdomain", "/page", "http://sub.example.com", "https", True),
+        ("Protocol-relative", "//example.com/page", "http://example.com", "https", False),
+    ]
+    
+    for desc, href, base_url, orig_scheme, should_be_https in test_cases:
+        result = preserve_https(href, base_url, orig_scheme)
+        is_https = result.startswith('https://')
+        status = "✅" if is_https == should_be_https else "❌"
+        
+        print(f"\n{status} {desc}:")
+        print(f"  Input: {href} + {base_url}")
+        print(f"  Result: {result}")
+        print(f"  Expected HTTPS: {should_be_https}, Got: {is_https}")
+
+def usage_example():
+    """Show how to use the feature in crawl4ai"""
+    
+    print("\n" + "=" * 60)
+    print("USAGE IN CRAWL4AI")
+    print("=" * 60)
+    
+    print("""
+To enable HTTPS preservation in your crawl4ai code:
+
+```python
+from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
+
+async with AsyncWebCrawler() as crawler:
+    config = CrawlerRunConfig(
+        preserve_https_for_internal_links=True  # Enable HTTPS preservation
+    )
+    
+    result = await crawler.arun(
+        url="https://example.com",
+        config=config
+    )
+    
+    # All internal links will maintain HTTPS even if 
+    # the server redirects to HTTP
+```
+
+This is especially useful for:
+- Sites that redirect HTTPS to HTTP but still support HTTPS
+- Security-conscious crawling where you want to stay on HTTPS
+- Avoiding mixed content issues in downstream processing
+""")
+
+if __name__ == "__main__":
+    # Run all demonstrations
+    demonstrate_issue()
+    demonstrate_solution() 
+    test_edge_cases()
+    usage_example()
+    
+    print("\n" + "=" * 60)
+    print("✅ All tests complete!")
+    print("=" * 60)
Author	SHA1	Message	Date
ntohidi	bdacf61ca9	feat: update documentation for preserve_https_for_internal_links. ref #1410	2025-08-28 17:48:12 +08:00
ntohidi	f566c5a376	feat: add preserve_https_for_internal_links flag to maintain HTTPS during crawling. Ref #1410 Added a new `preserve_https_for_internal_links` configuration flag that preserves the original HTTPS scheme for same-domain links even when the server redirects to HTTP.	2025-08-28 17:38:40 +08:00