docs: update README browser installation command

- Remove Chrome from manual installation command
- Keep Chromium as the only default browser in docs
refactor(install): use chromium as default browser
2025-01-01 17:24:44 +08:00 · 2025-01-01 17:19:54 +08:00 · 2025-01-01 16:54:03 +08:00 · 2025-01-01 16:33:43 +08:00 · 2025-01-01 16:10:08 +08:00 · 2025-01-01 15:52:01 +08:00
10 changed files with 317 additions and 102 deletions
--- a/README.md
+++ b/README.md
@@ -39,13 +39,17 @@ Crawl4AI is the #1 trending GitHub repository, actively maintained by a vibrant
 ```bash
 # Install the package
 pip install crawl4ai
+
+# Run post-installation setup
 crawl4ai-setup

-# Install Playwright with system dependencies (recommended)
-playwright install --with-deps 
+# Verify your installation
+crawl4ai-doctor
+```

-# Or install specific browsers:
-playwright install --with-deps chrome  # Recommended for Colab/Linux
+If you encounter any browser-related issues, you can install them manually:
+```bash
+python -m playwright install --with-deps chromium
 ```

 2. Run a simple web crawl:
--- a/crawl4ai/version.py
+++ b/crawl4ai/version.py
@@ -1,2 +1,2 @@
 # crawl4ai/_version.py
-__version__ = "0.4.24"
+__version__ = "0.4.241"
--- a/crawl4ai/content_filter_strategy.py
+++ b/crawl4ai/content_filter_strategy.py
@@ -83,7 +83,6 @@ class RelevantContentFilter(ABC):
                                
        return ' '.join(filter(None, query_parts))

-
    def extract_text_chunks(self, body: Tag, min_word_threshold: int = None) -> List[Tuple[str, str]]:
        """
        Extracts text chunks from a BeautifulSoup body element while preserving order.
--- a/crawl4ai/install.py
+++ b/crawl4ai/install.py
@@ -2,7 +2,6 @@ import subprocess
 import sys
 import asyncio
 from .async_logger import AsyncLogger, LogLevel
-from .docs_manager import DocsManager

 # Initialize logger
 logger = AsyncLogger(log_level=LogLevel.DEBUG, verbose=True)
@@ -12,24 +11,20 @@ def post_install():
    logger.info("Running post-installation setup...", tag="INIT")
    install_playwright()
    run_migration()
-    asyncio.run(setup_docs())
    logger.success("Post-installation setup completed!", tag="COMPLETE")
    
 def install_playwright():
    logger.info("Installing Playwright browsers...", tag="INIT")
    try:
-        subprocess.check_call([sys.executable, "-m", "playwright", "install"])
+        # subprocess.check_call([sys.executable, "-m", "playwright", "install", "--with-deps", "--force", "chrome"])
+        subprocess.check_call([sys.executable, "-m", "playwright", "install", "--with-deps", "--force", "chromium"])
        logger.success("Playwright installation completed successfully.", tag="COMPLETE")
    except subprocess.CalledProcessError as e:
-        logger.error(f"Error during Playwright installation: {e}", tag="ERROR")
-        logger.warning(
-            "Please run 'python -m playwright install' manually after the installation."
-        )
+        # logger.error(f"Error during Playwright installation: {e}", tag="ERROR")
+        logger.warning(f"Please run '{sys.executable} -m playwright install --with-deps' manually after the installation.")
    except Exception as e:
-        logger.error(f"Unexpected error during Playwright installation: {e}", tag="ERROR")
-        logger.warning(
-            "Please run 'python -m playwright install' manually after the installation."
-        )
+        # logger.error(f"Unexpected error during Playwright installation: {e}", tag="ERROR")
+        logger.warning(f"Please run '{sys.executable} -m playwright install --with-deps' manually after the installation.")

 def run_migration():
    """Initialize database during installation"""
@@ -45,7 +40,44 @@ def run_migration():
        logger.warning(f"Database initialization failed: {e}")
        logger.warning("Database will be initialized on first use")

-async def setup_docs():
-    """Download documentation files"""
-    docs_manager = DocsManager(logger)
-    await docs_manager.update_docs()
+async def run_doctor():
+    """Test if Crawl4AI is working properly"""
+    logger.info("Running Crawl4AI health check...", tag="INIT")
+    try:
+        from .async_webcrawler import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
+
+        browser_config = BrowserConfig(
+            headless=True,
+            browser_type="chromium",
+            ignore_https_errors=True,
+            light_mode=True,
+            viewport_width=1280,
+            viewport_height=720
+        )
+
+        run_config = CrawlerRunConfig(
+            cache_mode=CacheMode.BYPASS,
+            screenshot=True,
+        )
+
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            logger.info("Testing crawling capabilities...", tag="TEST")
+            result = await crawler.arun(
+                url="https://crawl4ai.com",
+                config=run_config
+            )
+
+            if result and result.markdown:
+                logger.success("✅ Crawling test passed!", tag="COMPLETE")
+                return True
+            else:
+                raise Exception("Failed to get content")
+
+    except Exception as e:
+        logger.error(f"❌ Test failed: {e}", tag="ERROR")
+        return False
+
+def doctor():
+    """Entry point for the doctor command"""
+    import asyncio
+    return asyncio.run(run_doctor())
--- a/docs/examples/v0_4_24_walkthrough.py
+++ b/docs/examples/v0_4_24_walkthrough.py
@@ -9,6 +9,7 @@ Each section includes detailed examples and explanations of the new capabilities
 import asyncio
 import os
 import json
+import re
 from typing import List, Optional, Dict, Any
 from pydantic import BaseModel, Field
 from crawl4ai import (
@@ -16,9 +17,12 @@ from crawl4ai import (
    BrowserConfig,
    CrawlerRunConfig,
    CacheMode,
-    LLMExtractionStrategy
+    LLMExtractionStrategy,
+    JsonCssExtractionStrategy
 )
-from crawl4ai.content_filter_strategy import PruningContentFilter
+from crawl4ai.content_filter_strategy import RelevantContentFilter
+from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator 
+from bs4 import BeautifulSoup

 # Sample HTML for demonstrations
 SAMPLE_HTML = """
@@ -68,10 +72,7 @@ async def demo_ssl_features():
    print("\n1. Enhanced SSL & Security Demo")
    print("--------------------------------")

-    browser_config = BrowserConfig(
-        ignore_https_errors=True,
-        verbose=True
-    )
+    browser_config = BrowserConfig()

    run_config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
@@ -84,37 +85,91 @@ async def demo_ssl_features():
            config=run_config
        )
        print(f"SSL Crawl Success: {result.success}")
+        result.ssl_certificate.to_json(
+            os.path.join(os.getcwd(), "ssl_certificate.json")
+        )
        if not result.success:
            print(f"SSL Error: {result.error_message}")

 async def demo_content_filtering():
    """
    Smart Content Filtering Demo
-    --------------------------
+    ----------------------
    
-    Demonstrates the new content filtering system with:
-    1. Regular expression pattern matching
-    2. Length-based filtering
-    3. Custom filtering rules
-    4. Content chunking strategies
-    
-    This is particularly useful for:
-    - Removing advertisements and boilerplate content
-    - Extracting meaningful paragraphs
-    - Filtering out irrelevant sections
-    - Processing content in manageable chunks
+    Demonstrates advanced content filtering capabilities:
+    1. Custom filter to identify and extract specific content
+    2. Integration with markdown generation
+    3. Flexible pruning rules
    """
    print("\n2. Smart Content Filtering Demo")
    print("--------------------------------")

-    content_filter = PruningContentFilter(
-        min_word_threshold=50,
-        threshold_type='dynamic',
-        threshold=0.5
+    # Create a custom content filter
+    class CustomNewsFilter(RelevantContentFilter):
+        def __init__(self):
+            super().__init__()
+            # Add news-specific patterns
+            self.negative_patterns = re.compile(
+                r'nav|footer|header|sidebar|ads|comment|share|related|recommended|popular|trending',
+                re.I
+            )
+            self.min_word_count = 30  # Higher threshold for news content
+
+        def filter_content(self, html: str, min_word_threshold: int = None) -> List[str]:
+            """
+            Implements news-specific content filtering logic.
+            
+            Args:
+                html (str): HTML content to be filtered
+                min_word_threshold (int, optional): Minimum word count threshold
+                
+            Returns:
+                List[str]: List of filtered HTML content blocks
+            """
+            if not html or not isinstance(html, str):
+                return []
+                
+            soup = BeautifulSoup(html, 'lxml')
+            if not soup.body:
+                soup = BeautifulSoup(f'<body>{html}</body>', 'lxml')
+            
+            body = soup.find('body')
+            
+            # Extract chunks with metadata
+            chunks = self.extract_text_chunks(body, min_word_threshold or self.min_word_count)
+            
+            # Filter chunks based on news-specific criteria
+            filtered_chunks = []
+            for _, text, tag_type, element in chunks:
+                # Skip if element has negative class/id
+                if self.is_excluded(element):
+                    continue
+                    
+                # Headers are important in news articles
+                if tag_type == 'header':
+                    filtered_chunks.append(self.clean_element(element))
+                    continue
+                    
+                # For content, check word count and link density
+                text = element.get_text(strip=True)
+                if len(text.split()) >= (min_word_threshold or self.min_word_count):
+                    # Calculate link density
+                    links_text = ' '.join(a.get_text(strip=True) for a in element.find_all('a'))
+                    link_density = len(links_text) / len(text) if text else 1
+                    
+                    # Accept if link density is reasonable
+                    if link_density < 0.5:
+                        filtered_chunks.append(self.clean_element(element))
+            
+            return filtered_chunks
+
+    # Create markdown generator with custom filter
+    markdown_gen = DefaultMarkdownGenerator(
+        content_filter=CustomNewsFilter()
    )

    run_config = CrawlerRunConfig(
-        content_filter=content_filter,
+        markdown_generator=markdown_gen,
        cache_mode=CacheMode.BYPASS
    )

@@ -124,25 +179,22 @@ async def demo_content_filtering():
            config=run_config
        )
        print("Filtered Content Sample:")
-        print(result.markdown[:500] + "...\n")
+        print(result.markdown[:500])  # Show first 500 chars

 async def demo_json_extraction():
    """
-    Advanced JSON Extraction Demo
+    Improved JSON Extraction Demo
    ---------------------------
    
    Demonstrates the enhanced JSON extraction capabilities:
-    1. Using different input formats (markdown, html)
-    2. Base element attributes extraction
-    3. Complex nested structures
-    4. Multiple extraction patterns
+    1. Base element attributes extraction
+    2. Complex nested structures
+    3. Multiple extraction patterns
    
    Key features shown:
-    - Extracting from different input formats (markdown vs html)
    - Extracting attributes from base elements (href, data-* attributes)
    - Processing repeated patterns
    - Handling optional fields
-    - Computing derived values
    """
    print("\n3. Improved JSON Extraction Demo")
    print("--------------------------------")
@@ -152,13 +204,17 @@ async def demo_json_extraction():
        schema={
            "name": "Blog Posts",
            "baseSelector": "div.article-list",
+            "baseFields": [
+                {"name": "list_id", "type": "attribute", "attribute": "data-list-id"},
+                {"name": "category", "type": "attribute", "attribute": "data-category"}
+            ],
            "fields": [
                {
                    "name": "posts",
                    "selector": "article.post",
                    "type": "nested_list",
                    "baseFields": [
-                        {"name": "category", "type": "attribute", "attribute": "data-category"},
+                        {"name": "post_id", "type": "attribute", "attribute": "data-post-id"},
                        {"name": "author_id", "type": "attribute", "attribute": "data-author"}
                    ],
                    "fields": [
@@ -378,10 +434,10 @@ async def main():
    print("====================================")

    # Run all demos
-    # await demo_ssl_features()
-    # await demo_content_filtering()
-    # await demo_json_extraction()
-    await demo_input_formats()
+    await demo_ssl_features()
+    await demo_content_filtering()
+    await demo_json_extraction()
+    # await demo_input_formats()

 if __name__ == "__main__":
    asyncio.run(main())
--- a/docs/md_v3/tutorials/getting-started.md
+++ b/docs/md_v3/tutorials/getting-started.md
@@ -31,7 +31,14 @@ By the end of this guide, you’ll have installed Crawl4AI, performed a basic cr
 ```bash
 pip install crawl4ai
 crawl4ai-setup
-playwright install --with-deps  
+
+# Verify your installation
+crawl4ai-doctor
+```
+
+If you encounter any browser-related issues, you can install them manually:
+```bash
+python -m playwright install --with-deps chrome chromium
 ```

 - **`crawl4ai-setup`** installs and configures Playwright (Chromium by default).
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,76 @@
+[build-system]
+requires = ["setuptools>=64.0.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "Crawl4AI"
+dynamic = ["version"]
+description = "🚀🤖 Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
+readme = "README.md"
+requires-python = ">=3.9"
+license = {text = "MIT"}
+authors = [
+    {name = "Unclecode", email = "unclecode@kidocode.com"}
+]
+dependencies = [
+    "aiosqlite~=0.20",
+    "lxml~=5.3",
+    "litellm>=1.53.1",
+    "numpy>=1.26.0,<3",
+    "pillow~=10.4",
+    "playwright>=1.49.0",
+    "python-dotenv~=1.0",
+    "requests~=2.26",
+    "beautifulsoup4~=4.12",
+    "tf-playwright-stealth>=1.1.0",
+    "xxhash~=3.4",
+    "rank-bm25~=0.2",
+    "aiofiles>=24.1.0",
+    "colorama~=0.4",
+    "snowballstemmer~=2.2",
+    "pydantic>=2.10",
+    "pyOpenSSL>=24.3.0",
+    "psutil>=6.1.1",
+    "nltk>=3.9.1",
+    "playwright",
+    "aiofiles"
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: Apache Software License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+]
+
+[project.optional-dependencies]
+torch = ["torch", "nltk", "scikit-learn"]
+transformer = ["transformers", "tokenizers"]
+cosine = ["torch", "transformers", "nltk"]
+sync = ["selenium"]
+all = [
+    "torch",
+    "nltk",
+    "scikit-learn",
+    "transformers",
+    "tokenizers",
+    "selenium"
+]
+
+[project.scripts]
+crawl4ai-download-models = "crawl4ai.model_loader:main"
+crawl4ai-migrate = "crawl4ai.migrations:main"
+crawl4ai-setup = "crawl4ai.install:post_install"
+crawl4ai-doctor = "crawl4ai.install:doctor"
+crawl = "crawl4ai.cli:cli"
+
+[tool.setuptools]
+packages = ["crawl4ai"]
+package-data = {"crawl4ai" = ["js_snippet/*.js"]}
+
+[tool.setuptools.dynamic]
+version = {attr = "crawl4ai.__version__.__version__"}
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
+# Note: These requirements are also specified in pyproject.toml
+# This file is kept for development environment setup and compatibility
 aiosqlite~=0.20
 lxml~=5.3
 litellm>=1.53.1
@@ -14,4 +16,6 @@ aiofiles>=24.1.0
 colorama~=0.4
 snowballstemmer~=2.2
 pydantic>=2.10
-pyOpenSSL>=24.3.0
+pyOpenSSL>=24.3.0
+psutil>=6.1.1
+nltk>=3.9.1
--- a/setup.py
+++ b/setup.py
@@ -3,6 +3,8 @@ import os
 from pathlib import Path
 import shutil

+# Note: Most configuration is now in pyproject.toml
+# This setup.py is kept for backwards compatibility

 # Create the .crawl4ai folder in the user's home directory if it doesn't exist
 # If the folder already exists, remove the cache folder
@@ -28,28 +30,20 @@ cache_folder.mkdir(exist_ok=True)
 for folder in content_folders:
    (crawl4ai_folder / folder).mkdir(exist_ok=True)

-# Read requirements and version
-__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
-with open(os.path.join(__location__, "requirements.txt")) as f:
-    requirements = f.read().splitlines()
-
-with open("crawl4ai/__version__.py") as f:
-    for line in f:
-        if line.startswith("__version__"):
-            version = line.split("=")[1].strip().strip('"')
-            break
-
-# Define requirements
-default_requirements = requirements
-torch_requirements = ["torch", "nltk", "scikit-learn"]
-transformer_requirements = ["transformers", "tokenizers"]
-cosine_similarity_requirements = ["torch", "transformers", "nltk"]
-sync_requirements = ["selenium"]
+version = "0.0.0"  # This will be overridden by pyproject.toml's dynamic version
+try:
+    with open("crawl4ai/__version__.py") as f:
+        for line in f:
+            if line.startswith("__version__"):
+                version = line.split("=")[1].strip().strip('"')
+                break
+except Exception:
+    pass  # Let pyproject.toml handle version

 setup(
    name="Crawl4AI",
    version=version,
-    description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper",
+    description="🚀🤖 Crawl4AI: Open-source LLM Friendly Web Crawler & scraper",
    long_description=open("README.md", encoding="utf-8").read(),
    long_description_content_type="text/markdown",
    url="https://github.com/unclecode/crawl4ai",
@@ -58,38 +52,18 @@ setup(
    license="MIT",
    packages=find_packages(),
    package_data={
-        'crawl4ai': ['js_snippet/*.js']  # This matches the exact path structure
-    },
-    install_requires=default_requirements
-    + ["playwright", "aiofiles"],  # Added aiofiles
-    extras_require={
-        "torch": torch_requirements,
-        "transformer": transformer_requirements,
-        "cosine": cosine_similarity_requirements,
-        "sync": sync_requirements,
-        "all": default_requirements
-        + torch_requirements
-        + transformer_requirements
-        + cosine_similarity_requirements
-        + sync_requirements,
-    },
-    entry_points={
-        "console_scripts": [
-            "crawl4ai-download-models=crawl4ai.model_loader:main",
-            "crawl4ai-migrate=crawl4ai.migrations:main",  
-            'crawl4ai-setup=crawl4ai.install:post_install', 
-            'crawl=crawl4ai.cli:cli',
-        ],
+        'crawl4ai': ['js_snippet/*.js']
    },
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.7",
-        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
+        "Programming Language :: Python :: 3.13",
    ],
-    python_requires=">=3.7",
+    python_requires=">=3.9",
 )
--- a/ssl_certificate.json
+++ b/ssl_certificate.json
@@ -0,0 +1,63 @@
+{
+  "subject": {
+    "C": "US",
+    "ST": "California",
+    "L": "Los Angeles",
+    "O": "Internet Corporation for Assigned Names and Numbers",
+    "CN": "www.example.org"
+  },
+  "issuer": {
+    "C": "US",
+    "O": "DigiCert Inc",
+    "CN": "DigiCert Global G2 TLS RSA SHA256 2020 CA1"
+  },
+  "version": 2,
+  "serial_number": "0x75bcef30689c8addf13e51af4afe187",
+  "not_before": "20240130000000Z",
+  "not_after": "20250301235959Z",
+  "fingerprint": "45463a42413a32363a44383a43313a43453a33373a37393a41433a37373a36333a30413a39303a46383a32313a36333a41333a44363a38393a32453a44363a41463a45453a34303a38363a37323a43463a31393a45423a41373a41333a3632",
+  "signature_algorithm": "sha256WithRSAEncryption",
+  "raw_cert": "MIIHbjCCBlagAwIBAgIQB1vO8waJyK3fE+Ua9K/hhzANBgkqhkiG9w0BAQsFADBZMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMTMwMQYDVQQDEypEaWdpQ2VydCBHbG9iYWwgRzIgVExTIFJTQSBTSEEyNTYgMjAyMCBDQTEwHhcNMjQwMTMwMDAwMDAwWhcNMjUwMzAxMjM1OTU5WjCBljELMAkGA1UEBhMCVVMxEzARBgNVBAgTCkNhbGlmb3JuaWExFDASBgNVBAcTC0xvcyBBbmdlbGVzMUIwQAYDVQQKDDlJbnRlcm5ldMKgQ29ycG9yYXRpb27CoGZvcsKgQXNzaWduZWTCoE5hbWVzwqBhbmTCoE51bWJlcnMxGDAWBgNVBAMTD3d3dy5leGFtcGxlLm9yZzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAIaFD7sO+cpf2fXgCjIsM9mqDgcpqC8IrXi9wga/9y0rpqcnPVOmTMNLsid3INbBVEm4CNr5cKlh9rJJnWlX2vttJDRyLkfwBD+dsVvivGYxWTLmqX6/1LDUZPVrynv/cltemtg/1Aay88jcj2ZaRoRmqBgVeacIzgU8+zmJ7236TnFSe7fkoKSclsBhPaQKcE3Djs1uszJs8sdECQTdoFX9I6UgeLKFXtg7rRf/hcW5dI0zubhXbrW8aWXbCzySVZn0c7RkJMpnTCiZzNxnPXnHFpwr5quqqjVyN/aBKkjoP04Zmr+eRqoyk/+lslq0sS8eaYSSHbC5ja/yMWyVhvMCAwEAAaOCA/IwggPuMB8GA1UdIwQYMBaAFHSFgMBmx9833s+9KTeqAx2+7c0XMB0GA1UdDgQWBBRM/tASTS4hz2v68vK4TEkCHTGRijCBgQYDVR0RBHoweIIPd3d3LmV4YW1wbGUub3JnggtleGFtcGxlLm5ldIILZXhhbXBsZS5lZHWCC2V4YW1wbGUuY29tggtleGFtcGxlLm9yZ4IPd3d3LmV4YW1wbGUuY29tgg93d3cuZXhhbXBsZS5lZHWCD3d3dy5leGFtcGxlLm5ldDA+BgNVHSAENzA1MDMGBmeBDAECAjApMCcGCCsGAQUFBwIBFhtodHRwOi8vd3d3LmRpZ2ljZXJ0LmNvbS9DUFMwDgYDVR0PAQH/BAQDAgWgMB0GA1UdJQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjCBnwYDVR0fBIGXMIGUMEigRqBEhkJodHRwOi8vY3JsMy5kaWdpY2VydC5jb20vRGlnaUNlcnRHbG9iYWxHMlRMU1JTQVNIQTI1NjIwMjBDQTEtMS5jcmwwSKBGoESGQmh0dHA6Ly9jcmw0LmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEdsb2JhbEcyVExTUlNBU0hBMjU2MjAyMENBMS0xLmNybDCBhwYIKwYBBQUHAQEEezB5MCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2VydC5jb20wUQYIKwYBBQUHMAKGRWh0dHA6Ly9jYWNlcnRzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEdsb2JhbEcyVExTUlNBU0hBMjU2MjAyMENBMS0xLmNydDAMBgNVHRMBAf8EAjAAMIIBfQYKKwYBBAHWeQIEAgSCAW0EggFpAWcAdABOdaMnXJoQwzhbbNTfP1LrHfDgjhuNacCx+mSxYpo53wAAAY1b0vxkAAAEAwBFMEMCH0BRCgxPbBBVxhcWZ26a8JCe83P1JZ6wmv56GsVcyMACIDgpMbEo5HJITTRPnoyT4mG8cLrWjEvhchUdEcWUuk1TAHYAfVkeEuF4KnscYWd8Xv340IdcFKBOlZ65Ay/ZDowuebgAAAGNW9L8MAAABAMARzBFAiBdv5Z3pZFbfgoM3tGpCTM3ZxBMQsxBRSdTS6d8d2NAcwIhALLoCT9mTMN9OyFz
IBV5MkXVLyuTf2OAzAOa7d8x2H6XAHcA5tIxY0B3jMEQQQbXcbnOwdJA9paEhvu6hzId/R43jlAAAAGNW9L8XwAABAMASDBGAiEA4Koh/VizdQU1tjZ2E2VGgWSXXkwnQmiYhmAeKcVLHeACIQD7JIGFsdGol7kss2pe4lYrCgPVc+iGZkuqnj26hqhr0TANBgkqhkiG9w0BAQsFAAOCAQEABOFuAj4N4yNG9OOWNQWTNSICC4Rd4nOG1HRP/Bsnrz7KrcPORtb6D+Jx+Q0amhO31QhIvVBYs14gY4Ypyj7MzHgm4VmPXcqLvEkxb2G9Qv9hYuEiNSQmm1fr5QAN/0AzbEbCM3cImLJ69kP5bUjfv/76KB57is8tYf9sh5ikLGKauxCM/zRIcGa3bXLDafk5S2g5Vr2hs230d/NGW1wZrE+zdGuMxfGJzJP+DAFviBfcQnFg4+1zMEKcqS87oniOyG+60RMM0MdejBD7AS43m9us96Gsun/4kufLQUTIFfnzxLutUV++3seshgefQOy5C/ayi8y1VTNmujPCxPCi6Q==",
+  "extensions": [
+    {
+      "name": "authorityKeyIdentifier",
+      "value": "74:85:80:C0:66:C7:DF:37:DE:CF:BD:29:37:AA:03:1D:BE:ED:CD:17"
+    },
+    {
+      "name": "subjectKeyIdentifier",
+      "value": "4C:FE:D0:12:4D:2E:21:CF:6B:FA:F2:F2:B8:4C:49:02:1D:31:91:8A"
+    },
+    {
+      "name": "subjectAltName",
+      "value": "DNS:www.example.org, DNS:example.net, DNS:example.edu, DNS:example.com, DNS:example.org, DNS:www.example.com, DNS:www.example.edu, DNS:www.example.net"
+    },
+    {
+      "name": "certificatePolicies",
+      "value": "Policy: 2.23.140.1.2.2\n  CPS: http://www.digicert.com/CPS"
+    },
+    {
+      "name": "keyUsage",
+      "value": "Digital Signature, Key Encipherment"
+    },
+    {
+      "name": "extendedKeyUsage",
+      "value": "TLS Web Server Authentication, TLS Web Client Authentication"
+    },
+    {
+      "name": "crlDistributionPoints",
+      "value": "Full Name:\n  URI:http://crl3.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crl\nFull Name:\n  URI:http://crl4.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crl"
+    },
+    {
+      "name": "authorityInfoAccess",
+      "value": "OCSP - URI:http://ocsp.digicert.com\nCA Issuers - URI:http://cacerts.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crt"
+    },
+    {
+      "name": "basicConstraints",
+      "value": "CA:FALSE"
+    },
+    {
+      "name": "ct_precert_scts",
+      "value": "Signed Certificate Timestamp:\n    Version   : v1 (0x0)\n    Log ID    : 4E:75:A3:27:5C:9A:10:C3:38:5B:6C:D4:DF:3F:52:EB:\n                1D:F0:E0:8E:1B:8D:69:C0:B1:FA:64:B1:62:9A:39:DF\n    Timestamp : Jan 30 19:22:50.340 2024 GMT\n    Extensions: none\n    Signature : ecdsa-with-SHA256\n                30:43:02:1F:40:51:0A:0C:4F:6C:10:55:C6:17:16:67:\n                6E:9A:F0:90:9E:F3:73:F5:25:9E:B0:9A:FE:7A:1A:C5:\n                5C:C8:C0:02:20:38:29:31:B1:28:E4:72:48:4D:34:4F:\n                9E:8C:93:E2:61:BC:70:BA:D6:8C:4B:E1:72:15:1D:11:\n                C5:94:BA:4D:53\nSigned Certificate Timestamp:\n    Version   : v1 (0x0)\n    Log ID    : 7D:59:1E:12:E1:78:2A:7B:1C:61:67:7C:5E:FD:F8:D0:\n                87:5C:14:A0:4E:95:9E:B9:03:2F:D9:0E:8C:2E:79:B8\n    Timestamp : Jan 30 19:22:50.288 2024 GMT\n    Extensions: none\n    Signature : ecdsa-with-SHA256\n                30:45:02:20:5D:BF:96:77:A5:91:5B:7E:0A:0C:DE:D1:\n                A9:09:33:37:67:10:4C:42:CC:41:45:27:53:4B:A7:7C:\n                77:63:40:73:02:21:00:B2:E8:09:3F:66:4C:C3:7D:3B:\n                21:73:20:15:79:32:45:D5:2F:2B:93:7F:63:80:CC:03:\n                9A:ED:DF:31:D8:7E:97\nSigned Certificate Timestamp:\n    Version   : v1 (0x0)\n    Log ID    : E6:D2:31:63:40:77:8C:C1:10:41:06:D7:71:B9:CE:C1:\n                D2:40:F6:96:84:86:FB:BA:87:32:1D:FD:1E:37:8E:50\n    Timestamp : Jan 30 19:22:50.335 2024 GMT\n    Extensions: none\n    Signature : ecdsa-with-SHA256\n                30:46:02:21:00:E0:AA:21:FD:58:B3:75:05:35:B6:36:\n                76:13:65:46:81:64:97:5E:4C:27:42:68:98:86:60:1E:\n                29:C5:4B:1D:E0:02:21:00:FB:24:81:85:B1:D1:A8:97:\n                B9:2C:B3:6A:5E:E2:56:2B:0A:03:D5:73:E8:86:66:4B:\n                AA:9E:3D:BA:86:A8:6B:D1"
+    }
+  ]
+}
Author	SHA1	Message	Date
UncleCode	5313c71a0d	docs: update README browser installation command - Remove Chrome from manual installation command - Keep Chromium as the only default browser in docs	2025-01-01 17:24:44 +08:00
UncleCode	d36ef3d424	refactor(install): use chromium as default browser - Remove Chrome installation to reduce setup time - Keep Chromium as default browser for better cross-platform compatibility	2025-01-01 17:19:54 +08:00
UncleCode	4a4f613238	docs: simplify installation instructions - Add crawl4ai-doctor command to verify installation - Update browser installation instructions in README and docs - Move optional features to documentation - Add manual browser installation steps as fallback - Update getting-started guide with verification step	2025-01-01 16:54:03 +08:00
UncleCode	dc6a24618e	feat(install): add doctor command and force browser install - Add --force flag to Playwright browser installation - Add doctor command to test crawling functionality - Install Chrome and Chromium browsers explicitly - Add crawl4ai-doctor entry point in pyproject.toml - Implement simple health check focused on crawling test	2025-01-01 16:33:43 +08:00
UncleCode	74a7c6dbb6	feat(install): specify chrome and chromium for playwright - Install Chrome and Chromium browsers explicitly - Split browser installation into separate commands	2025-01-01 16:10:08 +08:00
UncleCode	67f65f958b	refactor(build): simplify setup.py configuration - Remove dependency management from setup.py - Remove entry points configuration (moved to pyproject.toml) - Keep minimal setup.py for backwards compatibility - Clean up package metadata structure	2025-01-01 15:52:01 +08:00
UncleCode	78b6ba5cef	build: modernize package configuration with pyproject.toml - Add pyproject.toml for PEP 517 build system support - Configure dependencies, scripts, and metadata in pyproject.toml - Set Python requirement to >=3.9 and add support up to 3.13 - Keep setup.py for backwards compatibility - Move package dependencies and entry points to pyproject.toml	2025-01-01 15:45:27 +08:00
UncleCode	3f019d34cc	docs: update project description emojis - Change project description emojis from 🔥🕷️ to 🚀🤖 - Update emojis consistently in both setup.py and pyproject.toml	2025-01-01 15:39:33 +08:00
UncleCode	304260e484	refactor(install): simplify Playwright installation error handling - Remove setup_docs() call from post_install() - Simplify error messages for Playwright installation failures - Use sys.executable for more accurate Python path in error messages - Add --with-deps flag to Playwright install command	2025-01-01 15:33:36 +08:00
UncleCode	704bd66b63	Upgrade Playwright installation command to install dependencies	2025-01-01 15:23:16 +08:00
UncleCode	1acc162c18	Bump version v0.4.241	2025-01-01 15:16:06 +08:00
UncleCode	553c97a0c1	Fix bug reported in issue https://github.com/unclecode/crawl4ai/issues/396	2025-01-01 15:15:14 +08:00
UncleCode	bd66befcf0	Fix issue in 0.4.24 walkthrough	2024-12-31 21:07:58 +08:00
UncleCode	19b0a5ae82	Update 0.4.24 walkthrough	2024-12-31 21:01:46 +08:00