diff --git a/README.md b/README.md index aba3d118..7b6529f4 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ from crawl4ai import WebCrawler crawler = WebCrawler() # Run the crawler with keyword filtering and CSS selector -result = crawler.run(url="https://www.example.com") +result = crawler.run(url="https://www.nbcnews.com/business") print(result) # {url, html, markdown, extracted_content, metadata} ``` @@ -63,7 +63,7 @@ crawler = WebCrawler(crawler_strategy=crawler_strategy) # Run the crawler with keyword filtering and CSS selector result = crawler.run( - url="https://www.example.com", + url="https://www.nbcnews.com/business", extraction_strategy=CosineStrategy( semantic_filter="technology", ), @@ -71,7 +71,7 @@ result = crawler.run( # Run the crawler with LLM extraction strategy result = crawler.run( - url="https://www.example.com", + url="https://www.nbcnews.com/business", extraction_strategy=LLMExtractionStrategy( provider="openai/gpt-4o", api_token=os.getenv('OPENAI_API_KEY'), @@ -93,16 +93,16 @@ With Crawl4AI, you can perform advanced web crawling and data extraction tasks w ## Table of Contents -1. [Features](#features) -2. [Installation](#installation) -3. [REST API/Local Server](#using-the-local-server-ot-rest-api) -4. [Python Library Usage](#usage) -5. [Parameters](#parameters) -6. [Chunking Strategies](#chunking-strategies) -7. [Extraction Strategies](#extraction-strategies) -8. [Contributing](#contributing) -9. [License](#license) -10. [Contact](#contact) +1. [Features](#features-) +2. [Installation](#installation-) +3. [REST API/Local Server](#using-the-local-server-ot-rest-api-) +4. [Python Library Usage](#python-library-usage-) +5. [Parameters](#parameters-) +6. [Chunking Strategies](#chunking-strategies-) +7. [Extraction Strategies](#extraction-strategies-) +8. [Contributing](#contributing-) +9. [License](#license-) +10. [Contact](#contact-) ## Features ✨ @@ -168,7 +168,7 @@ To use the REST API, send a POST request to `https://crawl4ai.com/crawl` with th **Example Request:** ```json { - "urls": ["https://www.example.com"], + "urls": ["https://www.nbcnews.com/business"], "include_raw_html": false, "bypass_cache": true, "word_count_threshold": 5, @@ -195,7 +195,7 @@ To use the REST API, send a POST request to `https://crawl4ai.com/crawl` with th "status": "success", "data": [ { - "url": "https://www.example.com", + "url": "https://www.nbcnews.com/business", "extracted_content": "...", "html": "...", "markdown": "...", diff --git a/docs/examples/quickstart.py b/docs/examples/quickstart.py index fa3e9045..73772c25 100644 --- a/docs/examples/quickstart.py +++ b/docs/examples/quickstart.py @@ -128,7 +128,7 @@ def add_llm_extraction_strategy(crawler): print_result(result) result = crawler.run( - url="https://www.example.com", + url="https://www.nbcnews.com/business", extraction_strategy=LLMExtractionStrategy( provider="openai/gpt-4o", api_token=os.getenv('OPENAI_API_KEY'), diff --git a/setup.py b/setup.py index e26c69cd..bfacedf6 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ requirements_without_torch_transformers_nlkt = [req for req in requirements if n setup( name="Crawl4AI", - version="0.1.2", + version="0.2.0", description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & Scrapper", long_description=open("README.md").read(), long_description_content_type="text/markdown",