From 181250cb93f8e2754974c16d82cbde11ad593b67 Mon Sep 17 00:00:00 2001 From: unclecode Date: Thu, 9 May 2024 19:42:43 +0800 Subject: [PATCH] `chore: Add function to clear the database` --- .gitignore | 3 ++- crawler/database.py | 10 +++++++++- examples/quickstart.py | 4 ++-- main.py | 8 +++++++- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 020ec7c4..59f39306 100644 --- a/.gitignore +++ b/.gitignore @@ -163,4 +163,5 @@ cython_debug/ Crawl4AI.egg-info/ Crawl4AI.egg-info/* -crawler_data.db \ No newline at end of file +crawler_data.db +.vscode/ \ No newline at end of file diff --git a/crawler/database.py b/crawler/database.py index 294d894f..89048d05 100644 --- a/crawler/database.py +++ b/crawler/database.py @@ -50,4 +50,12 @@ def get_total_count(db_path: str) -> int: conn.close() return result[0] except Exception as e: - return 0 \ No newline at end of file + return 0 + +# Create function to clear the database +def clear_db(db_path: str): + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + cursor.execute('DELETE FROM crawled_data') + conn.commit() + conn.close() \ No newline at end of file diff --git a/examples/quickstart.py b/examples/quickstart.py index 9fc26a30..57f71eaa 100644 --- a/examples/quickstart.py +++ b/examples/quickstart.py @@ -8,13 +8,13 @@ def main(): crawler = WebCrawler(db_path='crawler_data.db') # Fetch a single page - single_url = UrlModel(url='https://kidocode.com', forced=True) + single_url = UrlModel(url='https://techcrunch.com/', forced=True) result = crawler.fetch_page( single_url, provider= "openai/gpt-3.5-turbo", api_token = os.getenv('OPENAI_API_KEY'), extract_blocks_flag=True, - word_count_threshold=5 + word_count_threshold=10 ) print(result.model_dump()) diff --git a/main.py b/main.py index 6191c9ce..c8a4bdcf 100644 --- a/main.py +++ b/main.py @@ -10,7 +10,7 @@ import asyncio from concurrent.futures import ThreadPoolExecutor, as_completed import chromedriver_autoinstaller from 
functools import lru_cache -from crawler.database import get_total_count +from crawler.database import get_total_count, clear_db import os import uuid @@ -56,6 +56,12 @@ async def get_total_url_count(): count = get_total_count(db_path='crawler_data.db') return JSONResponse(content={"count": count}) +# Add endpoint to clear db +@app.get("/clear-db") +async def clear_database(): + clear_db(db_path='crawler_data.db') + return JSONResponse(content={"message": "Database cleared."}) + @app.post("/crawl") async def crawl_urls(urls_input: UrlsInput, request: Request): global current_requests