chore: Add function to clear the database

This commit is contained in:
unclecode
2024-05-09 19:42:43 +08:00
parent f7c031c097
commit 181250cb93
4 changed files with 20 additions and 5 deletions

3
.gitignore vendored
View File

@@ -163,4 +163,5 @@ cython_debug/
Crawl4AI.egg-info/
Crawl4AI.egg-info/*
crawler_data.db
crawler_data.db
.vscode/

View File

@@ -50,4 +50,12 @@ def get_total_count(db_path: str) -> int:
conn.close()
return result[0]
except Exception as e:
return 0
return 0
# Create function to clear the database
def clear_db(db_path: str) -> None:
    """Delete every row from the ``crawled_data`` table.

    Args:
        db_path: Path of the SQLite database file to open.

    Raises:
        sqlite3.Error: Propagated unchanged if connecting, deleting, or
            committing fails (e.g. the ``crawled_data`` table is missing).
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('DELETE FROM crawled_data')
        conn.commit()
    finally:
        # Always release the connection, even when the DELETE or the
        # commit raises; otherwise the handle stays open on failure.
        conn.close()

View File

@@ -8,13 +8,13 @@ def main():
crawler = WebCrawler(db_path='crawler_data.db')
# Fetch a single page
single_url = UrlModel(url='https://kidocode.com', forced=True)
single_url = UrlModel(url='https://techcrunch.com/', forced=True)
result = crawler.fetch_page(
single_url,
provider= "openai/gpt-3.5-turbo",
api_token = os.getenv('OPENAI_API_KEY'),
extract_blocks_flag=True,
word_count_threshold=5
word_count_threshold=10
)
print(result.model_dump())

View File

@@ -10,7 +10,7 @@ import asyncio
from concurrent.futures import ThreadPoolExecutor, as_completed
import chromedriver_autoinstaller
from functools import lru_cache
from crawler.database import get_total_count
from crawler.database import get_total_count, clear_db
import os
import uuid
@@ -56,6 +56,12 @@ async def get_total_url_count():
count = get_total_count(db_path='crawler_data.db')
return JSONResponse(content={"count": count})
# Add endpoint to clear the database
@app.get("/clear-db")
async def clear_database():
    """Clear the crawler database and return a confirmation message.

    Calls ``clear_db`` on ``crawler_data.db`` and responds with a JSON
    body containing a ``message`` field.
    """
    # NOTE(review): this is a state-changing operation exposed via GET;
    # confirm that is intended before relying on it from clients.
    database_file = 'crawler_data.db'
    clear_db(db_path=database_file)
    payload = {"message": "Database cleared."}
    return JSONResponse(content=payload)
@app.post("/crawl")
async def crawl_urls(urls_input: UrlsInput, request: Request):
global current_requests