chore: Add function to clear the database
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -164,3 +164,4 @@ cython_debug/
|
||||
Crawl4AI.egg-info/
|
||||
Crawl4AI.egg-info/*
|
||||
crawler_data.db
|
||||
.vscode/
|
||||
@@ -51,3 +51,11 @@ def get_total_count(db_path: str) -> int:
|
||||
return result[0]
|
||||
except Exception as e:
|
||||
return 0
|
||||
|
||||
# Create function to clear the database
|
||||
def clear_db(db_path: str):
    """Delete every row from the ``crawled_data`` table.

    Args:
        db_path: Path of the SQLite database file to open.

    Raises:
        sqlite3.Error: Propagated unchanged when the statement fails
            (for example, when the ``crawled_data`` table does not exist).
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('DELETE FROM crawled_data')
        conn.commit()
    finally:
        # Always release the connection, even when the DELETE or commit fails.
        conn.close()
|
||||
@@ -8,13 +8,13 @@ def main():
|
||||
crawler = WebCrawler(db_path='crawler_data.db')
|
||||
|
||||
# Fetch a single page
|
||||
single_url = UrlModel(url='https://kidocode.com', forced=True)
|
||||
single_url = UrlModel(url='https://techcrunch.com/', forced=True)
|
||||
result = crawler.fetch_page(
|
||||
single_url,
|
||||
provider= "openai/gpt-3.5-turbo",
|
||||
api_token = os.getenv('OPENAI_API_KEY'),
|
||||
extract_blocks_flag=True,
|
||||
word_count_threshold=5
|
||||
word_count_threshold=10
|
||||
)
|
||||
print(result.model_dump())
|
||||
|
||||
|
||||
8
main.py
8
main.py
@@ -10,7 +10,7 @@ import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
import chromedriver_autoinstaller
|
||||
from functools import lru_cache
|
||||
from crawler.database import get_total_count
|
||||
from crawler.database import get_total_count, clear_db
|
||||
import os
|
||||
import uuid
|
||||
|
||||
@@ -56,6 +56,12 @@ async def get_total_url_count():
|
||||
count = get_total_count(db_path='crawler_data.db')
|
||||
return JSONResponse(content={"count": count})
|
||||
|
||||
# Add endpoint to clear db
|
||||
@app.get("/clear-db")
async def clear_database():
    """Clear the crawler database and confirm with a JSON message.

    Calls ``clear_db`` on ``crawler_data.db`` and returns a ``JSONResponse``
    whose body is ``{"message": "Database cleared."}``.
    """
    # NOTE(review): this destructive action is exposed via GET — confirm
    # that is intended before relying on it from clients or crawlers.
    clear_db(db_path='crawler_data.db')
    payload = {"message": "Database cleared."}
    return JSONResponse(content=payload)
|
||||
|
||||
@app.post("/crawl")
|
||||
async def crawl_urls(urls_input: UrlsInput, request: Request):
|
||||
global current_requests
|
||||
|
||||
Reference in New Issue
Block a user