From 597fe8bdb7a7a1df154cc149a4ffcbcba848c57c Mon Sep 17 00:00:00 2001 From: unclecode Date: Fri, 5 Jul 2024 17:04:57 +0800 Subject: [PATCH] chore: Delete existing database file and initialize new database This commit deletes the existing database file and initializes a new database in `crawl4ai/database.py`: the `os.remove()` function deletes the file if it exists, and then `init_db()` is called to initialize the new database, so that work starts from a clean database state. It also renames the `link` column to `links` in the table schema, removes debug `print` statements from the LLM extraction error paths in `crawl4ai/extraction_strategy.py` and `crawl4ai/utils.py`, and makes `setup.py` clear the existing `.crawl4ai/cache` folder before recreating it. --- crawl4ai/database.py | 11 +++++++---- crawl4ai/extraction_strategy.py | 1 - crawl4ai/utils.py | 2 -- setup.py | 5 +++++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/crawl4ai/database.py b/crawl4ai/database.py index 47f41748..37d94463 100644 --- a/crawl4ai/database.py +++ b/crawl4ai/database.py @@ -20,7 +20,7 @@ def init_db(): extracted_content TEXT, success BOOLEAN, media TEXT DEFAULT "{}", - link TEXT DEFAULT "{}", + links TEXT DEFAULT "{}", metadata TEXT DEFAULT "{}", screenshot TEXT DEFAULT "" ) @@ -127,6 +127,9 @@ def update_existing_records(new_column: str = "media", default_value: str = "{}" print(f"Error updating existing records: {e}") if __name__ == "__main__": - init_db() # Initialize the database if not already initialized - alter_db_add_screenshot("metadata") # Add the new column to the table - update_existing_records("metadata") # Update existing records to set the new column to an empty string + # Delete the existing database file + if os.path.exists(DB_PATH): + os.remove(DB_PATH) + init_db() + # alter_db_add_screenshot("COL_NAME") + diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py index d4415c88..5d5ac836 100644 --- a/crawl4ai/extraction_strategy.py +++ b/crawl4ai/extraction_strategy.py @@ -116,7 +116,6 @@ class LLMExtractionStrategy(ExtractionStrategy): for block in blocks: block['error'] = False except Exception as e: - print("Error extracting blocks:", str(e)) parsed, unparsed = 
split_and_parse_json_objects(response.choices[0].message.content) blocks = parsed if unparsed: diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py index 474ce395..c8d4b993 100644 --- a/crawl4ai/utils.py +++ b/crawl4ai/utils.py @@ -664,7 +664,6 @@ def extract_blocks(url, html, provider = DEFAULT_PROVIDER, api_token = None): for block in blocks: block['error'] = False except Exception as e: - print("Error extracting blocks:", str(e)) parsed, unparsed = split_and_parse_json_objects(response.choices[0].message.content) blocks = parsed # Append all unparsed segments as onr error block and content is list of unparsed segments @@ -710,7 +709,6 @@ def extract_blocks_batch(batch_data, provider = "groq/llama3-70b-8192", api_toke blocks = json.loads(blocks) except Exception as e: - print("Error extracting blocks:", str(e)) blocks = [{ "index": 0, "tags": ["error"], diff --git a/setup.py b/setup.py index 468dc56e..674d628e 100644 --- a/setup.py +++ b/setup.py @@ -5,10 +5,15 @@ import subprocess from setuptools.command.install import install # Create the .crawl4ai folder in the user's home directory if it doesn't exist +# If the folder already exists, remove the cache folder crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai") +if os.path.exists(f"{crawl4ai_folder}/cache"): + subprocess.run(["rm", "-rf", f"{crawl4ai_folder}/cache"]) os.makedirs(crawl4ai_folder, exist_ok=True) os.makedirs(f"{crawl4ai_folder}/cache", exist_ok=True) + + # Read the requirements from requirements.txt with open("requirements.txt") as f: requirements = f.read().splitlines()