chore: Delete existing database file and initialize new database
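This commit changes the `__main__` block of `crawl4ai/database.py` so that running the module deletes the existing database file and initializes a new one. `os.remove()` deletes the file at `DB_PATH` if it exists, and `init_db()` is then called to create the fresh database. This change is necessary to start with a clean database state. The commit also renames the `link` column to `links` in `init_db()`, removes the "Error extracting blocks" prints from the extraction code, and makes `setup.py` remove an existing `.crawl4ai/cache` folder before recreating it.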
This commit is contained in:
@@ -20,7 +20,7 @@ def init_db():
|
|||||||
extracted_content TEXT,
|
extracted_content TEXT,
|
||||||
success BOOLEAN,
|
success BOOLEAN,
|
||||||
media TEXT DEFAULT "{}",
|
media TEXT DEFAULT "{}",
|
||||||
link TEXT DEFAULT "{}",
|
links TEXT DEFAULT "{}",
|
||||||
metadata TEXT DEFAULT "{}",
|
metadata TEXT DEFAULT "{}",
|
||||||
screenshot TEXT DEFAULT ""
|
screenshot TEXT DEFAULT ""
|
||||||
)
|
)
|
||||||
@@ -127,6 +127,9 @@ def update_existing_records(new_column: str = "media", default_value: str = "{}"
|
|||||||
print(f"Error updating existing records: {e}")
|
print(f"Error updating existing records: {e}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
init_db() # Initialize the database if not already initialized
|
# Delete the existing database file
|
||||||
alter_db_add_screenshot("metadata") # Add the new column to the table
|
if os.path.exists(DB_PATH):
|
||||||
update_existing_records("metadata") # Update existing records to set the new column to an empty string
|
os.remove(DB_PATH)
|
||||||
|
init_db()
|
||||||
|
# alter_db_add_screenshot("COL_NAME")
|
||||||
|
|
||||||
|
|||||||
@@ -116,7 +116,6 @@ class LLMExtractionStrategy(ExtractionStrategy):
|
|||||||
for block in blocks:
|
for block in blocks:
|
||||||
block['error'] = False
|
block['error'] = False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Error extracting blocks:", str(e))
|
|
||||||
parsed, unparsed = split_and_parse_json_objects(response.choices[0].message.content)
|
parsed, unparsed = split_and_parse_json_objects(response.choices[0].message.content)
|
||||||
blocks = parsed
|
blocks = parsed
|
||||||
if unparsed:
|
if unparsed:
|
||||||
|
|||||||
@@ -664,7 +664,6 @@ def extract_blocks(url, html, provider = DEFAULT_PROVIDER, api_token = None):
|
|||||||
for block in blocks:
|
for block in blocks:
|
||||||
block['error'] = False
|
block['error'] = False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Error extracting blocks:", str(e))
|
|
||||||
parsed, unparsed = split_and_parse_json_objects(response.choices[0].message.content)
|
parsed, unparsed = split_and_parse_json_objects(response.choices[0].message.content)
|
||||||
blocks = parsed
|
blocks = parsed
|
||||||
# Append all unparsed segments as one error block whose content is the list of unparsed segments
|
# Append all unparsed segments as one error block whose content is the list of unparsed segments
|
||||||
@@ -710,7 +709,6 @@ def extract_blocks_batch(batch_data, provider = "groq/llama3-70b-8192", api_toke
|
|||||||
blocks = json.loads(blocks)
|
blocks = json.loads(blocks)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Error extracting blocks:", str(e))
|
|
||||||
blocks = [{
|
blocks = [{
|
||||||
"index": 0,
|
"index": 0,
|
||||||
"tags": ["error"],
|
"tags": ["error"],
|
||||||
|
|||||||
5
setup.py
5
setup.py
@@ -5,10 +5,15 @@ import subprocess
|
|||||||
from setuptools.command.install import install
|
from setuptools.command.install import install
|
||||||
|
|
||||||
# Create the .crawl4ai folder in the user's home directory if it doesn't exist
|
# Create the .crawl4ai folder in the user's home directory if it doesn't exist
|
||||||
|
# If the folder already exists, remove the cache folder
|
||||||
crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai")
|
crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai")
|
||||||
|
if os.path.exists(f"{crawl4ai_folder}/cache"):
|
||||||
|
subprocess.run(["rm", "-rf", f"{crawl4ai_folder}/cache"])
|
||||||
os.makedirs(crawl4ai_folder, exist_ok=True)
|
os.makedirs(crawl4ai_folder, exist_ok=True)
|
||||||
os.makedirs(f"{crawl4ai_folder}/cache", exist_ok=True)
|
os.makedirs(f"{crawl4ai_folder}/cache", exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Read the requirements from requirements.txt
|
# Read the requirements from requirements.txt
|
||||||
with open("requirements.txt") as f:
|
with open("requirements.txt") as f:
|
||||||
requirements = f.read().splitlines()
|
requirements = f.read().splitlines()
|
||||||
|
|||||||
Reference in New Issue
Block a user