# Origin: commit 11393183f7ba7ee5ec8d6b8c9c5a8c6444c65aac by unclecode,
# Mon, 13 May 2024 00:39:06 +0800 -- "Add Colab setup script."
# (new file: setup_colab.py)
"""Install Crawl4AI, its dependencies and the reuters model folder.

The original version of this script used IPython ``!command`` shell escapes,
which are only valid inside notebook cells (or ``.ipy`` files) and are a
``SyntaxError`` in a regular ``.py`` module.  Every external command is now
executed through :mod:`subprocess`, so the file works both when executed with
``python setup_colab.py`` and when run from a notebook with
``%run setup_colab.py``.
"""

import importlib
import os
import shutil
import subprocess
import sys
import tempfile

REPO_URL = "https://github.com/unclecode/crawl4ai.git"
BRANCH = "new-release-0.0.2"
# Location of the model folder inside the cloned repository.
MODEL_SUBPATH = os.path.join("models", "reuters")


def _run(command):
    """Run *command* (an argument list, no shell) and raise on failure.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status, so a failed step can no longer be reported as a success.
    """
    subprocess.run(command, check=True)


def _pip_install(*args):
    """Invoke ``pip install`` with *args* for the running interpreter."""
    # ``sys.executable -m pip`` guarantees the packages land in the same
    # environment this script is running in.
    _run([sys.executable, "-m", "pip", "install", *args])


def _download_reuters_model(destination_folder="models"):
    """Fetch the reuters model folder from the repository.

    The repository is cloned into a temporary directory (removed again even
    when a step fails) and ``models/reuters`` is moved into
    *destination_folder*, replacing a copy left behind by an earlier run.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` fails.
        FileNotFoundError: if the cloned branch has no ``models/reuters``.
    """
    target = os.path.join(destination_folder, os.path.basename(MODEL_SUBPATH))

    with tempfile.TemporaryDirectory() as workdir:
        checkout = os.path.join(workdir, "crawl4ai")
        # Only the files of the branch tip are needed, not the history.
        _run(["git", "clone", "--depth", "1", "-b", BRANCH, REPO_URL, checkout])

        source_folder = os.path.join(checkout, MODEL_SUBPATH)
        if not os.path.isdir(source_folder):
            raise FileNotFoundError(
                f"'{MODEL_SUBPATH}' not found in branch '{BRANCH}' of {REPO_URL}"
            )

        os.makedirs(destination_folder, exist_ok=True)
        # Make re-running the setup idempotent instead of failing on the move.
        if os.path.isdir(target):
            shutil.rmtree(target)
        shutil.move(source_folder, target)


def install_crawl4ai():
    """Install Crawl4AI with its dependencies and download the reuters model.

    Steps: install the Python and system packages, install a matching
    ChromeDriver through ``chromedriver_autoinstaller``, then place the
    ``reuters`` model folder under ``./models``.

    Raises:
        subprocess.CalledProcessError: if any external command fails.
        FileNotFoundError: if the model folder is missing from the repository.
    """
    print("Installing Crawl4AI and its dependencies...")

    # Install dependencies
    _pip_install("-U", "spacy[cuda12x]")
    _run(["apt-get", "update", "-y"])
    _run(["apt-get", "install", "-y", "chromium-chromedriver"])
    _pip_install("chromedriver_autoinstaller")
    _pip_install(f"git+{REPO_URL}@{BRANCH}")

    # Install ChromeDriver.  The package was installed after this interpreter
    # started, so refresh the import system's caches before importing it.
    importlib.invalidate_caches()
    import chromedriver_autoinstaller

    chromedriver_autoinstaller.install()

    # Download the reuters model
    _download_reuters_model()

    print("Installation and model download completed successfully!")


# Run the installer only when executed as a script (``python setup_colab.py``
# or ``%run setup_colab.py``), not as a side effect of importing the module.
if __name__ == "__main__":
    install_crawl4ai()