Compare commits

...

5 Commits

Author SHA1 Message Date
UncleCode
ac9d83c72f Update gitignore 2024-10-27 19:29:04 +08:00
UncleCode
ff9149b5c9 Merge branch 'main' of https://github.com/unclecode/crawl4ai 2024-10-27 19:28:05 +08:00
UncleCode
32f57c49d6 Merge pull request #194 from IdrisHanafi/feat/customize-crawl-base-directory
Support for custom crawl base directory
2024-10-24 13:09:27 +02:00
Idris Hanafi
a5f627ba1a feat: customize crawl base directory 2024-10-21 17:58:39 -04:00
UncleCode
dbb587d681 Update gitignore 2024-10-17 21:38:48 +08:00
2 changed files with 7 additions and 2 deletions

6
.gitignore vendored
View File

@@ -202,5 +202,9 @@ todo.md
git_changes.py
git_changes.md
pypi_build.sh
+git_issues.py
+git_issues.md
-.tests/
+.tests/
+.issues/

View File

@@ -23,13 +23,14 @@ class AsyncWebCrawler:
self,
crawler_strategy: Optional[AsyncCrawlerStrategy] = None,
always_by_pass_cache: bool = False,
+base_directory: str = str(Path.home()),
**kwargs,
):
self.crawler_strategy = crawler_strategy or AsyncPlaywrightCrawlerStrategy(
**kwargs
)
self.always_by_pass_cache = always_by_pass_cache
-self.crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai")
+self.crawl4ai_folder = os.path.join(base_directory, ".crawl4ai")
os.makedirs(self.crawl4ai_folder, exist_ok=True)
os.makedirs(f"{self.crawl4ai_folder}/cache", exist_ok=True)
self.ready = False