# Packaging metadata for Crawl4AI.
# PEP 517 build backend: setuptools (>=64 is required for PEP 660 editable
# installs and modern pyproject-based configuration).
[build-system]
requires = ["setuptools>=64.0.0", "wheel"]
build-backend = "setuptools.build_meta"
|
# Core project metadata (PEP 621). `version` is resolved dynamically from
# crawl4ai.__version__ via [tool.setuptools.dynamic] further down.
[project]
name = "Crawl4AI"
dynamic = ["version"]
description = "🚀🤖 Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
readme = "README.md"
requires-python = ">=3.9"
# NOTE(review): this declares MIT, but the trove classifier in `classifiers`
# below declares "Apache Software License" — confirm the intended license
# and make the two agree.
license = {text = "MIT"}
authors = [
    {name = "Unclecode", email = "unclecode@kidocode.com"},
]
|
# Runtime dependencies (PEP 508 specifiers, one per line).
# Duplicate unpinned entries for "playwright" and "aiofiles" were removed —
# the pinned specifiers below already cover them, and the unpinned duplicates
# only made the list ambiguous.
dependencies = [
    "aiosqlite~=0.20",
    "lxml~=5.3",
    "litellm>=1.53.1",
    "numpy>=1.26.0,<3",
    "pillow~=10.4",
    "playwright>=1.49.0",
    "python-dotenv~=1.0",
    "requests~=2.26",
    "beautifulsoup4~=4.12",
    "tf-playwright-stealth>=1.1.0",
    "xxhash~=3.4",
    "rank-bm25~=0.2",
    "aiofiles>=24.1.0",
    "colorama~=0.4",
    "snowballstemmer~=2.2",
    "pydantic>=2.10",
    "pyOpenSSL>=24.3.0",
    "psutil>=6.1.1",
    "nltk>=3.9.1",
    "rich>=13.9.4",
    "cssselect>=1.2.0",
    # NOTE(review): hard pin (==) while everything else uses ranges —
    # presumably a compatibility workaround; confirm before loosening.
    "httpx==0.27.2",
    "fake-useragent>=2.0.3",
    "click>=8.1.7",
    "pyperclip>=1.8.2",
    "faust-cchardet>=2.1.19",
    "aiohttp>=3.11.11",
]
|
# PyPI trove classifiers. The Python version entries should track
# `requires-python` above (>=3.9).
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    # NOTE(review): this says Apache, but the `license` key above says MIT —
    # confirm which license is correct and align both.
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
|
# Optional feature extras. `all` is the union of the extras above
# (a duplicate "PyPDF2" entry at the end of the list was removed).
[project.optional-dependencies]
pdf = ["PyPDF2"]
torch = ["torch", "nltk", "scikit-learn"]
transformer = ["transformers", "tokenizers"]
cosine = ["torch", "transformers", "nltk"]
sync = ["selenium"]
all = [
    "PyPDF2",
    "torch",
    "nltk",
    "scikit-learn",
    "transformers",
    "tokenizers",
    "selenium",
]
|
# Console entry points installed alongside the package.
[project.scripts]
crawl4ai-download-models = "crawl4ai.model_loader:main"
crawl4ai-migrate = "crawl4ai.migrations:main"
crawl4ai-setup = "crawl4ai.install:post_install"
crawl4ai-doctor = "crawl4ai.install:doctor"
crwl = "crawl4ai.cli:cli"
|
# Package discovery: only crawl4ai and its subpackages. Written as a table
# header instead of the original nested inline table
# (packages = {find = {...}}) — equivalent for setuptools, and idiomatic
# TOML avoids nesting inline tables.
[tool.setuptools.packages.find]
where = ["."]
include = ["crawl4ai*"]

# Ship the bundled JS snippets inside the wheel.
[tool.setuptools.package-data]
crawl4ai = ["js_snippet/*.js"]
|
# Resolve the `dynamic = ["version"]` declared in [project] from the
# package's own __version__ attribute.
[tool.setuptools.dynamic]
version = {attr = "crawl4ai.__version__.__version__"}

# uv: treat crawl4ai as a workspace member so the dev group below installs
# the local checkout rather than a published release.
[tool.uv.sources]
crawl4ai = { workspace = true }
|
# PEP 735 dependency groups: the dev group installs the project itself
# (resolved to the workspace checkout via [tool.uv.sources] above).
[dependency-groups]
dev = [
    "crawl4ai",
]