Add recipe images, update README, and REST api example

This commit is contained in:
unclecode
2024-06-07 20:43:50 +08:00
parent 768d048e1c
commit a19379aa58
11 changed files with 66 additions and 24 deletions

View File

@@ -1,18 +1,54 @@
# Example: calling the Crawl4AI REST API to crawl a page and capture a screenshot.
# (base64 is used further down in the file to decode the returned screenshot bytes.)
import requests, base64, os

# Minimal request payload: which URLs to crawl and whether to take a screenshot.
data = {
    "urls": [
        "https://www.nbcnews.com/business"
    ],
    "screenshot": True,
}

# Example of executing a JS script on the page before extracting the content
# data = {
#     "urls": [
#         "https://www.nbcnews.com/business"
#     ],
#     "screenshot": True,
#     'js' : ["""
#     const loadMoreButton = Array.from(document.querySelectorAll('button')).
#     find(button => button.textContent.includes('Load More'));
#     loadMoreButton && loadMoreButton.click();
#     """]
# }

# Example of using a custom extraction strategy
# data = {
#     "urls": [
#         "https://www.nbcnews.com/business"
#     ],
#     "extraction_strategy": "CosineStrategy",
#     "extraction_strategy_args": {
#         "semantic_filter": "inflation rent prices"
#     },
# }

# Example of using LLM to extract content
# data = {
#     "urls": [
#         "https://www.nbcnews.com/business"
#     ],
#     "extraction_strategy": "LLMExtractionStrategy",
#     "extraction_strategy_args": {
#         "provider": "groq/llama3-8b-8192",
#         "api_token": os.environ.get("GROQ_API_KEY"),
#         "instruction": """I am interested in only financial news,
#         and translate them in French."""
#     },
# }

# Send the crawl request and pull the first result out of the JSON response.
response = requests.post("https://crawl4ai.com/crawl", json=data)  # OR localhost if you run locally
result = response.json()['results'][0]
print(result['markdown'])
print(result['cleaned_html'])
@@ -24,3 +60,8 @@ with open("screenshot.png", "wb") as f: