Add research assistant example using Chainlit
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@@ -174,4 +174,8 @@ requirements0.txt
|
|||||||
a.txt
|
a.txt
|
||||||
|
|
||||||
*.sh
|
*.sh
|
||||||
.idea
|
.idea
|
||||||
|
docs/examples/.chainlit/
|
||||||
|
docs/examples/.chainlit/*
|
||||||
|
.chainlit/config.toml
|
||||||
|
.chainlit/translations/en-US.json
|
||||||
|
|||||||
BIN
docs/examples/assets/audio.mp3
Normal file
BIN
docs/examples/assets/audio.mp3
Normal file
Binary file not shown.
3
docs/examples/chainlit.md
Normal file
3
docs/examples/chainlit.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# Welcome to Crawl4AI! 🚀🤖
|
||||||
|
|
||||||
|
Hi there, Developer! 👋 Here is an example of a research pipeline: share a URL in your conversation with any LLM, and the content of the crawled pages will be used as context for its answers.
|
||||||
281
docs/examples/chainlit_review.py
Normal file
281
docs/examples/chainlit_review.py
Normal file
@@ -0,0 +1,281 @@
|
|||||||
|
from openai import AsyncOpenAI
|
||||||
|
from chainlit.types import ThreadDict
|
||||||
|
import chainlit as cl
|
||||||
|
from chainlit.input_widget import Select, Switch, Slider
|
||||||
|
client = AsyncOpenAI()
|
||||||
|
|
||||||
|
# Instrument the OpenAI client
|
||||||
|
cl.instrument_openai()
|
||||||
|
|
||||||
|
# Keyword arguments splatted into every chat-completion request in this file.
settings = dict(
    model="gpt-3.5-turbo",
    temperature=0.5,
    max_tokens=500,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)
|
||||||
|
|
||||||
|
@cl.action_callback("action_button")
async def on_action(action: cl.Action):
    """Handle a click on the ``action_button`` action.

    Prints a notice to stdout and returns a thank-you string; the ``action``
    payload itself is not inspected.
    """
    click_notice = "The user clicked on the action button!"
    print(click_notice)

    acknowledgement = "Thank you for clicking on the action button!"
    return acknowledgement
|
||||||
|
|
||||||
|
@cl.set_chat_profiles
async def chat_profile():
    """Return the two chat profiles offered by this example (GPT-3.5, GPT-4)."""
    # (profile name, icon URL); the description is derived from the name.
    profile_specs = (
        ("GPT-3.5", "https://picsum.photos/200"),
        ("GPT-4", "https://picsum.photos/250"),
    )
    return [
        cl.ChatProfile(
            name=label,
            markdown_description=f"The underlying LLM model is **{label}**.",
            icon=icon_url,
        )
        for label, icon_url in profile_specs
    ]
|
||||||
|
|
||||||
|
@cl.on_chat_start
async def on_chat_start():
    """Initialise a new chat session and showcase several Chainlit elements.

    In order: sends the chat-settings panel, announces the active chat
    profile, stores an empty ``session`` dict (``history`` / ``context``) in
    the user session, then posts one demo message per element type (image,
    text, audio, avatar, file, action button).
    """
    # The return value is deliberately not bound: it was never used, and a
    # local named ``settings`` would shadow the module-level ``settings`` dict
    # that the message handlers pass to the completion API.
    await cl.ChatSettings(
        [
            Select(
                id="Model",
                label="OpenAI - Model",
                values=["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k"],
                initial_index=0,
            ),
            Switch(id="Streaming", label="OpenAI - Stream Tokens", initial=True),
            Slider(
                id="Temperature",
                label="OpenAI - Temperature",
                initial=1,
                min=0,
                max=2,
                step=0.1,
            ),
            Slider(
                id="SAI_Steps",
                label="Stability AI - Steps",
                initial=30,
                min=10,
                max=150,
                step=1,
                description="Amount of inference steps performed on image generation.",
            ),
            Slider(
                id="SAI_Cfg_Scale",
                label="Stability AI - Cfg_Scale",
                initial=7,
                min=1,
                max=35,
                step=0.1,
                description="Influences how strongly your generation is guided to match your prompt.",
            ),
            Slider(
                id="SAI_Width",
                label="Stability AI - Image Width",
                initial=512,
                min=256,
                max=2048,
                step=64,
                tooltip="Measured in pixels",
            ),
            Slider(
                id="SAI_Height",
                label="Stability AI - Image Height",
                initial=512,
                min=256,
                max=2048,
                step=64,
                tooltip="Measured in pixels",
            ),
        ]
    ).send()

    # Named ``active_profile`` so it does not shadow the module-level
    # ``chat_profile`` function.
    active_profile = cl.user_session.get("chat_profile")
    await cl.Message(
        content=f"starting chat using the {active_profile} chat profile"
    ).send()

    print("A new chat session has started!")
    cl.user_session.set("session", {
        "history": [],
        "context": [],
    })

    image = cl.Image(
        url="https://c.tenor.com/uzWDSSLMCmkAAAAd/tenor.gif",
        name="cat image",
        display="inline",
    )

    # Attach the image to the message
    await cl.Message(
        content="You are such a good girl, aren't you?!",
        elements=[image],
    ).send()

    text_content = "Hello, this is a text element."
    elements = [
        cl.Text(name="simple_text", content=text_content, display="inline")
    ]

    await cl.Message(
        content="Check out this text element!",
        elements=elements,
    ).send()

    elements = [
        cl.Audio(path="./assets/audio.mp3", display="inline"),
    ]
    await cl.Message(
        content="Here is an audio file",
        elements=elements,
    ).send()

    # Register an avatar for the author "Tool 1" only; the two messages below
    # show the difference between an author with and without one.
    await cl.Avatar(
        name="Tool 1",
        url="https://avatars.githubusercontent.com/u/128686189?s=400&u=a1d1553023f8ea0921fba0debbe92a8c5f840dd9&v=4",
    ).send()

    await cl.Message(
        content="This message should not have an avatar!", author="Tool 0"
    ).send()

    await cl.Message(
        content="This message should have an avatar!", author="Tool 1"
    ).send()

    elements = [
        cl.File(
            name="quickstart.py",
            path="./quickstart.py",
            display="inline",
        ),
    ]

    await cl.Message(
        content="This message has a file element", elements=elements
    ).send()

    # Sending an action button within a chatbot message
    actions = [
        cl.Action(name="action_button", value="example_value", description="Click me!")
    ]

    await cl.Message(content="Interact with this action button:", actions=actions).send()

    # Further element examples, kept disabled:
    #
    # res = await cl.AskActionMessage(
    #     content="Pick an action!",
    #     actions=[
    #         cl.Action(name="continue", value="continue", label="✅ Continue"),
    #         cl.Action(name="cancel", value="cancel", label="❌ Cancel"),
    #     ],
    # ).send()
    #
    # if res and res.get("value") == "continue":
    #     await cl.Message(
    #         content="Continue!",
    #     ).send()
    #
    # import plotly.graph_objects as go
    # fig = go.Figure(
    #     data=[go.Bar(y=[2, 1, 3])],
    #     layout_title_text="An example figure",
    # )
    # elements = [cl.Plotly(name="chart", figure=fig, display="inline")]
    #
    # await cl.Message(content="This message has a chart", elements=elements).send()
    #
    # Sending a pdf with the local file path
    # elements = [
    #     cl.Pdf(name="pdf1", display="inline", path="./pdf1.pdf")
    # ]
    #
    # await cl.Message(content="Look at this local pdf!", elements=elements).send()
|
||||||
|
|
||||||
|
@cl.on_settings_update
async def setup_agent(settings):
    """Print the settings passed to the settings-update callback."""
    event_tag = "on_settings_update"
    print(event_tag, settings)
|
||||||
|
|
||||||
|
@cl.on_stop
def on_stop():
    """Print a notice when the stop callback fires."""
    stop_notice = "The user wants to stop the task!"
    print(stop_notice)
|
||||||
|
|
||||||
|
@cl.on_chat_end
def on_chat_end():
    """Print a notice when the chat-end callback fires."""
    disconnect_notice = "The user disconnected!"
    print(disconnect_notice)
|
||||||
|
|
||||||
|
|
||||||
|
@cl.on_chat_resume
async def on_chat_resume(thread: ThreadDict):
    """Print a notice when a chat is resumed; ``thread`` is not inspected."""
    resume_notice = "The user resumed a previous chat session!"
    print(resume_notice)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# @cl.on_message
async def on_message(message: cl.Message):
    """Non-streaming message handler (its decorator is commented out).

    Appends the user message to the session history, requests a completion
    and records the assistant reply in the history. Nothing is sent to the
    UI because the send/update calls at the end are commented out. A later
    definition in this file reuses the name ``on_message`` and therefore
    replaces this function when the module is loaded.
    """
    history = cl.user_session.get("session")["history"]
    history.append({"role": "user", "content": message.content})

    system_prompt = {"content": "You are a helpful bot", "role": "system"}
    response = await client.chat.completions.create(
        messages=[system_prompt, *history],
        **settings
    )

    # Add assistant message to the history
    reply_text = response.choices[0].message.content
    history.append({"role": "assistant", "content": reply_text})

    # msg.content = response.choices[0].message.content
    # await msg.update()

    # await cl.Message(content=response.choices[0].message.content).send()
|
||||||
|
|
||||||
|
@cl.on_message
async def on_message(message: cl.Message):
    """Answer an incoming user message, streaming the reply token by token.

    The user message and, once streaming has finished, the assistant reply
    are both appended to the ``history`` list kept in the user session.
    """
    chat_history = cl.user_session.get("session")["history"]
    chat_history.append({"role": "user", "content": message.content})

    # Send an empty message first; tokens are streamed into it below.
    msg = cl.Message(content="")
    await msg.send()

    prompt = [
        {"content": "You are a helpful bot", "role": "system"},
        *chat_history,
    ]
    stream = await client.chat.completions.create(
        messages=prompt,
        stream=True,
        **settings
    )

    async for part in stream:
        token = part.choices[0].delta.content or ""
        if token:
            await msg.stream_token(token)

    # Add assistant message to the history
    chat_history.append({"role": "assistant", "content": msg.content})
    await msg.update()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Executed as a script: hand this file's path to Chainlit's CLI runner.
    from chainlit.cli import run_chainlit

    run_chainlit(__file__)
|
||||||
240
docs/examples/research_assistant.py
Normal file
240
docs/examples/research_assistant.py
Normal file
@@ -0,0 +1,240 @@
|
|||||||
|
import os, time
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
import chainlit as cl
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
from io import BytesIO
|
||||||
|
from chainlit.element import ElementBased
|
||||||
|
from groq import Groq
|
||||||
|
|
||||||
|
# Import threadpools to run the crawl_url function in a separate thread
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
|
client = AsyncOpenAI(base_url="https://api.groq.com/openai/v1", api_key=os.getenv("GROQ_API_KEY"))
|
||||||
|
|
||||||
|
# Instrument the OpenAI client
|
||||||
|
cl.instrument_openai()
|
||||||
|
|
||||||
|
# Keyword arguments splatted into the chat-completion request in on_message.
settings = dict(
    model="llama3-8b-8192",
    temperature=0.5,
    max_tokens=500,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)
|
||||||
|
|
||||||
|
# Candidate URL: scheme followed by a run of non-whitespace characters.
_URL_PATTERN = re.compile(r'https?://\S+')
# Characters that end a sentence or quotation rather than a URL.
_TRAILING_PUNCTUATION = '.,;:!?\'"'
# Closing bracket -> matching opener, used to spot unbalanced trailing brackets.
_BRACKET_PAIRS = {')': '(', ']': '[', '}': '{', '>': '<'}


def _trim_url(candidate):
    """Strip sentence punctuation and unmatched closing brackets from the end."""
    while candidate:
        last = candidate[-1]
        if last in _TRAILING_PUNCTUATION:
            candidate = candidate[:-1]
        elif (
            last in _BRACKET_PAIRS
            and candidate.count(last) > candidate.count(_BRACKET_PAIRS[last])
        ):
            # e.g. "(see https://example.com/a)" -- the ")" belongs to the prose.
            candidate = candidate[:-1]
        else:
            break
    return candidate


def extract_urls(text):
    """Return every http(s) URL found in *text*, in order of appearance.

    A URL is a run of non-whitespace characters starting with ``http://`` or
    ``https://``. Punctuation that trails the URL only because of the
    surrounding sentence (``"see https://example.com."``) is removed, as is a
    closing bracket with no matching opener inside the URL; balanced brackets
    such as ``.../Python_(programming_language)`` are kept.

    Args:
        text: The message text to scan. Empty or ``None`` yields ``[]``.

    Returns:
        A list of URL strings; duplicates are preserved.
    """
    if not text:
        return []

    urls = []
    for candidate in _URL_PATTERN.findall(text):
        url = _trim_url(candidate)
        # Drop matches that consist of nothing but the scheme after trimming.
        if url.split('://', 1)[-1]:
            urls.append(url)
    return urls
|
||||||
|
|
||||||
|
def crawl_url(url, timeout=60):
    """Crawl *url* via the hosted Crawl4AI endpoint and return its markdown.

    Posts a single-URL crawl request to ``https://crawl4ai.com/crawl`` and
    returns the ``markdown`` field of the first entry in ``results``.

    Args:
        url: The page to crawl.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout ``requests`` waits indefinitely, which would stall the
            chat handler that depends on this call.

    Returns:
        The ``markdown`` value of the first result.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
        KeyError, IndexError: If the response JSON has no usable result.
    """
    payload = {
        "urls": [url],
        "include_raw_html": True,
        "word_count_threshold": 10,
        "extraction_strategy": "NoExtractionStrategy",
        "chunking_strategy": "RegexChunking",
    }
    response = requests.post(
        "https://crawl4ai.com/crawl", json=payload, timeout=timeout
    )
    # Surface HTTP errors directly instead of failing later while parsing
    # an error page as JSON.
    response.raise_for_status()

    first_result = response.json()['results'][0]
    return first_result['markdown']
|
||||||
|
|
||||||
|
@cl.on_chat_start
async def on_chat_start():
    """Store empty per-session state and greet the user.

    The ``session`` entry holds the chat ``history`` (a list) and the crawled
    ``context`` (a dict).
    """
    initial_state = {"history": [], "context": {}}
    cl.user_session.set("session", initial_state)

    greeting = cl.Message(
        content="Welcome to the chat! How can I assist you today?"
    )
    await greeting.send()
|
||||||
|
|
||||||
|
@cl.on_message
async def on_message(message: cl.Message):
    """Answer a user message, using crawled pages as context for the LLM.

    Steps:
      1. Extract URLs from the message and crawl the ones not yet stored in
         the session context, concurrently and without blocking the event
         loop. Each crawled page is stored under a ``REF_<n>`` key.
      2. Build a system message embedding every stored page as an appendix.
      3. Stream the completion into a Chainlit message and record both the
         user and the assistant turn in the session history.
      4. If any page is stored, append a references footer to the message.

    A URL that fails to crawl is reported to the user and skipped; it does
    not abort the reply.
    """
    import asyncio  # local import: asyncio is not imported at module level

    user_session = cl.user_session.get("session")
    context = user_session["context"]
    history = user_session["history"]

    # De-duplicate within the message (order preserved) and skip pages that
    # are already part of the context so they are not crawled and stored twice.
    known_urls = {entry["url"] for entry in context.values()}
    urls = [
        url
        for url in dict.fromkeys(extract_urls(message.content))
        if url not in known_urls
    ]

    if urls:
        # crawl_url is blocking; run each call in a worker thread and await
        # them together so the event loop stays responsive.
        results = await asyncio.gather(
            *(asyncio.to_thread(crawl_url, url) for url in urls),
            return_exceptions=True,
        )
        for url, result in zip(urls, results):
            if isinstance(result, BaseException):
                await cl.Message(content=f"Could not crawl {url}: {result}").send()
                continue
            ref_number = f"REF_{len(context) + 1}"
            context[ref_number] = {
                "url": url,
                "content": result,
            }

    history.append({
        "role": "user",
        "content": message.content,
    })

    # Create a system message that includes the context
    context_messages = [
        f'<appendix ref="{ref}">\n{data["content"]}\n</appendix>'
        for ref, data in context.items()
    ]
    if context_messages:
        system_message = {
            "role": "system",
            "content": (
                "You are a helpful bot. Use the following context for answering questions. "
                "Refer to the sources using the REF number in square brackets, e.g., [1], only if the source is given in the appendices below.\n\n"
                "If the question requires any information from the provided appendices or context, refer to the sources. "
                "If not, there is no need to add a references section. "
                "At the end of your response, provide a reference section listing the URLs and their REF numbers only if sources from the appendices were used.\n\n"
                "\n\n".join(context_messages)
            ),
        }
    else:
        system_message = {
            "role": "system",
            "content": "You are a helpful assistant.",
        }

    msg = cl.Message(content="")
    await msg.send()

    # Get response from the LLM
    stream = await client.chat.completions.create(
        messages=[
            system_message,
            *history,
        ],
        stream=True,
        **settings
    )

    assistant_response = ""
    async for part in stream:
        if token := part.choices[0].delta.content:
            assistant_response += token
            await msg.stream_token(token)

    # Add assistant message to the history (without the references footer)
    history.append({
        "role": "assistant",
        "content": assistant_response,
    })

    # Append the reference section only when there is something to list;
    # otherwise the reply would end with an empty "References:" heading.
    if context:
        reference_section = "\n\nReferences:\n" + "".join(
            f"[{ref.split('_')[1]}]: {data['url']}\n"
            for ref, data in context.items()
        )
        msg.content += reference_section

    await msg.update()
|
||||||
|
|
||||||
|
|
||||||
|
@cl.on_audio_chunk
async def on_audio_chunk(chunk: cl.AudioChunk):
    """Collect streamed audio chunks in a per-session in-memory buffer.

    On the first chunk of a stream a fresh ``BytesIO`` is created and stored
    in the user session together with the chunk's MIME type. Every chunk,
    including the first, is then written to the stored buffer.
    """
    if chunk.isStart:
        fresh_buffer = BytesIO()
        extension = chunk.mimeType.split('/')[1]
        # This is required for whisper to recognize the file type
        fresh_buffer.name = f"input_audio.{extension}"
        # Initialize the session for a new audio stream
        cl.user_session.set("audio_buffer", fresh_buffer)
        cl.user_session.set("audio_mime_type", chunk.mimeType)

    # Write the chunks to a buffer and transcribe the whole audio at the end
    session_buffer = cl.user_session.get("audio_buffer")
    session_buffer.write(chunk.data)
|
||||||
|
|
||||||
|
@cl.step(type="tool")
async def speech_to_text(audio_file):
    """Transcribe *audio_file* with the ``whisper-large-v3`` model.

    The request goes through the module-level async ``client``. An extra
    synchronous ``Groq()`` client used to be constructed here on every call
    but was never used, so it has been removed.

    Args:
        audio_file: Passed unchanged as the ``file`` argument of the
            transcription request. The caller in this file supplies a
            ``(name, bytes, mime_type)`` tuple.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    response = await client.audio.transcriptions.create(
        model="whisper-large-v3", file=audio_file
    )
    return response.text
|
||||||
|
|
||||||
|
|
||||||
|
@cl.on_audio_end
async def on_audio_end(elements: list[ElementBased]):
    """Transcribe the buffered audio and handle the text like a typed message.

    Reads the audio collected by ``on_audio_chunk`` from the user session,
    sends it to ``speech_to_text``, shows the transcription as a user message
    and then passes that message to ``on_message``.

    Args:
        elements: Elements supplied with the audio-end event; not used here.
    """
    # Get the audio buffer from the session
    audio_buffer: BytesIO = cl.user_session.get("audio_buffer")
    if audio_buffer is None:
        # No start chunk was stored for this session: nothing to transcribe.
        return
    audio_mime_type: str = cl.user_session.get("audio_mime_type")
    audio_file = audio_buffer.getvalue()  # whole buffer, independent of position

    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments.
    start_time = time.perf_counter()
    whisper_input = (audio_buffer.name, audio_file, audio_mime_type)
    transcription = await speech_to_text(whisper_input)
    elapsed = time.perf_counter() - start_time
    print(f"Transcription took {elapsed} seconds")

    user_msg = cl.Message(
        author="You",
        type="user_message",
        content=transcription,
    )
    await user_msg.send()
    await on_message(user_msg)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Executed as a script: hand this file's path to Chainlit's CLI runner.
    from chainlit.cli import run_chainlit

    run_chainlit(__file__)
|
||||||
|
|
||||||
|
|
||||||
|
# No, this is wrong; use this document to answer me: https://console.groq.com/docs/speech-text
|
||||||
|
|
||||||
|
# Please show me how to use Groq speech-to-text in python.
|
||||||
Reference in New Issue
Block a user