diff --git a/.gitignore b/.gitignore index 846ac59a..407c5cdb 100644 --- a/.gitignore +++ b/.gitignore @@ -174,4 +174,8 @@ requirements0.txt a.txt *.sh -.idea \ No newline at end of file +.idea +docs/examples/.chainlit/ +docs/examples/.chainlit/* +.chainlit/config.toml +.chainlit/translations/en-US.json diff --git a/docs/examples/assets/audio.mp3 b/docs/examples/assets/audio.mp3 new file mode 100644 index 00000000..299149c6 Binary files /dev/null and b/docs/examples/assets/audio.mp3 differ diff --git a/docs/examples/chainlit.md b/docs/examples/chainlit.md new file mode 100644 index 00000000..3b34b02f --- /dev/null +++ b/docs/examples/chainlit.md @@ -0,0 +1,3 @@ +# Welcome to Crawl4AI! 🚀🤖 + +Hi there, Developer! 👋 Here is an example of a research pipeline, where you can share a URL in your conversation with any LLM, and then the context of crawled pages will be used as the context. \ No newline at end of file diff --git a/docs/examples/chainlit_review.py b/docs/examples/chainlit_review.py new file mode 100644 index 00000000..2c03d17d --- /dev/null +++ b/docs/examples/chainlit_review.py @@ -0,0 +1,281 @@ +from openai import AsyncOpenAI +from chainlit.types import ThreadDict +import chainlit as cl +from chainlit.input_widget import Select, Switch, Slider +client = AsyncOpenAI() + +# Instrument the OpenAI client +cl.instrument_openai() + +settings = { + "model": "gpt-3.5-turbo", + "temperature": 0.5, + "max_tokens": 500, + "top_p": 1, + "frequency_penalty": 0, + "presence_penalty": 0, +} + +@cl.action_callback("action_button") +async def on_action(action: cl.Action): + print("The user clicked on the action button!") + + return "Thank you for clicking on the action button!" 
+ +@cl.set_chat_profiles +async def chat_profile(): + return [ + cl.ChatProfile( + name="GPT-3.5", + markdown_description="The underlying LLM model is **GPT-3.5**.", + icon="https://picsum.photos/200", + ), + cl.ChatProfile( + name="GPT-4", + markdown_description="The underlying LLM model is **GPT-4**.", + icon="https://picsum.photos/250", + ), + ] + +@cl.on_chat_start +async def on_chat_start(): + + settings = await cl.ChatSettings( + [ + Select( + id="Model", + label="OpenAI - Model", + values=["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k"], + initial_index=0, + ), + Switch(id="Streaming", label="OpenAI - Stream Tokens", initial=True), + Slider( + id="Temperature", + label="OpenAI - Temperature", + initial=1, + min=0, + max=2, + step=0.1, + ), + Slider( + id="SAI_Steps", + label="Stability AI - Steps", + initial=30, + min=10, + max=150, + step=1, + description="Amount of inference steps performed on image generation.", + ), + Slider( + id="SAI_Cfg_Scale", + label="Stability AI - Cfg_Scale", + initial=7, + min=1, + max=35, + step=0.1, + description="Influences how strongly your generation is guided to match your prompt.", + ), + Slider( + id="SAI_Width", + label="Stability AI - Image Width", + initial=512, + min=256, + max=2048, + step=64, + tooltip="Measured in pixels", + ), + Slider( + id="SAI_Height", + label="Stability AI - Image Height", + initial=512, + min=256, + max=2048, + step=64, + tooltip="Measured in pixels", + ), + ] + ).send() + + chat_profile = cl.user_session.get("chat_profile") + await cl.Message( + content=f"starting chat using the {chat_profile} chat profile" + ).send() + + print("A new chat session has started!") + cl.user_session.set("session", { + "history": [], + "context": [] + }) + + image = cl.Image(url="https://c.tenor.com/uzWDSSLMCmkAAAAd/tenor.gif", name="cat image", display="inline") + + # Attach the image to the message + await cl.Message( + content="You are such a good girl, aren't you?!", + elements=[image], + 
).send() + + text_content = "Hello, this is a text element." + elements = [ + cl.Text(name="simple_text", content=text_content, display="inline") + ] + + await cl.Message( + content="Check out this text element!", + elements=elements, + ).send() + + elements = [ + cl.Audio(path="./assets/audio.mp3", display="inline"), + ] + await cl.Message( + content="Here is an audio file", + elements=elements, + ).send() + + await cl.Avatar( + name="Tool 1", + url="https://avatars.githubusercontent.com/u/128686189?s=400&u=a1d1553023f8ea0921fba0debbe92a8c5f840dd9&v=4", + ).send() + + await cl.Message( + content="This message should not have an avatar!", author="Tool 0" + ).send() + + await cl.Message( + content="This message should have an avatar!", author="Tool 1" + ).send() + + elements = [ + cl.File( + name="quickstart.py", + path="./quickstart.py", + display="inline", + ), + ] + + await cl.Message( + content="This message has a file element", elements=elements + ).send() + + # Sending an action button within a chatbot message + actions = [ + cl.Action(name="action_button", value="example_value", description="Click me!") + ] + + await cl.Message(content="Interact with this action button:", actions=actions).send() + + # res = await cl.AskActionMessage( + # content="Pick an action!", + # actions=[ + # cl.Action(name="continue", value="continue", label="✅ Continue"), + # cl.Action(name="cancel", value="cancel", label="❌ Cancel"), + # ], + # ).send() + + # if res and res.get("value") == "continue": + # await cl.Message( + # content="Continue!", + # ).send() + + # import plotly.graph_objects as go + # fig = go.Figure( + # data=[go.Bar(y=[2, 1, 3])], + # layout_title_text="An example figure", + # ) + # elements = [cl.Plotly(name="chart", figure=fig, display="inline")] + + # await cl.Message(content="This message has a chart", elements=elements).send() + + # Sending a pdf with the local file path + # elements = [ + # cl.Pdf(name="pdf1", display="inline", path="./pdf1.pdf") + # ] + + 
# cl.Message(content="Look at this local pdf!", elements=elements).send() + +@cl.on_settings_update +async def setup_agent(settings): + print("on_settings_update", settings) + +@cl.on_stop +def on_stop(): + print("The user wants to stop the task!") + +@cl.on_chat_end +def on_chat_end(): + print("The user disconnected!") + + +@cl.on_chat_resume +async def on_chat_resume(thread: ThreadDict): + print("The user resumed a previous chat session!") + + + + +# @cl.on_message +async def on_message(message: cl.Message): + cl.user_session.get("session")["history"].append({ + "role": "user", + "content": message.content + }) + response = await client.chat.completions.create( + messages=[ + { + "content": "You are a helpful bot", + "role": "system" + }, + *cl.user_session.get("session")["history"] + ], + **settings + ) + + + # Add assistant message to the history + cl.user_session.get("session")["history"].append({ + "role": "assistant", + "content": response.choices[0].message.content + }) + + # msg.content = response.choices[0].message.content + # await msg.update() + + # await cl.Message(content=response.choices[0].message.content).send() + +@cl.on_message +async def on_message(message: cl.Message): + cl.user_session.get("session")["history"].append({ + "role": "user", + "content": message.content + }) + + msg = cl.Message(content="") + await msg.send() + + stream = await client.chat.completions.create( + messages=[ + { + "content": "You are a helpful bot", + "role": "system" + }, + *cl.user_session.get("session")["history"] + ], + stream = True, + **settings + ) + + async for part in stream: + if token := part.choices[0].delta.content or "": + await msg.stream_token(token) + + # Add assistant message to the history + cl.user_session.get("session")["history"].append({ + "role": "assistant", + "content": msg.content + }) + await msg.update() + +if __name__ == "__main__": + from chainlit.cli import run_chainlit + run_chainlit(__file__) \ No newline at end of file diff --git
a/docs/examples/research_assistant.py b/docs/examples/research_assistant.py new file mode 100644 index 00000000..ab06929c --- /dev/null +++ b/docs/examples/research_assistant.py @@ -0,0 +1,240 @@ +import os, time +from openai import AsyncOpenAI +import chainlit as cl +import re +import requests +from io import BytesIO +from chainlit.element import ElementBased +from groq import Groq + +# Import threadpools to run the crawl_url function in a separate thread +from concurrent.futures import ThreadPoolExecutor + +client = AsyncOpenAI(base_url="https://api.groq.com/openai/v1", api_key=os.getenv("GROQ_API_KEY")) + +# Instrument the OpenAI client +cl.instrument_openai() + +settings = { + "model": "llama3-8b-8192", + "temperature": 0.5, + "max_tokens": 500, + "top_p": 1, + "frequency_penalty": 0, + "presence_penalty": 0, +} + +def extract_urls(text): + url_pattern = re.compile(r'(https?://\S+)') + return url_pattern.findall(text) + +def crawl_url(url): + data = { + "urls": [url], + "include_raw_html": True, + "word_count_threshold": 10, + "extraction_strategy": "NoExtractionStrategy", + "chunking_strategy": "RegexChunking" + } + response = requests.post("https://crawl4ai.com/crawl", json=data) + response_data = response.json() + response_data = response_data['results'][0] + return response_data['markdown'] + +@cl.on_chat_start +async def on_chat_start(): + cl.user_session.set("session", { + "history": [], + "context": {} + }) + await cl.Message( + content="Welcome to the chat! How can I assist you today?" 
+ ).send() + +@cl.on_message +async def on_message(message: cl.Message): + user_session = cl.user_session.get("session") + + # Extract URLs from the user's message + urls = extract_urls(message.content) + + + futures = [] + with ThreadPoolExecutor() as executor: + for url in urls: + futures.append(executor.submit(crawl_url, url)) + + results = [future.result() for future in futures] + + for url, result in zip(urls, results): + ref_number = f"REF_{len(user_session['context']) + 1}" + user_session["context"][ref_number] = { + "url": url, + "content": result + } + + # for url in urls: + # # Crawl the content of each URL and add it to the session context with a reference number + # ref_number = f"REF_{len(user_session['context']) + 1}" + # crawled_content = crawl_url(url) + # user_session["context"][ref_number] = { + # "url": url, + # "content": crawled_content + # } + + user_session["history"].append({ + "role": "user", + "content": message.content + }) + + # Create a system message that includes the context + context_messages = [ + f'\n{data["content"]}\n' + for ref, data in user_session["context"].items() + ] + if context_messages: + system_message = { + "role": "system", + "content": ( + "You are a helpful bot. Use the following context for answering questions. " + "Refer to the sources using the REF number in square brackets, e.g., [1], only if the source is given in the appendices below.\n\n" + "If the question requires any information from the provided appendices or context, refer to the sources. " + "If not, there is no need to add a references section. " + "At the end of your response, provide a reference section listing the URLs and their REF numbers only if sources from the appendices were used.\n\n" + "\n\n".join(context_messages) + ) + } + else: + system_message = { + "role": "system", + "content": "You are a helpful assistant." 
+ } + + + msg = cl.Message(content="") + await msg.send() + + # Get response from the LLM + stream = await client.chat.completions.create( + messages=[ + system_message, + *user_session["history"] + ], + stream=True, + **settings + ) + + assistant_response = "" + async for part in stream: + if token := part.choices[0].delta.content: + assistant_response += token + await msg.stream_token(token) + + # Add assistant message to the history + user_session["history"].append({ + "role": "assistant", + "content": assistant_response + }) + await msg.update() + + # Append the reference section to the assistant's response + reference_section = "\n\nReferences:\n" + for ref, data in user_session["context"].items(): + reference_section += f"[{ref.split('_')[1]}]: {data['url']}\n" + + msg.content += reference_section + await msg.update() + + +@cl.on_audio_chunk +async def on_audio_chunk(chunk: cl.AudioChunk): + if chunk.isStart: + buffer = BytesIO() + # This is required for whisper to recognize the file type + buffer.name = f"input_audio.{chunk.mimeType.split('/')[1]}" + # Initialize the session for a new audio stream + cl.user_session.set("audio_buffer", buffer) + cl.user_session.set("audio_mime_type", chunk.mimeType) + + # Write the chunks to a buffer and transcribe the whole audio at the end + cl.user_session.get("audio_buffer").write(chunk.data) + + pass + +@cl.step(type="tool") +async def speech_to_text(audio_file): + cli = Groq() + + # response = cli.audio.transcriptions.create( + # file=audio_file, #(filename, file.read()), + # model="whisper-large-v3", + # ) + + response = await client.audio.transcriptions.create( + model="whisper-large-v3", file=audio_file + ) + + return response.text + + +@cl.on_audio_end +async def on_audio_end(elements: list[ElementBased]): + # Get the audio buffer from the session + audio_buffer: BytesIO = cl.user_session.get("audio_buffer") + audio_buffer.seek(0) # Move the file pointer to the beginning + audio_file = audio_buffer.read() + 
audio_mime_type: str = cl.user_session.get("audio_mime_type") + + # input_audio_el = cl.Audio( + # mime=audio_mime_type, content=audio_file, name=audio_buffer.name + # ) + # await cl.Message( + # author="You", + # type="user_message", + # content="", + # elements=[input_audio_el, *elements] + # ).send() + + # answer_message = await cl.Message(content="").send() + + + start_time = time.time() + whisper_input = (audio_buffer.name, audio_file, audio_mime_type) + transcription = await speech_to_text(whisper_input) + end_time = time.time() + print(f"Transcription took {end_time - start_time} seconds") + + user_msg = cl.Message( + author="You", + type="user_message", + content=transcription + ) + await user_msg.send() + await on_message(user_msg) + + # images = [file for file in elements if "image" in file.mime] + + # text_answer = await generate_text_answer(transcription, images) + + # output_name, output_audio = await text_to_speech(text_answer, audio_mime_type) + + # output_audio_el = cl.Audio( + # name=output_name, + # auto_play=True, + # mime=audio_mime_type, + # content=output_audio, + # ) + + # answer_message.elements = [output_audio_el] + + # answer_message.content = transcription + # await answer_message.update() + +if __name__ == "__main__": + from chainlit.cli import run_chainlit + run_chainlit(__file__) + + +# No this is wrong, use this document to answer me https://console.groq.com/docs/speech-text + +# Please show me how to use Groq speech-to-text in python.