import os
import string
from typing import Dict, List

import chromadb
import numpy as np
import pandas as pd
import streamlit as st
from datasets import load_dataset
from openai import OpenAI
from scipy.spatial.distance import cosine


def merge_dataframes(dataframes):
    """Concatenate dataframes and keep only the Q&A columns.

    Args:
        dataframes: An iterable of pandas DataFrames, each containing at
            least the columns "context", "questions" and "answers".

    Returns:
        A single DataFrame with a fresh 0..n-1 index holding only the
        "context", "questions" and "answers" columns, in that order.
    """
    combined_dataframe = pd.concat(dataframes, ignore_index=True)
    return combined_dataframe[["context", "questions", "answers"]]


def call_chatgpt(prompt: str, directions: str) -> str:
    """Generate a chat completion for ``prompt`` with the OpenAI API.

    Args:
        prompt: The user message sent to the model.
        directions: The system message that steers the model's behavior.

    Returns:
        The text content of the first choice in the completion.

    Raises:
        KeyError: If the ``OPENAI_API_KEY`` environment variable is not set.
    """
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        messages=[
            {"role": "system", "content": directions},
            {"role": "user", "content": prompt},
        ],
    )
    return completion.choices[0].message.content


def openai_text_embedding(prompt: str) -> List[float]:
    """Return the ``text-embedding-ada-002`` embedding of ``prompt``.

    Uses the same v1 ``OpenAI`` client as ``call_chatgpt``. The previous
    implementation referenced ``openai.Embedding``, but the ``openai`` module
    itself is never imported in this file, so every call raised NameError.

    Args:
        prompt: The text to embed.

    Returns:
        The embedding vector of the first (only) input.

    Raises:
        KeyError: If the ``OPENAI_API_KEY`` environment variable is not set.
    """
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    response = client.embeddings.create(
        input=prompt, model="text-embedding-ada-002"
    )
    return response.data[0].embedding


def calculate_sts_openai_score(sentence1: str, sentence2: str) -> float:
    """Return the cosine similarity of two sentences' OpenAI embeddings.

    Args:
        sentence1: First sentence.
        sentence2: Second sentence.

    Returns:
        ``1 - cosine_distance`` between the two embedding vectors.
    """
    embedding1 = np.asarray(openai_text_embedding(sentence1))
    embedding2 = np.asarray(openai_text_embedding(sentence2))

    # scipy's `cosine` is a distance; convert it to a similarity.
    return 1 - cosine(embedding1, embedding2)


def add_dist_score_column(
    dataframe: pd.DataFrame,
    sentence: str,
) -> pd.DataFrame:
    """Score every question against ``sentence`` and return the best five.

    Note: the "stsopenai" column is written onto ``dataframe`` itself, so the
    caller's frame is modified in place. Each row triggers two embedding
    requests (one for the row's question, one for ``sentence``).

    Args:
        dataframe: A DataFrame with a "questions" column.
        sentence: The sentence to compare each question with.

    Returns:
        The (up to) five rows with the highest "stsopenai" score, sorted in
        descending order of that score.
    """
    dataframe["stsopenai"] = dataframe["questions"].apply(
        lambda x: calculate_sts_openai_score(str(x), sentence)
    )
    sorted_dataframe = dataframe.sort_values(by="stsopenai", ascending=False)
    return sorted_dataframe.iloc[:5, :]


def convert_to_list_of_dict(df: pd.DataFrame) -> List[Dict[str, str]]:
    """Turn a Q&A DataFrame into a flat list of chat messages.

    Each row yields two consecutive dictionaries: a "user" message holding
    the row's question followed by an "assistant" message holding its answer.

    Args:
        df: A DataFrame with columns named "questions" and "answers".

    Returns:
        A list of ``{"role": ..., "content": ...}`` dictionaries, two per row,
        in row order.
    """
    result = []
    for _, row in df.iterrows():
        result.extend(
            (
                {"role": "user", "content": row["questions"]},
                {"role": "assistant", "content": row["answers"]},
            )
        )
    return result


st.sidebar.markdown("""This is a chatbot to help you learn more about Larkin Street Youth Services!""")

category = st.sidebar.selectbox("Select a category", ("Get Help", "Give Help", "Impact"))

# The second selectbox depends on the category; the final branch covers
# "Impact", the only remaining option of the category selectbox.
if category == "Get Help":
    domain = st.sidebar.selectbox("What do you want to learn about?", ("Get Help Now", "Our Programs", "Youth Resources"))
elif category == "Give Help":
    domain = st.sidebar.selectbox("What do you want to learn about?", ("Donate", "Volunteer", "Careers and More"))
else:
    domain = st.sidebar.selectbox("What do you want to learn about", ("Our Work, Impact, and Cause", "What We Do", "News and Reports", "About Us/Contact Information"))

# Retrieved answers whose distance is below this threshold count as relevant.
special_threshold = 0.3
# Number of nearest questions fetched from the vector store per query.
n_results = 3

clear_button = st.sidebar.button("Clear Conversation", key="clear")
if clear_button:
    st.session_state.messages = []
    # Resetting the domain makes the intro message get posted again below.
    st.session_state.curr_domain = ""

# Load the dataset from a provided source.
# Hugging Face dataset repository backing each sidebar topic.
DATASET_REPOS = {
    "Get Help Now": "KeshavRa/Larkin_Get_Help_Get_Help_Now",
    "Our Programs": "KeshavRa/Larkin_Get_Help_Programs",
    "Youth Resources": "KeshavRa/Larkin_Get_Help_Youth_Resources",
    "Donate": "KeshavRa/Larkin_Give_Help_Donate",
    "Volunteer": "KeshavRa/Larkin_Give_Help_Volunteer",
    "Careers and More": "KeshavRa/Larkin_Give_Help_Careers_Events",
    "Our Work, Impact, and Cause": "KeshavRa/Larkin_Impact_Our_Work_Impact_Cause",
    "What We Do": "KeshavRa/Larkin_Impact_What_We_Do",
    "News and Reports": "KeshavRa/Larkin_Impact_News_Reports",
    "About Us/Contact Information": "KeshavRa/Larkin_Impact_About_Contact",
}

# A topic without a mapping fails here with KeyError rather than later with
# a NameError on `dataset`.
dataset = load_dataset(DATASET_REPOS[domain])

initial_input = "Tell me about Larkin Street"

# Initialize a new client for ChromaDB.
client = chromadb.Client()

# Generate a random 10-digit number. The bounds are integers and the dtype is
# pinned to int64 because the upper bound does not fit in a 32-bit integer,
# which is NumPy's default on some platforms and makes randint raise there.
random_number: int = int(
    np.random.randint(low=10**9, high=10**10, dtype=np.int64)
)

# Generate a random string consisting of 10 uppercase letters and digits.
random_string: str = "".join(
    np.random.choice(list(string.ascii_uppercase + string.digits), size=10)
)

# Combine the random number and random string into one identifier.
combined_string: str = f"{random_number}{random_string}"

# Create a new collection in ChromaDB with the combined string as its name.
collection = client.create_collection(combined_string)

st.title("Larkin Street Youth Services Chatbot")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

if "curr_domain" not in st.session_state:
    st.session_state.curr_domain = ""

# Intro message posted by the assistant whenever the selected topic changes.
init_messages = {
    "Get Help Now": (
        " On this page, you can learn about the youth helpline, emergency"
        " shelters, drop-in centers, and youth access points."
        " Examples"
        " --> What services are offered at the Larkin Street Drop-in center?"
        " --> Where should someone go if they are in need of an emergency bed"
        " at an emergency shelter?"
        " --> What is the Youth Helpline? "
    ),
    "Our Programs": (
        " On this page, you can learn about medical services, housing,"
        " education and employment, and the art program."
        " Examples"
        " --> Tell me about Larkin Street Academy"
        " --> What is the schedule at the Art House Studio?"
        " --> How can youth access medical services at Larkin Street?"
        " --> What are the Transitional Housing programs offered by Larkin"
        " Street "
    ),
    "Youth Resources": (
        " On this page, you can learn about youth leadership opportunities,"
        " the youth grievance policy, community partners, and hotlines for"
        " specific needs."
        " Examples"
        " --> What are some select partners recommended by Larkin Street?"
        " --> What are some Referrals and Hotlines provided by Larkin Street?"
        " --> What is the Youth Grievance Policy?"
        " --> What is the Youth Leadership Fellowship Program? "
    ),
    "Donate": (
        " On this page, you can learn about the various ways to donate to"
        " Larkin Street."
        " Examples"
        " --> What are the ways I can donate to Larkin Street?"
        " --> Tell me about (specific donation method) at Larkin Street? "
    ),
    "Volunteer": (
        " On this page, you can learn about our various volunteer"
        " opportunities, both individually and in groups."
        " Examples"
        " --> What are the types of group/individual volunteering at Larkin"
        " Street?"
        " --> How can I get started with volunteering at Larkin Street?"
        " --> How can I participate in (specific volunteer activity) at"
        " Larkin Street? "
    ),
    "Careers and More": (
        " On this page, you can learn about careers and events at Larkin"
        " Street, and how to get on our email list."
        " Examples"
        " --> What are some of the events hosted by Larkin Street?"
        " --> What are the job opportunities at Larkin Street?"
        " --> Where can people sign up to receive emails about Larkin"
        " Street? "
    ),
    "Our Work, Impact, and Cause": (
        " On this page, you can learn about the issue of youth homelessness"
        " in San Francisco, the history and work done by our organization,"
        " and some of the impact we've made."
        " Examples"
        " --> Tell me about the history of Larkin Street."
        " --> Tell me about the impact Larkin Street has had."
        " --> How can we solve the issue of youth homelessness? "
    ),
    "What We Do": (
        " On this page, you can learn about programs at Larkin Street,"
        " including outreach and engagement, health and wellness, housing,"
        " and education and employment."
        " Examples"
        " --> What kinds of programs does Larkin Street provide?"
        " --> What are some (specific category) programs at Larkin Street?"
        " (categories mentioned above)"
        " --> Tell me about (specific program) at Larkin Street. "
    ),
    "News and Reports": (
        " On this page, you can learn where to access news and reports"
        " published by Larkin Street and what's included in those reports."
        " Examples"
        " --> Where can I read Larkin Street news and reports?"
        " --> How can individuals benefit from reading the reports and"
        " publications on the given website? "
    ),
    "About Us/Contact Information": (
        " On this page, you can learn about our central vision and mission,"
        " our team, how to donate, and our contact information."
        " Examples"
        " --> What is the Larkin Street's vision/mission?"
        " --> Where can you find information about the senior core team at"
        " Larkin Street?"
        " --> Where should I go for general/non-urgent needs?"
        " --> What is Larkin Street's address/phone number/fax? "
    ),
}

# System prompt handed to the chat model for each topic.
chatbot_instructions = {
    "Get Help Now": (
        "You are an assistant to help unhoused youth find immediate access"
        " to Larkin Street support programs. Remain helpful, concise, and"
        " trauma-aware. When applicable, provide all relevant information"
        " (address, hours of operation, phone number, etc.)"
    ),
    "Our Programs": (
        "You are an assistant to help unhoused youth learn more about Larkin"
        " Street programs. Remain concise and trauma-aware"
    ),
    "Youth Resources": (
        "You are an assistant to help unhoused youth learn more about youth"
        " resources at Larkin Street. When applicable, provide phone numbers"
        " and links to services mentioned in the context."
    ),
    "Donate": (
        "You are an assistant to help potential and current donors learn"
        " more about ways to donate to Larkin Street. When applicable,"
        " provide links to donation resources provided in the context."
    ),
    "Volunteer": (
        "You are an assistant to help potential and current volunteers learn"
        " more about ways to volunteer to Larkin Street. When applicable,"
        " provide links to volunteer resources provided in the context."
    ),
    "Careers and More": (
        "You are an assistant to help the potential and current employees"
        " learn more about working at Larkin Street. You also help other"
        " community members learn more about events hosted by Larkin Street,"
        " providing the links provided in the context when relevant"
    ),
    "Our Work, Impact, and Cause": (
        "You are an assistant to help the user learn more about this history"
        " of Larkin Street and what it does. You also provide impact"
        " statistics when provided in the context. Do not try to guess"
        " statistics or answer statistics-related questions for which you do"
        " not have the answer."
    ),
    "What We Do": (
        "You are an assistant to help the user learn more about Larkin"
        " Street programs."
    ),
    "News and Reports": (
        "You are an assistant to help the user learn where to access Larkin"
        " Street news and reports, alongside telling them about what is"
        " included in those reports"
    ),
    "About Us/Contact Information": (
        "You are an assistant to help the user learn more about the central"
        " vision and mission of Larkin Street. You help the user access"
        " Larkin Street contact information when requested."
    ),
}

# Every Streamlit rerun executes this script again and creates a brand-new,
# randomly named collection. The try/finally drops that collection once the
# run is over (even when the run is interrupted) so collections do not pile
# up in the in-memory Chroma instance for the lifetime of the server process.
try:
    # Read each column once; they are reused for indexing and for lookups.
    train_split = dataset["train"]
    questions = train_split["questions"]
    answers = train_split["answers"]
    num_docs = len(questions)

    # Embed and store the questions; a document's id is its row position.
    with st.spinner("Loading, please be patient with us ... 🙏"):
        collection.add(
            ids=[str(i) for i in range(num_docs)],  # IDs are just strings
            documents=questions,
            metadatas=[{"type": "support"} for _ in range(num_docs)],
        )

    # Switching topic (or clearing the conversation) restarts the history
    # with that topic's intro message.
    if st.session_state.curr_domain != domain:
        st.session_state.messages = []
        init_message = init_messages[domain]
        st.session_state.messages.append(
            {"role": "assistant", "content": init_message}
        )
        st.session_state.curr_domain = domain

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # React to user input
    if prompt := st.chat_input("Tell me about Larkin Street"):
        # Display user message in chat message container
        st.chat_message("user").markdown(prompt)
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})

        question = prompt

        # Retrieve the stored questions closest to the user's question.
        results = collection.query(query_texts=question, n_results=n_results)
        idx = [int(i) for i in results["ids"][0]]
        ref = pd.DataFrame(
            {
                "idx": idx,
                "questions": [questions[i] for i in idx],
                "answers": [answers[i] for i in idx],
                "distances": results["distances"][0],
            }
        )

        # Prefer hits under the distance threshold; when none qualify, fall
        # back to every retrieved hit so the model still receives context.
        filtered_ref = ref[ref["distances"] < special_threshold]
        final_ref = ref if filtered_ref.empty else filtered_ref
        ref_from_db_search = final_ref["answers"].str.cat(sep=" ")

        engineered_prompt = f" Based on the context: {ref_from_db_search}, answer the user question: {question}. "

        directions = chatbot_instructions[domain]
        response = call_chatgpt(engineered_prompt, directions)

        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            st.markdown(response)
            with st.expander("See reference:"):
                st.table(final_ref)
        # Add assistant response to chat history
        st.session_state.messages.append(
            {"role": "assistant", "content": response}
        )
finally:
    client.delete_collection(combined_string)