KeshavRa committed on
Commit
f41898f
·
verified ·
1 Parent(s): 45c6f19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +268 -248
app.py CHANGED
@@ -256,266 +256,286 @@ if selected_app == "3) Upload Datasets":
256
  st.markdown("Go to this [google colab link](https://colab.research.google.com/drive/1eCpk9HUoCKZb--tiNyQSHFW2ojoaA35m) to get started")
257
 
258
  if selected_app == "4) Create Chatbot":
259
- num_domains = st.number_input("Number sentences per Q/A pair", value=2, step=1, min_value=1, max_value=3)
260
-
261
-
262
-
263
- requirements = '''
264
- openai
265
- scipy
266
- streamlit
267
- chromadb
268
- datasets
269
- '''
270
-
271
- st.write("requirements.txt")
272
- st.code(requirements, language='python')
273
 
274
- app = """
275
- import os
276
- import streamlit as st
277
- from datasets import load_dataset
278
- import chromadb
279
- import string
280
-
281
- from openai import OpenAI
282
-
283
- import numpy as np
284
- import pandas as pd
285
-
286
- from scipy.spatial.distance import cosine
287
-
288
- from typing import Dict, List
289
-
290
- def merge_dataframes(dataframes):
291
- # Concatenate the list of dataframes
292
- combined_dataframe = pd.concat(dataframes, ignore_index=True)
293
-
294
- # Ensure that the resulting dataframe only contains the columns "context", "questions", "answers"
295
- combined_dataframe = combined_dataframe[['context', 'questions', 'answers']]
296
 
297
- return combined_dataframe
 
 
 
 
298
 
299
- def call_chatgpt(prompt: str, directions: str) -> str:
300
- '''
301
- Uses the OpenAI API to generate an AI response to a prompt.
302
- Args:
303
- prompt: A string representing the prompt to send to the OpenAI API.
304
- Returns:
305
- A string representing the AI's generated response.
306
- '''
307
-
308
- # Use the OpenAI API to generate a response based on the input prompt.
309
- client = OpenAI(api_key = os.environ["OPENAI_API_KEY"])
310
-
311
- completion = client.chat.completions.create(
312
- model="gpt-3.5-turbo-0125",
313
- messages=[
314
- {"role": "system", "content": directions},
315
- {"role": "user", "content": prompt}
316
- ]
317
- )
318
-
319
- # Extract the text from the first (and only) choice in the response output.
320
- ans = completion.choices[0].message.content
321
-
322
- # Return the generated AI response.
323
- return ans
324
-
325
- def openai_text_embedding(prompt: str) -> str:
326
- return openai.Embedding.create(input=prompt, model="text-embedding-ada-002")[
327
- "data"
328
- ][0]["embedding"]
329
-
330
- def calculate_sts_openai_score(sentence1: str, sentence2: str) -> float:
331
- # Compute sentence embeddings
332
- embedding1 = openai_text_embedding(sentence1) # Flatten the embedding array
333
- embedding2 = openai_text_embedding(sentence2) # Flatten the embedding array
334
-
335
- # Convert to array
336
- embedding1 = np.asarray(embedding1)
337
- embedding2 = np.asarray(embedding2)
338
 
339
- # Calculate cosine similarity between the embeddings
340
- similarity_score = 1 - cosine(embedding1, embedding2)
341
-
342
- return similarity_score
343
 
344
- def add_dist_score_column(
345
- dataframe: pd.DataFrame, sentence: str,
346
- ) -> pd.DataFrame:
347
- dataframe["stsopenai"] = dataframe["questions"].apply(
348
- lambda x: calculate_sts_openai_score(str(x), sentence)
349
- )
350
-
351
- sorted_dataframe = dataframe.sort_values(by="stsopenai", ascending=False)
352
 
 
353
 
354
- return sorted_dataframe.iloc[:5, :]
355
-
356
- def convert_to_list_of_dict(df: pd.DataFrame) -> List[Dict[str, str]]:
357
- '''
358
- Reads in a pandas DataFrame and produces a list of dictionaries with two keys each, 'question' and 'answer.'
359
- Args:
360
- df: A pandas DataFrame with columns named 'questions' and 'answers'.
361
- Returns:
362
- A list of dictionaries, with each dictionary containing a 'question' and 'answer' key-value pair.
363
  '''
364
 
365
- # Initialize an empty list to store the dictionaries
366
- result = []
367
-
368
- # Loop through each row of the DataFrame
369
- for index, row in df.iterrows():
370
- # Create a dictionary with the current question and answer
371
- qa_dict_quest = {"role": "user", "content": row["questions"]}
372
- qa_dict_ans = {"role": "assistant", "content": row["answers"]}
373
-
374
- # Add the dictionary to the result list
375
- result.append(qa_dict_quest)
376
- result.append(qa_dict_ans)
377
-
378
- # Return the list of dictionaries
379
- return result
380
-
381
- st.sidebar.markdown(f'''This is a chatbot to help you learn more about {organization_name}''')
382
-
383
- domain = st.sidebar.selectbox(f"Select a topic", {domains})
384
-
385
- special_threshold = 0.3
386
-
387
- n_results = 3
388
-
389
- clear_button = st.sidebar.button("Clear Conversation", key="clear")
390
-
391
- if clear_button:
392
- st.session_state.messages = []
393
- st.session_state.curr_domain = ""
394
-
395
-
396
-
397
- ###
398
- ###
399
- ### Load the dataset from a provided source.
400
- ###
401
- ###
402
-
403
- initial_input = f"Tell me about {organization_name}"
404
-
405
- # Initialize a new client for ChromeDB.
406
- client = chromadb.Client()
407
-
408
- # Generate a random number between 1 billion and 10 billion.
409
- random_number: int = np.random.randint(low=1e9, high=1e10)
410
-
411
- # Generate a random string consisting of 10 uppercase letters and digits.
412
- random_string: str = "".join(
413
- np.random.choice(list(string.ascii_uppercase + string.digits), size=10)
414
- )
415
-
416
- # Combine the random number and random string into one identifier.
417
- combined_string: str = f"{random_number}{random_string}"
418
-
419
- # Create a new collection in ChromeDB with the combined string as its name.
420
- collection = client.create_collection(combined_string)
421
-
422
- st.title(f"{organization_name} Chatbot")
423
-
424
- # Initialize chat history
425
- if "messages" not in st.session_state:
426
- st.session_state.messages = []
427
-
428
- if "curr_domain" not in st.session_state:
429
- st.session_state.curr_domain = ""
430
-
431
- ###
432
- ###
433
- ### init_messages dict (one key per domain)
434
- ###
435
- ###
436
-
437
- ###
438
- ###
439
- ### chatbot_instructions dict (one key per domain)
440
- ###
441
- ###
442
-
443
- # Embed and store the first N supports for this demo
444
- with st.spinner("Loading, please be patient with us ... 🙏"):
445
- L = len(dataset["train"]["questions"])
446
 
447
- collection.add(
448
- ids=[str(i) for i in range(0, L)], # IDs are just strings
449
- documents=dataset["train"]["questions"], # Enter questions here
450
- metadatas=[{"type": "support"} for _ in range(0, L)],
451
- )
 
 
 
 
 
 
 
452
 
453
- if st.session_state.curr_domain != domain:
454
- st.session_state.messages = []
455
 
456
- init_message = init_messages[domain]
457
- st.session_state.messages.append({"role": "assistant", "content": init_message})
 
 
 
 
 
 
 
 
458
 
459
- st.session_state.curr_domain = domain
460
-
461
- # Display chat messages from history on app rerun
462
- for message in st.session_state.messages:
463
- with st.chat_message(message["role"]):
464
- st.markdown(message["content"])
465
-
466
- # React to user input
467
 - if prompt := st.chat_input(f"Tell me about {organization_name}"):
468
- # Display user message in chat message container
469
- st.chat_message("user").markdown(prompt)
470
- # Add user message to chat history
471
- st.session_state.messages.append({"role": "user", "content": prompt})
472
-
473
- question = prompt
474
-
475
- results = collection.query(query_texts=question, n_results=n_results)
476
-
477
- idx = results["ids"][0]
478
- idx = [int(i) for i in idx]
479
- ref = pd.DataFrame(
480
- {
481
- "idx": idx,
482
- "questions": [dataset["train"]["questions"][i] for i in idx],
483
- "answers": [dataset["train"]["answers"][i] for i in idx],
484
- "distances": results["distances"][0],
485
- }
486
- )
487
- # special_threshold = st.sidebar.slider('How old are you?', 0, 0.6, 0.1) # 0.3
488
- # special_threshold = 0.3
489
- filtered_ref = ref[ref["distances"] < special_threshold]
490
- if filtered_ref.shape[0] > 0:
491
- # st.success("There are highly relevant information in our database.")
492
- ref_from_db_search = filtered_ref["answers"].str.cat(sep=" ")
493
- final_ref = filtered_ref
494
- else:
495
- # st.warning(
496
- # "The database may not have relevant information to help your question so please be aware of hallucinations."
497
- # )
498
- ref_from_db_search = ref["answers"].str.cat(sep=" ")
499
- final_ref = ref
500
-
501
- engineered_prompt = f'''
502
- Based on the context: {ref_from_db_search},
503
- answer the user question: {question}.
504
- '''
505
-
506
- directions = chatbot_instructions[domain]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
 
508
- answer = call_chatgpt(engineered_prompt, directions)
509
 
510
- response = answer
511
- # Display assistant response in chat message container
512
- with st.chat_message("assistant"):
513
- st.markdown(response)
514
- with st.expander("See reference:"):
515
- st.table(final_ref)
516
- # Add assistant response to chat history
517
- st.session_state.messages.append({"role": "assistant", "content": response})
518
- """
519
-
520
- st.write("app.py")
521
- st.code(app, language='python')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  st.markdown("Go to this [google colab link](https://colab.research.google.com/drive/1eCpk9HUoCKZb--tiNyQSHFW2ojoaA35m) to get started")
257
 
258
  if selected_app == "4) Create Chatbot":
259
+ if st.session_state.error != "":
260
+ st.error(st.session_state.error)
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
+ if st.session_state.success != None:
263
+ st.success("Success! Copy/paste the requirements.txt and app.py files into your HuggingFace Space")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
+ st.write('requirements.txt')
266
+ st.code(st.session_state.success[0], language='python')
267
+
268
+ st.write('app.py')
269
+ st.code(st.session_state.success[1], language='python')
270
 
271
+ if st.button('Reset'):
272
+ st.session_state.clear()
273
+ st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
+ else:
276
+ organization_name = st.text_input("What is the name of your organization", "")
 
 
277
 
278
+ # num_domains = st.number_input("Number sentences per Q/A pair", value=2, step=1, min_value=1, max_value=3)
279
+ submit = st.button("Submit")
280
+ if submit:
281
+ st.session_state.submit = True
 
 
 
 
282
 
283
+ if st.session_state.submit:
284
 
285
+ requirements = '''
286
+ openai
287
+ scipy
288
+ streamlit
289
+ chromadb
290
+ datasets
 
 
 
291
  '''
292
 
293
+ app = """
294
+ import os
295
+ import streamlit as st
296
+ from datasets import load_dataset
297
+ import chromadb
298
+ import string
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
+ from openai import OpenAI
301
+
302
+ import numpy as np
303
+ import pandas as pd
304
+
305
+ from scipy.spatial.distance import cosine
306
+
307
+ from typing import Dict, List
308
+
309
+ def merge_dataframes(dataframes):
310
+ # Concatenate the list of dataframes
311
+ combined_dataframe = pd.concat(dataframes, ignore_index=True)
312
 
313
+ # Ensure that the resulting dataframe only contains the columns "context", "questions", "answers"
314
+ combined_dataframe = combined_dataframe[['context', 'questions', 'answers']]
315
 
316
+ return combined_dataframe
317
+
318
+ def call_chatgpt(prompt: str, directions: str) -> str:
319
+ '''
320
+ Uses the OpenAI API to generate an AI response to a prompt.
321
+ Args:
322
+ prompt: A string representing the prompt to send to the OpenAI API.
323
+ Returns:
324
+ A string representing the AI's generated response.
325
+ '''
326
 
327
+ # Use the OpenAI API to generate a response based on the input prompt.
328
+ client = OpenAI(api_key = os.environ["OPENAI_API_KEY"])
329
+
330
+ completion = client.chat.completions.create(
331
+ model="gpt-3.5-turbo-0125",
332
+ messages=[
333
+ {"role": "system", "content": directions},
334
+ {"role": "user", "content": prompt}
335
+ ]
336
+ )
337
+
338
+ # Extract the text from the first (and only) choice in the response output.
339
+ ans = completion.choices[0].message.content
340
+
341
+ # Return the generated AI response.
342
+ return ans
343
+
344
+ def openai_text_embedding(prompt: str) -> str:
345
+ return openai.Embedding.create(input=prompt, model="text-embedding-ada-002")[
346
+ "data"
347
+ ][0]["embedding"]
348
+
349
+ def calculate_sts_openai_score(sentence1: str, sentence2: str) -> float:
350
+ # Compute sentence embeddings
351
+ embedding1 = openai_text_embedding(sentence1) # Flatten the embedding array
352
+ embedding2 = openai_text_embedding(sentence2) # Flatten the embedding array
353
+
354
+ # Convert to array
355
+ embedding1 = np.asarray(embedding1)
356
+ embedding2 = np.asarray(embedding2)
357
+
358
+ # Calculate cosine similarity between the embeddings
359
+ similarity_score = 1 - cosine(embedding1, embedding2)
360
+
361
+ return similarity_score
362
+
363
+ def add_dist_score_column(
364
+ dataframe: pd.DataFrame, sentence: str,
365
+ ) -> pd.DataFrame:
366
+ dataframe["stsopenai"] = dataframe["questions"].apply(
367
+ lambda x: calculate_sts_openai_score(str(x), sentence)
368
+ )
369
+
370
+ sorted_dataframe = dataframe.sort_values(by="stsopenai", ascending=False)
371
+
372
+
373
+ return sorted_dataframe.iloc[:5, :]
374
+
375
+ def convert_to_list_of_dict(df: pd.DataFrame) -> List[Dict[str, str]]:
376
+ '''
377
+ Reads in a pandas DataFrame and produces a list of dictionaries with two keys each, 'question' and 'answer.'
378
+ Args:
379
+ df: A pandas DataFrame with columns named 'questions' and 'answers'.
380
+ Returns:
381
+ A list of dictionaries, with each dictionary containing a 'question' and 'answer' key-value pair.
382
+ '''
383
+
384
+ # Initialize an empty list to store the dictionaries
385
+ result = []
386
+
387
+ # Loop through each row of the DataFrame
388
+ for index, row in df.iterrows():
389
+ # Create a dictionary with the current question and answer
390
+ qa_dict_quest = {"role": "user", "content": row["questions"]}
391
+ qa_dict_ans = {"role": "assistant", "content": row["answers"]}
392
+
393
+ # Add the dictionary to the result list
394
+ result.append(qa_dict_quest)
395
+ result.append(qa_dict_ans)
396
+
397
+ # Return the list of dictionaries
398
+ return result
399
+
400
+ st.sidebar.markdown(f'''This is a chatbot to help you learn more about {organization_name}''')
401
+
402
+ domain = st.sidebar.selectbox(f"Select a topic", {domains})
403
+
404
+ special_threshold = 0.3
405
+
406
+ n_results = 3
407
+
408
+ clear_button = st.sidebar.button("Clear Conversation", key="clear")
409
+
410
+ if clear_button:
411
+ st.session_state.messages = []
412
+ st.session_state.curr_domain = ""
413
 
 
414
 
415
+
416
+ ###
417
+ ###
418
+ ### Load the dataset from a provided source.
419
+ ###
420
+ ###
421
+
422
+ initial_input = f"Tell me about {organization_name}"
423
+
424
+ # Initialize a new client for ChromeDB.
425
+ client = chromadb.Client()
426
+
427
+ # Generate a random number between 1 billion and 10 billion.
428
+ random_number: int = np.random.randint(low=1e9, high=1e10)
429
+
430
+ # Generate a random string consisting of 10 uppercase letters and digits.
431
+ random_string: str = "".join(
432
+ np.random.choice(list(string.ascii_uppercase + string.digits), size=10)
433
+ )
434
+
435
+ # Combine the random number and random string into one identifier.
436
+ combined_string: str = f"{random_number}{random_string}"
437
+
438
+ # Create a new collection in ChromeDB with the combined string as its name.
439
+ collection = client.create_collection(combined_string)
440
+
441
+ st.title(f"{organization_name} Chatbot")
442
+
443
+ # Initialize chat history
444
+ if "messages" not in st.session_state:
445
+ st.session_state.messages = []
446
+
447
+ if "curr_domain" not in st.session_state:
448
+ st.session_state.curr_domain = ""
449
+
450
+ ###
451
+ ###
452
+ ### init_messages dict (one key per domain)
453
+ ###
454
+ ###
455
+
456
+ ###
457
+ ###
458
+ ### chatbot_instructions dict (one key per domain)
459
+ ###
460
+ ###
461
+
462
+ # Embed and store the first N supports for this demo
463
+ with st.spinner("Loading, please be patient with us ... 🙏"):
464
+ L = len(dataset["train"]["questions"])
465
+
466
+ collection.add(
467
+ ids=[str(i) for i in range(0, L)], # IDs are just strings
468
+ documents=dataset["train"]["questions"], # Enter questions here
469
+ metadatas=[{"type": "support"} for _ in range(0, L)],
470
+ )
471
+
472
+ if st.session_state.curr_domain != domain:
473
+ st.session_state.messages = []
474
+
475
+ init_message = init_messages[domain]
476
+ st.session_state.messages.append({"role": "assistant", "content": init_message})
477
+
478
+ st.session_state.curr_domain = domain
479
+
480
+ # Display chat messages from history on app rerun
481
+ for message in st.session_state.messages:
482
+ with st.chat_message(message["role"]):
483
+ st.markdown(message["content"])
484
+
485
+ # React to user input
486
 + if prompt := st.chat_input(f"Tell me about {organization_name}"):
487
+ # Display user message in chat message container
488
+ st.chat_message("user").markdown(prompt)
489
+ # Add user message to chat history
490
+ st.session_state.messages.append({"role": "user", "content": prompt})
491
+
492
+ question = prompt
493
+
494
+ results = collection.query(query_texts=question, n_results=n_results)
495
+
496
+ idx = results["ids"][0]
497
+ idx = [int(i) for i in idx]
498
+ ref = pd.DataFrame(
499
+ {
500
+ "idx": idx,
501
+ "questions": [dataset["train"]["questions"][i] for i in idx],
502
+ "answers": [dataset["train"]["answers"][i] for i in idx],
503
+ "distances": results["distances"][0],
504
+ }
505
+ )
506
+ # special_threshold = st.sidebar.slider('How old are you?', 0, 0.6, 0.1) # 0.3
507
+ # special_threshold = 0.3
508
+ filtered_ref = ref[ref["distances"] < special_threshold]
509
+ if filtered_ref.shape[0] > 0:
510
+ # st.success("There are highly relevant information in our database.")
511
+ ref_from_db_search = filtered_ref["answers"].str.cat(sep=" ")
512
+ final_ref = filtered_ref
513
+ else:
514
+ # st.warning(
515
+ # "The database may not have relevant information to help your question so please be aware of hallucinations."
516
+ # )
517
+ ref_from_db_search = ref["answers"].str.cat(sep=" ")
518
+ final_ref = ref
519
+
520
+ engineered_prompt = f'''
521
+ Based on the context: {ref_from_db_search},
522
+ answer the user question: {question}.
523
+ '''
524
+
525
+ directions = chatbot_instructions[domain]
526
+
527
+ answer = call_chatgpt(engineered_prompt, directions)
528
+
529
+ response = answer
530
+ # Display assistant response in chat message container
531
+ with st.chat_message("assistant"):
532
+ st.markdown(response)
533
+ with st.expander("See reference:"):
534
+ st.table(final_ref)
535
+ # Add assistant response to chat history
536
+ st.session_state.messages.append({"role": "assistant", "content": response})
537
+ """
538
+
539
+ st.session_state.clear()
540
+ st.session_state.success = (requirements, app)
541
+ st.rerun()