import streamlit as st
from utils import generate_text_embeddings
from mistral7b import mistral
import time

# Initialise per-session state on first load.
if "messages" not in st.session_state:
    st.session_state.messages = []

if "tokens_used" not in st.session_state:
    st.session_state.tokens_used = 0

if "inference_time" not in st.session_state:
    st.session_state.inference_time = [0.00]

if "temp" not in st.session_state:
    st.session_state.temp = 0.8

if "model_settings" not in st.session_state:
    st.session_state.model_settings = {
        "temp": 0.9,
        "max_tokens": 512,
    }

if "history" not in st.session_state:
    st.session_state.history = []

if "top_k" not in st.session_state:
    st.session_state.top_k = 5

# Sidebar: usage analytics plus retrieval and model settings.
with st.sidebar:
    st.markdown("# Model Analytics")
    st.write("Tokens used :", st.session_state["tokens_used"])
    st.write("Average Inference Time: ", round(sum(
        st.session_state["inference_time"]) / len(st.session_state["inference_time"]), 3))
    st.write("Cost Incurred :", round(
        0.033 * st.session_state["tokens_used"] / 1000, 3), "INR")

    st.markdown("---")

    st.markdown("# Retrieval Settings")
    st.slider(label="Documents to retrieve",
              min_value=1, max_value=10, value=3)

    st.markdown("---")

    st.markdown("# Model Settings")
    selected_model = st.sidebar.radio(
        'Select one:', ["Mistral 7B", "GPT 3.5 Turbo", "GPT 4", "Llama 7B"])
    selected_temperature = st.slider(
        label="Temperature", min_value=0.0, max_value=1.0, step=0.1, value=0.5)

    st.write(" ")
    st.info("**2023 ©️ Pragnesh Barik**")

st.image("ikigai.svg")
st.title("Ikigai Chat")

with st.expander("What is Ikigai Chat ?"):
    st.info("""Ikigai Chat is a vector-database-powered chat agent that works on the principle
    of Retrieval Augmented Generation (RAG). Its primary function revolves around maintaining an
    extensive repository of Ikigai Docs and providing users with answers that align with their
    queries. This approach ensures a more refined and tailored response to user inquiries.""")

# Replay the conversation so far.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle a new user prompt: echo it, call the model, and record timing and usage.
if prompt := st.chat_input("Chat with Ikigai Docs?"):
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    tick = time.time()
    response = mistral(prompt, st.session_state.history,
                       temperature=st.session_state.model_settings["temp"],
                       max_new_tokens=st.session_state.model_settings["max_tokens"])
    tock = time.time()
    st.session_state.inference_time.append(tock - tick)

    # Strip the end-of-sequence token and update the running token count.
    response = response.replace("</s>", "")
    len_response = len(response.split())
    st.session_state["tokens_used"] = len_response + st.session_state["tokens_used"]

    with st.chat_message("assistant"):
        st.markdown(response)

    st.session_state.history.append([prompt, response])
    st.session_state.messages.append(
        {"role": "assistant", "content": response})