Commit e506679
Parent(s): cebfd3c

added support zephyr 7b

Files changed:
- __pycache__/mistral7b.cpython-310.pyc +0 -0
- app.py +18 -9
- mistral7b.py +6 -6
__pycache__/mistral7b.cpython-310.pyc
CHANGED
Binary files a/__pycache__/mistral7b.cpython-310.pyc and b/__pycache__/mistral7b.cpython-310.pyc differ
app.py
CHANGED
@@ -1,5 +1,5 @@
 import streamlit as st
-from mistral7b import
+from mistral7b import chat
 import time
 import pandas as pd
 import pinecone
@@ -17,6 +17,11 @@ pinecone.init(
 
 pinecone_index = pinecone.Index('ikigai-chat')
 text_vectorizer = SentenceTransformer('all-distilroberta-v1')
+chat_bots = {
+    "Mistral 7B" : "mistralai/Mistral-7B-Instruct-v0.1",
+    "Zephyr 7B" : "HuggingFaceH4/zephyr-7b-alpha",
+}
+
 
 def gen_augmented_prompt(prompt, top_k) :
     query_vector = text_vectorizer.encode(prompt).tolist()
@@ -41,7 +46,7 @@ def gen_augmented_prompt(prompt, top_k) :
 
 data = {
     "Attribute": ["LLM", "Text Vectorizer", "Vector Database","CPU", "System RAM"],
-    "Information": ["Mistral-7B-Instruct-v0.1
+    "Information": ["Mistral-7B-Instruct-v0.1","all-distilroberta-v1", "Hosted Pinecone" ,"2 vCPU", "16 GB"]
 }
 df = pd.DataFrame(data)
 
@@ -82,18 +87,22 @@ if "history" not in st.session_state:
 Let me know if you have any specific questions about Ikigai Labs or our products."""]]
 
 if "top_k" not in st.session_state:
-    st.session_state.top_k =
+    st.session_state.top_k = 4
 
 if "repetion_penalty" not in st.session_state :
     st.session_state.repetion_penalty = 1
 
 if "rag_enabled" not in st.session_state :
     st.session_state.rag_enabled = True
+
+if "chat_bot" not in st.session_state :
+    st.session_state.chat_bot = "Mistral 7B"
+
 with st.sidebar:
     st.markdown("# Retrieval Settings")
-    st.session_state.rag_enabled = st.toggle("Activate RAG")
+    st.session_state.rag_enabled = st.toggle("Activate RAG", value=True)
     st.session_state.top_k = st.slider(label="Documents to retrieve",
-        min_value=1, max_value=10, value=
+        min_value=1, max_value=10, value=4, disabled=not st.session_state.rag_enabled)
     st.markdown("---")
     st.markdown("# Model Analytics")
 
@@ -107,8 +116,8 @@ with st.sidebar:
 
     st.markdown("# Model Settings")
 
-
-    'Select one:', ["Mistral 7B","
+    st.session_state.chat_bot = st.sidebar.radio(
+        'Select one:', ["Mistral 7B","Zephyr 7B"])
     st.session_state.temp = st.slider(
         label="Temperature", min_value=0.0, max_value=1.0, step=0.1, value=0.9)
 
@@ -126,7 +135,7 @@ with st.sidebar:
 
 st.image("ikigai.svg")
 st.title("Ikigai Chat")
-st.caption("Maintained and developed by Pragnesh Barik.")
+# st.caption("Maintained and developed by Pragnesh Barik.")
 
 with st.expander("What is Ikigai Chat ?"):
     st.info("""Ikigai Chat is a vector database powered chat agent, it works on the principle of
@@ -153,7 +162,7 @@ if prompt := st.chat_input("Chat with Ikigai Docs..."):
     prompt, links = gen_augmented_prompt(prompt=prompt, top_k=st.session_state.top_k)
 
     with st.spinner("Generating response...") :
-        response =
+        response = chat(prompt, st.session_state.history,chat_client=chat_bots[st.session_state.chat_bot] ,
             temperature=st.session_state.temp, max_new_tokens=st.session_state.max_tokens)
         tock = time.time()
 
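Read together, the app.py changes wire the new model picker end to end: the sidebar radio stores a display name in st.session_state.chat_bot, the chat_bots dict maps that name to a Hugging Face repo id, and chat() receives the id as chat_client. A minimal sketch of that lookup outside Streamlit, where `selected` is a hypothetical stand-in for st.session_state.chat_bot:

    # Display-name -> repo-id mapping, copied from the diff above.
    chat_bots = {
        "Mistral 7B": "mistralai/Mistral-7B-Instruct-v0.1",
        "Zephyr 7B": "HuggingFaceH4/zephyr-7b-alpha",
    }

    selected = "Zephyr 7B"         # stand-in for st.session_state.chat_bot
    repo_id = chat_bots[selected]  # value passed to chat() as chat_client=
    assert repo_id == "HuggingFaceH4/zephyr-7b-alpha"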
mistral7b.py
CHANGED
@@ -4,10 +4,6 @@ from dotenv import load_dotenv
 load_dotenv()
 
 API_TOKEN = os.getenv('HF_TOKEN')
-client = InferenceClient(
-    "mistralai/Mistral-7B-Instruct-v0.1",
-    token=API_TOKEN
-)
 
 
 def format_prompt(message, history):
@@ -18,9 +14,13 @@ def format_prompt(message, history):
     prompt += f"[INST] {message} [/INST]"
     return prompt
 
-def
-    prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
+def chat(
+    prompt, history, chat_client = "mistralai/Mistral-7B-Instruct-v0.1",temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
 ):
+    client = InferenceClient(
+        chat_client,
+        token=API_TOKEN
+    )
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
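The mistral7b.py change moves InferenceClient construction from module scope into chat(), so each call can target whichever repo id the caller passes as chat_client. A minimal sketch of the same per-call-client pattern, assuming huggingface_hub is installed and HF_TOKEN is a valid token; the text_generation call is an assumption about what the elided body of chat() does, not something this diff shows:

    import os
    from huggingface_hub import InferenceClient

    def generate(prompt, chat_client="mistralai/Mistral-7B-Instruct-v0.1",
                 temperature=0.9, max_new_tokens=256):
        # The client is built per call, so callers can switch models (e.g. to
        # "HuggingFaceH4/zephyr-7b-alpha") without touching module-level state.
        client = InferenceClient(chat_client, token=os.getenv("HF_TOKEN"))
        return client.text_generation(prompt, temperature=temperature,
                                      max_new_tokens=max_new_tokens)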