Formatting updates and adding statefulness
- .streamlit/config.toml +0 -1
- app.py +44 -39
- style.css +8 -0
.streamlit/config.toml
CHANGED
@@ -1,4 +1,3 @@
 [theme]
 base="light"
 primaryColor="#00b828"
-font="serif"
app.py
CHANGED
@@ -4,30 +4,25 @@ import numpy as np
 from PIL import Image
 from time import perf_counter
 
-# Page
+# Page Configuration
 st.set_page_config(
     page_title= "Unify Router Demo",
     page_icon="./assets/unify_spiral.png",
     layout = "wide",
     initial_sidebar_state="collapsed"
 )
+router_avatar = np.array(Image.open('./assets/unify_spiral.png'))
 
-
-with
-st.
-    "./assets/unify_logo.png",
-    use_column_width="auto",
-    caption="Route your prompt to the best LLM"
-)
-st.write("Chat with the Unify LLM router! Send your prompt to the best LLM endpoint, optimizing for the metric of your choice. For any given model, the router searches across endpoints from different model endpoint providers to find the one endpoint that will provide the best performance for the target metric, for each prompt")
+# Custom font
+with open( "./style.css" ) as css:
+    st.markdown( f'<style>{css.read()}</style>' , unsafe_allow_html= True)
 
+# Info message
 st.info(
-    body="This demo is only a preview of the router's functionalities. Check out our [Chat UI](https://unify.ai/
+    body="This demo is only a preview of the router's functionalities. Check out our [Chat UI](https://unify.ai/) for the full experience, including more endpoints, and extra customization!",
     icon="ℹ️"
 )
 
-router_avatar = Image.open('./assets/unify_spiral.png')
-
 # Parameter choices
 strategies = {
     'π fastest': "tks-per-sec",
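Note: the stylesheet injection added above can be wrapped in a small reusable helper. A minimal sketch of the same mechanism (the load_css name and the pathlib usage are illustrative, not part of the commit):

from pathlib import Path
import streamlit as st

def load_css(path: str = "./style.css") -> None:
    # Read the local stylesheet and inject it as a raw <style> tag;
    # unsafe_allow_html=True stops Streamlit from escaping the HTML.
    css = Path(path).read_text()
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)

load_css()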
@@ -36,8 +31,8 @@ strategies = {
 }
 models = {
     'π¦ Llama2 70B Chat': "llama-2-70b-chat",
-    '
-    '
+    'π¨ Mixtral 8x7B Instruct': "mixtral-8x7b-instruct-v0.1",
+    'π Gemma 7B': "gemma-7b-it",
 }
 
 # Body
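Note: the display labels above index into these dicts, and app.py later fuses the model id with the chosen routing strategy into a single endpoint identifier (see the completions call further down). A standalone sketch of that mapping, with the emoji stripped from the labels for readability:

models = {"Llama2 70B Chat": "llama-2-70b-chat"}
strategies = {"fastest": "tks-per-sec"}

# Model id and routing strategy are joined with "@" into one endpoint string.
endpoint = "@".join([models["Llama2 70B Chat"], strategies["fastest"]])
assert endpoint == "llama-2-70b-chat@tks-per-sec"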
@@ -45,6 +40,13 @@ Parameters_Col, Chat_Col = st.columns([1,3])
 
 with Parameters_Col:
 
+    # st.header("LLM Router")
+    st.image(
+        "./assets/unify_logo.png",
+        use_column_width="auto",
+    )
+    st.markdown("Send your prompts to the best LLM endpoint and optimize performance, all with a **single API**")
+
     strategy = st.selectbox(
         label = 'I want the',
         options = tuple(strategies.keys()),
@@ -78,47 +80,50 @@
 
 with Chat_Col:
 
-
-    # Initializing empty chat space
+    # Initializing empty chat space and messages state
     if "messages" not in st.session_state:
         st.session_state.messages = []
-    msgs = st.container(height=
+    msgs = st.container(height = 350)
 
     # Writing conversation history
     for msg in st.session_state.messages:
-        msgs.chat_message(msg["role"]).write(msg["content"])
+        if msg["role"] == "user":
+            msgs.chat_message(msg["role"]).write(msg["content"])
+        else:
+            msgs.chat_message(msg["role"], avatar=router_avatar).write(msg["content"])
 
     # Preparing client
     client = OpenAI(
         base_url="https://api.unify.ai/v0/",
-        api_key=st.secrets
+        api_key=st.secrets("UNIFY_API")
     )
 
     # Processing prompt box input
     if prompt := st.chat_input("Enter your prompt.."):
 
         # Displaying user prompt and saving in message states
-        msgs.chat_message("user").write(prompt)
         st.session_state.messages.append({"role": "user", "content": prompt})
+        with msgs.chat_message("user"):
+            st.write(prompt)
 
-        # Sending prompt to model endpoint
-        start = perf_counter()
-        stream = client.chat.completions.create(
-            model="@".join([
-                models[model],
-                strategies[strategy]
-            ]),
-            messages=[
-                {"role": m["role"], "content": m["content"]}
-                for m in st.session_state.messages
-            ],
-            stream=True,
-            max_tokens=max_tokens,
-            temperature=temperature
-        )
-        time_to_completion = round(perf_counter() - start, 2)
         # Displaying output, metrics, and saving output in message states
-        with msgs.
+        with msgs.status("Routing your prompt..",expanded=True):
+            # Sending prompt to model endpoint
+            start = perf_counter()
+            stream = client.chat.completions.create(
+                model="@".join([
+                    models[model],
+                    strategies[strategy]
+                ]),
+                messages=[
+                    {"role": m["role"], "content": m["content"]}
+                    for m in st.session_state.messages
+                ],
+                stream=True,
+                max_tokens=max_tokens,
+                temperature=temperature
+            )
+            time_to_completion = round(perf_counter() - start, 2)
 
         # Writing answer progressively
         chunks = [chunk for chunk in stream]
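Note: st.secrets is a dict-like mapping, not a callable, so the added api_key=st.secrets("UNIFY_API") raises a TypeError at runtime. A corrected sketch, assuming the secret is stored under the UNIFY_API key in .streamlit/secrets.toml:

from openai import OpenAI
import streamlit as st

client = OpenAI(
    base_url="https://api.unify.ai/v0/",
    api_key=st.secrets["UNIFY_API"],  # subscript access, not a call
)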
@@ -136,12 +141,12 @@ with Chat_Col:
         st.markdown(
             f"**{tokens_per_second}** Tokens Per Second - \
             **{time_to_completion}** Seconds to complete - \
-            **{cost}** $"
+            **{cost:.6f}** $"
         )
 
         # Saving output to message states
         output_chunks = [chunk.choices[0].delta.content or "" for chunk in chunks]
-        response =
+        response = ''.join(output_chunks)
         st.session_state.messages.append({"role": "assistant", "content": response})
 
         # Cancel / Stop button
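Note: chunks = [chunk for chunk in stream] drains the whole stream before anything is rendered, so display only starts once the response has fully arrived. A sketch of a fully progressive alternative, assuming a recent Streamlit (st.write_stream, added in 1.31, accepts an OpenAI stream and returns the concatenated text); the stream_reply helper name is hypothetical:

import streamlit as st

def stream_reply(container, stream, avatar=None) -> str:
    # Render tokens as they arrive; the return value is the full response,
    # ready to be appended to st.session_state.messages.
    with container.chat_message("assistant", avatar=avatar):
        return st.write_stream(stream)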
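The statefulness this commit adds comes down to one pattern: Streamlit reruns the whole script on every interaction, so the conversation must be saved in st.session_state and replayed on each run. A minimal self-contained sketch of that pattern, without the routing client (the echo reply is a placeholder):

import streamlit as st

# History must live in st.session_state to survive Streamlit's reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []

msgs = st.container(height=350)

# Replay the saved conversation on every rerun.
for msg in st.session_state.messages:
    msgs.chat_message(msg["role"]).write(msg["content"])

# Append the new exchange; the next rerun replays it from state.
if prompt := st.chat_input("Enter your prompt.."):
    msgs.chat_message("user").write(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})
    reply = f"Echo: {prompt}"  # placeholder for the routed model call
    msgs.chat_message("assistant").write(reply)
    st.session_state.messages.append({"role": "assistant", "content": reply})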
style.css
ADDED
@@ -0,0 +1,8 @@
+@import url('https://fonts.googleapis.com/css2?family=Inter:wght@500&display=swap');
+
+html, body, [class*="css"] {
+    font-family: 'Inter', sans-serif;
+    font-size: 18px;
+    font-weight: 500;
+    color: #091747;
+}