Add router app
- .streamlit/config.toml +4 -0
- README.md +3 -3
- app.py +150 -0
- assets/unify_logo.png +0 -0
- assets/unify_spiral.png +0 -0
- requirements.txt +4 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,4 @@
+[theme]
+base="light"
+primaryColor="#00b828"
+font="serif"
README.md
CHANGED
@@ -1,8 +1,8 @@
 ---
 title: Llmrouter
-emoji:
-colorFrom:
-colorTo:
+emoji: π
+colorFrom: purple
+colorTo: purple
 sdk: streamlit
 sdk_version: 1.32.2
 app_file: app.py
app.py
ADDED
@@ -0,0 +1,150 @@
+from openai import OpenAI
+import streamlit as st
+import numpy as np
+from PIL import Image
+from time import perf_counter
+
+# Page configuration, header section, and images
+st.set_page_config(
+    page_title="Unify Router Demo",
+    page_icon="./assets/unify_spiral.png",
+    layout="wide",
+    initial_sidebar_state="collapsed"
+)
+
+Header = st.columns(3)
+with Header[1]:
+    st.image(
+        "./assets/unify_logo.png",
+        use_column_width="auto",
+        caption="Route your prompt to the best LLM"
+    )
+st.write("Chat with the Unify LLM router! Send your prompt to the best LLM endpoint, optimizing for the metric of your choice. For any given model, the router searches across endpoints from different model endpoint providers to find the endpoint that delivers the best performance on the target metric for each prompt.")
+
+st.info(
+    body="This demo is only a preview of the router's functionality. Check out our [Chat UI](https://unify.ai/router) for the full experience, including more endpoints and extra customization!",
+    icon="ℹ️"
+)
+
+router_avatar = Image.open('./assets/unify_spiral.png')
+
+# Parameter choices
+strategies = {
+    '🚀 fastest': "tks-per-sec",
+    '⚡ most responsive': "ttft",
+    "💵 cheapest": "input-cost",
+}
+models = {
+    '🦙 Llama2 70B Chat': "llama-2-70b-chat",
+    '🕸️ Mixtral 8x7B Instruct': "mixtral-8x7b-instruct-v0.1",
+    '👨‍💻 Deepseek Coder 33B Instruct': "deepseek-coder-33b-instruct",
+}
+
+# Body
+Parameters_Col, Chat_Col = st.columns([1, 3])
+
+with Parameters_Col:
+
+    strategy = st.selectbox(
+        label='I want the',
+        options=tuple(strategies.keys()),
+        help="Choose the metric to optimize the routing for. \
+            Fastest picks the endpoint with the highest output tokens per second. \
+            Most responsive picks the endpoint with the smallest time to complete the request. \
+            Cheapest picks the endpoint with the lowest output token cost",
+    )
+    model = st.selectbox(
+        label='endpoint for',
+        options=tuple(models.keys()),
+        help="Select a model to optimize for. The same model can be offered by different model endpoint providers. The router lets you find the optimal endpoint for your chosen model, target metric, and input prompt",
+    )
+    with st.expander("Advanced Inputs"):
+        max_tokens = st.slider(
+            label="Maximum Number Of Tokens",
+            min_value=10,
+            max_value=500,
+            value=100,
+            step=20,
+            help="The maximum number of tokens that can be generated."
+        )
+        temperature = st.slider(
+            label="Temperature",
+            min_value=0.0,
+            max_value=1.0,
+            value=0.5,
+            step=0.5,
+            help="The model's output randomness. Higher values give more random outputs."
+        )
+
+with Chat_Col:
+
+    st.write("Chat with Router")
+    # Initializing empty chat space
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+    msgs = st.container(height=300)
+
+    # Writing conversation history
+    for msg in st.session_state.messages:
+        msgs.chat_message(msg["role"]).write(msg["content"])
+
+    # Preparing client
+    client = OpenAI(
+        base_url="https://api.unify.ai/v0/",
+        api_key=st.secrets["UNIFY_API"]
+    )
+
+    # Processing prompt box input
+    if prompt := st.chat_input("Enter your prompt..."):
+
+        # Displaying user prompt and saving it in message state
+        msgs.chat_message("user").write(prompt)
+        st.session_state.messages.append({"role": "user", "content": prompt})
+
+        # Sending prompt to model endpoint
+        start = perf_counter()
+        stream = client.chat.completions.create(
+            model="@".join([
+                models[model],
+                strategies[strategy]
+            ]),
+            messages=[
+                {"role": m["role"], "content": m["content"]}
+                for m in st.session_state.messages
+            ],
+            stream=True,
+            max_tokens=max_tokens,
+            temperature=temperature
+        )
+        time_to_completion = round(perf_counter() - start, 2)
+        # Displaying output, metrics, and saving output in message state
+        with msgs.chat_message("assistant", avatar=np.array(router_avatar)):
+
+            # Writing the answer progressively
+            chunks = [chunk for chunk in stream]
+            st.write_stream(chunks)
+
+            # Computing metrics
+            last_chunk = chunks[-1]
+            cost = round(last_chunk.usage["cost"], 6)
+            output_tokens = last_chunk.usage["completion_tokens"]
+            tokens_per_second = round(output_tokens / time_to_completion, 2)
+
+            # Displaying model, provider, and metrics
+            provider = " ".join(chunks[0].model.split("@")[-1].split("-")).title()
+            st.markdown(f"Model: **{model}**. Provider: **{provider}**")
+            st.markdown(
+                f"**{tokens_per_second}** Tokens Per Second - \
+                **{time_to_completion}** Seconds to complete - \
+                **{cost}** $"
+            )
+
+            # Saving output to message state
+            output_chunks = [chunk.choices[0].delta.content or "" for chunk in chunks]
+            response = "".join(output_chunks)
+            st.session_state.messages.append({"role": "assistant", "content": response})
+
+    # Clear chat button
+    if st.button("Clear Chat", key="clear"):
+        msgs.empty()
+        st.session_state.messages = []
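Note: as app.py shows, the router is reached through the standard OpenAI client by joining a model id and a routing metric with "@", for example "llama-2-70b-chat@input-cost". A minimal non-streaming sketch under that assumption follows; the API key value and the prompt are placeholders, not part of this commit.

from openai import OpenAI

# Point the standard OpenAI client at the Unify router (base URL taken from app.py above).
client = OpenAI(
    base_url="https://api.unify.ai/v0/",
    api_key="<UNIFY_API_KEY>",  # placeholder; app.py reads the real key from st.secrets["UNIFY_API"]
)

# "<model>@<metric>" routes the prompt to the endpoint that scores best on the chosen metric.
response = client.chat.completions.create(
    model="llama-2-70b-chat@input-cost",
    messages=[{"role": "user", "content": "Hello, router!"}],
    max_tokens=100,
)
print(response.choices[0].message.content)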
assets/unify_logo.png
ADDED
assets/unify_spiral.png
ADDED
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+streamlit==1.32.0
+openai
+numpy
+pillow
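To run this commit locally (a sketch, not part of the commit itself): install the pinned requirements and launch Streamlit on app.py. The key read via st.secrets["UNIFY_API"] must be supplied through Streamlit's secrets mechanism, typically a .streamlit/secrets.toml file, which is not included in this change.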