Formatting updates and adding statefulness
- .streamlit/config.toml +0 -1
- app.py +44 -39
- style.css +8 -0
.streamlit/config.toml
CHANGED
@@ -1,4 +1,3 @@
 [theme]
 base="light"
 primaryColor="#00b828"
-font="serif"
app.py
CHANGED
@@ -4,30 +4,25 @@ import numpy as np
 from PIL import Image
 from time import perf_counter
 
-# Page
+# Page Configuration
 st.set_page_config(
     page_title= "Unify Router Demo",
     page_icon="./assets/unify_spiral.png",
     layout = "wide",
     initial_sidebar_state="collapsed"
 )
+router_avatar = np.array(Image.open('./assets/unify_spiral.png'))
 
-
-with
-st.
-    "./assets/unify_logo.png",
-    use_column_width="auto",
-    caption="Route your prompt to the best LLM"
-)
-st.write("Chat with the Unify LLM router! Send your prompt to the best LLM endpoint, optimizing for the metric of your choice. For any given model, the router searches across endpoints from different model endpoint providers to find the one endpoint that will provide the best performance for the target metric, for each prompt")
+# Custom font
+with open( "./style.css" ) as css:
+    st.markdown( f'<style>{css.read()}</style>' , unsafe_allow_html= True)
 
+# Info message
 st.info(
-    body="This demo is only a preview of the router's functionalities. Check out our [Chat UI](https://unify.ai/
+    body="This demo is only a preview of the router's functionalities. Check out our [Chat UI](https://unify.ai/) for the full experience, including more endpoints, and extra customization!",
     icon="ℹ️"
 )
 
-router_avatar = Image.open('./assets/unify_spiral.png')
-
 # Parameter choices
 strategies = {
     'π fastest': "tks-per-sec",
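Note: the stylesheet injection added above can be wrapped in a small reusable helper. A minimal sketch of the same mechanism (the load_css name and the pathlib usage are illustrative, not part of the commit):

from pathlib import Path
import streamlit as st

def load_css(path: str = "./style.css") -> None:
    # Read the local stylesheet and inject it as a raw <style> tag;
    # unsafe_allow_html=True stops Streamlit from escaping the HTML.
    css = Path(path).read_text()
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)

load_css()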
@@ -36,8 +31,8 @@ strategies = {
 }
 models = {
     'π¦ Llama2 70B Chat': "llama-2-70b-chat",
-    '
-    '
+    'π¨ Mixtral 8x7B Instruct': "mixtral-8x7b-instruct-v0.1",
+    'π Gemma 7B': "gemma-7b-it",
 }
 
 # Body
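Note: the display labels above index into these dicts, and app.py later fuses the model id with the chosen routing strategy into a single endpoint identifier (see the completions call further down). A standalone sketch of that mapping, with the emoji stripped from the labels for readability:

models = {"Llama2 70B Chat": "llama-2-70b-chat"}
strategies = {"fastest": "tks-per-sec"}

# Model id and routing strategy are joined with "@" into one endpoint string.
endpoint = "@".join([models["Llama2 70B Chat"], strategies["fastest"]])
assert endpoint == "llama-2-70b-chat@tks-per-sec"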
@@ -45,6 +40,13 @@ Parameters_Col, Chat_Col = st.columns([1,3])
 
 with Parameters_Col:
 
+    # st.header("LLM Router")
+    st.image(
+        "./assets/unify_logo.png",
+        use_column_width="auto",
+    )
+    st.markdown("Send your prompts to the best LLM endpoint and optimize performance, all with a **single API**")
+
     strategy = st.selectbox(
         label = 'I want the',
         options = tuple(strategies.keys()),
@@ -78,47 +80,50 @@
 
 with Chat_Col:
 
-
-    # Initializing empty chat space
+    # Initializing empty chat space and messages state
     if "messages" not in st.session_state:
         st.session_state.messages = []
-    msgs = st.container(height=
+    msgs = st.container(height = 350)
 
     # Writing conversation history
     for msg in st.session_state.messages:
-        msgs.chat_message(msg["role"]).write(msg["content"])
+        if msg["role"] == "user":
+            msgs.chat_message(msg["role"]).write(msg["content"])
+        else:
+            msgs.chat_message(msg["role"], avatar=router_avatar).write(msg["content"])
 
     # Preparing client
     client = OpenAI(
         base_url="https://api.unify.ai/v0/",
-        api_key=st.secrets
+        api_key=st.secrets("UNIFY_API")
     )
 
     # Processing prompt box input
     if prompt := st.chat_input("Enter your prompt.."):
 
         # Displaying user prompt and saving in message states
-        msgs.chat_message("user").write(prompt)
         st.session_state.messages.append({"role": "user", "content": prompt})
+        with msgs.chat_message("user"):
+            st.write(prompt)
 
-        # Sending prompt to model endpoint
-        start = perf_counter()
-        stream = client.chat.completions.create(
-            model="@".join([
-                models[model],
-                strategies[strategy]
-            ]),
-            messages=[
-                {"role": m["role"], "content": m["content"]}
-                for m in st.session_state.messages
-            ],
-            stream=True,
-            max_tokens=max_tokens,
-            temperature=temperature
-        )
-        time_to_completion = round(perf_counter() - start, 2)
         # Displaying output, metrics, and saving output in message states
-        with msgs.
+        with msgs.status("Routing your prompt..",expanded=True):
+            # Sending prompt to model endpoint
+            start = perf_counter()
+            stream = client.chat.completions.create(
+                model="@".join([
+                    models[model],
+                    strategies[strategy]
+                ]),
+                messages=[
+                    {"role": m["role"], "content": m["content"]}
+                    for m in st.session_state.messages
+                ],
+                stream=True,
+                max_tokens=max_tokens,
+                temperature=temperature
+            )
+            time_to_completion = round(perf_counter() - start, 2)
 
         # Writing answer progressively
         chunks = [chunk for chunk in stream]
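Note: st.secrets is a dict-like mapping, not a callable, so the added api_key=st.secrets("UNIFY_API") raises a TypeError at runtime. A corrected sketch, assuming the secret is stored under the UNIFY_API key in .streamlit/secrets.toml:

from openai import OpenAI
import streamlit as st

client = OpenAI(
    base_url="https://api.unify.ai/v0/",
    api_key=st.secrets["UNIFY_API"],  # subscript access, not a call
)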
@@ -136,12 +141,12 @@ with Chat_Col:
         st.markdown(
             f"**{tokens_per_second}** Tokens Per Second - \
             **{time_to_completion}** Seconds to complete - \
-            **{cost}** $"
+            **{cost:.6f}** $"
         )
 
         # Saving output to message states
         output_chunks = [chunk.choices[0].delta.content or "" for chunk in chunks]
-        response =
+        response = ''.join(output_chunks)
         st.session_state.messages.append({"role": "assistant", "content": response})
 
         # Cancel / Stop button
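Note: chunks = [chunk for chunk in stream] drains the whole stream before anything is rendered, so display only starts once the response has fully arrived. A sketch of a fully progressive alternative, assuming a recent Streamlit (st.write_stream, added in 1.31, accepts an OpenAI stream and returns the concatenated text); the stream_reply helper name is hypothetical:

import streamlit as st

def stream_reply(container, stream, avatar=None) -> str:
    # Render tokens as they arrive; the return value is the full response,
    # ready to be appended to st.session_state.messages.
    with container.chat_message("assistant", avatar=avatar):
        return st.write_stream(stream)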
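The statefulness this commit adds comes down to one pattern: Streamlit reruns the whole script on every interaction, so the conversation must be saved in st.session_state and replayed on each run. A minimal self-contained sketch of that pattern, without the routing client (the echo reply is a placeholder):

import streamlit as st

# History must live in st.session_state to survive Streamlit's reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []

msgs = st.container(height=350)

# Replay the saved conversation on every rerun.
for msg in st.session_state.messages:
    msgs.chat_message(msg["role"]).write(msg["content"])

# Append the new exchange; the next rerun replays it from state.
if prompt := st.chat_input("Enter your prompt.."):
    msgs.chat_message("user").write(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})
    reply = f"Echo: {prompt}"  # placeholder for the routed model call
    msgs.chat_message("assistant").write(reply)
    st.session_state.messages.append({"role": "assistant", "content": reply})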
style.css
ADDED
@@ -0,0 +1,8 @@
+@import url('https://fonts.googleapis.com/css2?family=Inter:wght@500&display=swap');
+
+html, body, [class*="css"] {
+    font-family: 'Inter', sans-serif;
+    font-size: 18px;
+    font-weight: 500;
+    color: #091747;
+}