Claude AI
committed on
Commit
·
ce91b95
1
Parent(s):
1ec5d3f
Update to Llama 3.1 405B and improve UI
Browse files
- Replace Claude Opus 4 with Llama 3.1 405B as large model
- Update energy consumption: 11.9 Wh per 50 tokens
- Add real-time token calculation after API response
- Fix token estimation for short prompts
- Add emojis throughout interface
- Show actual consumption vs savings based on model selection
- Improve accuracy of energy and cost calculations
app.py
CHANGED
@@ -18,11 +18,11 @@ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
|
|
18 |
# Model configurations with energy consumption and cost estimates
|
19 |
MODEL_CONFIGS = {
|
20 |
"large": {
|
21 |
-
"name": "
|
22 |
-
"energy_per_token":
|
23 |
-
"cost_per_input_token": 0.
|
24 |
-
"cost_per_output_token": 0.
|
25 |
-
"icon": "
|
26 |
},
|
27 |
"small": {
|
28 |
"name": "Mistral Small 24B",
|
@@ -93,13 +93,20 @@ class ModelRouter:
|
|
93 |
large_config = MODEL_CONFIGS["large"]
|
94 |
return tokens * large_config["energy_per_token"]
|
95 |
|
96 |
-
def calculate_savings(self, selected_model: str, prompt: str) -> Dict:
|
97 |
"""Calculate energy and cost savings compared to using the large model"""
|
98 |
print(f"[SAVINGS] Calculating for model: {selected_model}")
|
99 |
|
100 |
# Calculate input and output tokens separately
|
101 |
input_tokens = max(1, len(prompt) // 4) # Minimum 1 token
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
total_tokens = input_tokens + output_tokens
|
104 |
|
105 |
print(f"[SAVINGS] Input tokens: {input_tokens}, Output tokens: {output_tokens}")
|
@@ -166,13 +173,13 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
|
|
166 |
model_config = MODEL_CONFIGS[selected_model]
|
167 |
print(f"[PROCESS] Using model config: {model_config['name']}")
|
168 |
|
169 |
-
#
|
170 |
-
print(f"[PROCESS] Calculating savings...")
|
171 |
-
|
172 |
-
print(f"[PROCESS]
|
173 |
|
174 |
open_router_model_dict = {
|
175 |
-
"large": "
|
176 |
"small": "mistralai/mistral-small-24b-instruct-2501"
|
177 |
}
|
178 |
# Check if API key is available
|
@@ -228,6 +235,12 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
|
|
228 |
import traceback
|
229 |
print(f"[API EXCEPTION] Traceback:\n{traceback.format_exc()}")
|
230 |
answer = f"[Error] Failed to get response from {model_config['name']}. Error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
# Format the response with model info
|
232 |
response = f"{answer}\n\n<div style='background: #f0f9ff; border-left: 3px solid #0ea5e9; padding: 8px 12px; margin-top: 10px; border-radius: 4px;'><small style='color: #0369a1; font-weight: 500;'>{model_config['icon']} Answered by {model_config['name']}</small></div>"
|
233 |
|
@@ -249,7 +262,7 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
|
|
249 |
<div style="background: #ffffff; border: 1px solid #fed7aa; border-radius: 12px; padding: 20px;">
|
250 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
|
251 |
<div>
|
252 |
-
<p style="color: #8795a1; margin: 0; font-size: 0.9em;"
|
253 |
<p style="color: #ea580c; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
254 |
{savings['actual_energy']:.1f} Wh
|
255 |
</p>
|
@@ -258,7 +271,7 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
|
|
258 |
</p>
|
259 |
</div>
|
260 |
<div>
|
261 |
-
<p style="color: #8795a1; margin: 0; font-size: 0.9em;"
|
262 |
<p style="color: #dc2626; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
263 |
${savings['actual_cost']:.6f}
|
264 |
</p>
|
@@ -275,7 +288,7 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
|
|
275 |
<div style="background: #ffffff; border: 1px solid #e1e8ed; border-radius: 12px; padding: 20px;">
|
276 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
|
277 |
<div>
|
278 |
-
<p style="color: #8795a1; margin: 0; font-size: 0.9em;"
|
279 |
<p style="color: #22c55e; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
280 |
{savings['energy_saved_percent']:.1f}% saved
|
281 |
</p>
|
@@ -287,7 +300,7 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
|
|
287 |
</p>
|
288 |
</div>
|
289 |
<div>
|
290 |
-
<p style="color: #8795a1; margin: 0; font-size: 0.9em;"
|
291 |
<p style="color: #3b82f6; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
292 |
{savings['cost_saved_percent']:.1f}% saved
|
293 |
</p>
|
@@ -321,7 +334,7 @@ def get_statistics() -> str:
|
|
321 |
return """
|
322 |
<div style="background: #f8fafc; border-radius: 12px; padding: 30px; text-align: center; color: #64748b;">
|
323 |
<p style="margin: 0;">No queries processed yet</p>
|
324 |
-
<p style="margin: 10px 0 0 0; font-size: 0.9em;"
|
325 |
</div>
|
326 |
"""
|
327 |
|
@@ -338,12 +351,12 @@ def get_statistics() -> str:
|
|
338 |
stats = f"""
|
339 |
<div style="background: #ffffff; border: 1px solid #e2e8f0; border-radius: 12px; padding: 25px;">
|
340 |
<div style="text-align: center; margin-bottom: 20px;">
|
341 |
-
<h4 style="color: #1e293b; font-size: 1.1em; margin: 0; font-weight: 600;"
|
342 |
</div>
|
343 |
|
344 |
<div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 15px; margin-bottom: 15px;">
|
345 |
<div style="background: #f0fdf4; border-radius: 8px; padding: 15px; text-align: center;">
|
346 |
-
<p style="color: #166534; font-size: 0.9em; margin: 0;"
|
347 |
<p style="color: #15803d; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
348 |
{user_total_energy_saved:.1f}
|
349 |
</p>
|
@@ -351,7 +364,7 @@ def get_statistics() -> str:
|
|
351 |
</div>
|
352 |
|
353 |
<div style="background: #eff6ff; border-radius: 8px; padding: 15px; text-align: center;">
|
354 |
-
<p style="color: #1e40af; font-size: 0.9em; margin: 0;"
|
355 |
<p style="color: #2563eb; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
356 |
${user_total_cost_saved:.6f}
|
357 |
</p>
|
@@ -396,10 +409,10 @@ with gr.Blocks(
|
|
396 |
gr.Markdown("""
|
397 |
<div style="margin-bottom: 30px;">
|
398 |
<h1 style="margin: 0; font-size: 2em; font-weight: 600; color: #0f172a;">
|
399 |
-
Do I really need a huge LLM?
|
400 |
</h1>
|
401 |
<p style="margin: 10px 0 0 0; color: #64748b; font-size: 1.1em;">
|
402 |
-
Let's find out! This tool automatically routes your queries to the right-sized model.
|
403 |
</p>
|
404 |
</div>
|
405 |
""")
|
@@ -415,14 +428,14 @@ with gr.Blocks(
|
|
415 |
|
416 |
with gr.Row():
|
417 |
msg = gr.Textbox(
|
418 |
-
placeholder="Type your message here...",
|
419 |
show_label=False,
|
420 |
scale=9,
|
421 |
container=False,
|
422 |
elem_classes=["message-input"]
|
423 |
)
|
424 |
submit = gr.Button(
|
425 |
-
"Send",
|
426 |
variant="primary",
|
427 |
scale=1,
|
428 |
min_width=100
|
@@ -433,7 +446,7 @@ with gr.Blocks(
|
|
433 |
model_display = gr.HTML(
|
434 |
value="""
|
435 |
<div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
|
436 |
-
<p style="margin: 0;"
|
437 |
</div>
|
438 |
""",
|
439 |
label="Selected Model"
|
@@ -443,7 +456,7 @@ with gr.Blocks(
|
|
443 |
savings_display = gr.HTML(
|
444 |
value="""
|
445 |
<div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
|
446 |
-
<p style="margin: 0;"
|
447 |
</div>
|
448 |
""",
|
449 |
label="Efficiency Metrics"
|
@@ -459,7 +472,7 @@ with gr.Blocks(
|
|
459 |
with gr.Row():
|
460 |
gr.Markdown("""
|
461 |
<div style="margin-top: 40px; padding-top: 20px; border-top: 1px solid #e2e8f0; text-align: center; color: #94a3b8; font-size: 0.85em;">
|
462 |
-
<p style="margin: 5px 0;"
|
463 |
</div>
|
464 |
""")
|
465 |
|
|
|
18 |
# Model configurations with energy consumption and cost estimates
|
19 |
MODEL_CONFIGS = {
|
20 |
"large": {
|
21 |
+
"name": "Llama 3.1 405B",
|
22 |
+
"energy_per_token": 0.238, # Wh per token (11.9 Wh / 50 tokens)
|
23 |
+
"cost_per_input_token": 0.000003, # $3/M tokens (OpenRouter pricing)
|
24 |
+
"cost_per_output_token": 0.000003, # $3/M tokens (OpenRouter pricing)
|
25 |
+
"icon": "π¦"
|
26 |
},
|
27 |
"small": {
|
28 |
"name": "Mistral Small 24B",
|
|
|
93 |
large_config = MODEL_CONFIGS["large"]
|
94 |
return tokens * large_config["energy_per_token"]
|
95 |
|
96 |
+
def calculate_savings(self, selected_model: str, prompt: str, response: str = None) -> Dict:
|
97 |
"""Calculate energy and cost savings compared to using the large model"""
|
98 |
print(f"[SAVINGS] Calculating for model: {selected_model}")
|
99 |
|
100 |
# Calculate input and output tokens separately
|
101 |
input_tokens = max(1, len(prompt) // 4) # Minimum 1 token
|
102 |
+
|
103 |
+
if response:
|
104 |
+
# Use actual response length if available
|
105 |
+
output_tokens = max(1, len(response) // 4)
|
106 |
+
else:
|
107 |
+
# Estimate if no response yet (for preview)
|
108 |
+
output_tokens = max(10, input_tokens) # Assume at least 10 tokens response
|
109 |
+
|
110 |
total_tokens = input_tokens + output_tokens
|
111 |
|
112 |
print(f"[SAVINGS] Input tokens: {input_tokens}, Output tokens: {output_tokens}")
|
|
|
173 |
model_config = MODEL_CONFIGS[selected_model]
|
174 |
print(f"[PROCESS] Using model config: {model_config['name']}")
|
175 |
|
176 |
+
# Initial savings estimate (will be recalculated after getting response)
|
177 |
+
print(f"[PROCESS] Calculating initial savings estimate...")
|
178 |
+
initial_savings = router.calculate_savings(selected_model, message)
|
179 |
+
print(f"[PROCESS] Initial estimate: {initial_savings['energy_saved_percent']:.1f}% energy, {initial_savings['cost_saved_percent']:.1f}% cost")
|
180 |
|
181 |
open_router_model_dict = {
|
182 |
+
"large": "meta-llama/llama-3.1-405b-instruct",
|
183 |
"small": "mistralai/mistral-small-24b-instruct-2501"
|
184 |
}
|
185 |
# Check if API key is available
|
|
|
235 |
import traceback
|
236 |
print(f"[API EXCEPTION] Traceback:\n{traceback.format_exc()}")
|
237 |
answer = f"[Error] Failed to get response from {model_config['name']}. Error: {str(e)}"
|
238 |
+
|
239 |
+
# Recalculate savings with actual response
|
240 |
+
print(f"[PROCESS] Recalculating savings with actual response...")
|
241 |
+
savings = router.calculate_savings(selected_model, message, answer)
|
242 |
+
print(f"[PROCESS] Final savings: {savings['energy_saved_percent']:.1f}% energy, {savings['cost_saved_percent']:.1f}% cost")
|
243 |
+
|
244 |
# Format the response with model info
|
245 |
response = f"{answer}\n\n<div style='background: #f0f9ff; border-left: 3px solid #0ea5e9; padding: 8px 12px; margin-top: 10px; border-radius: 4px;'><small style='color: #0369a1; font-weight: 500;'>{model_config['icon']} Answered by {model_config['name']}</small></div>"
|
246 |
|
|
|
262 |
<div style="background: #ffffff; border: 1px solid #fed7aa; border-radius: 12px; padding: 20px;">
|
263 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
|
264 |
<div>
|
265 |
+
<p style="color: #8795a1; margin: 0; font-size: 0.9em;">🔥 Energy Consumption</p>
|
266 |
<p style="color: #ea580c; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
267 |
{savings['actual_energy']:.1f} Wh
|
268 |
</p>
|
|
|
271 |
</p>
|
272 |
</div>
|
273 |
<div>
|
274 |
+
<p style="color: #8795a1; margin: 0; font-size: 0.9em;">💸 Cost Impact</p>
|
275 |
<p style="color: #dc2626; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
276 |
${savings['actual_cost']:.6f}
|
277 |
</p>
|
|
|
288 |
<div style="background: #ffffff; border: 1px solid #e1e8ed; border-radius: 12px; padding: 20px;">
|
289 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
|
290 |
<div>
|
291 |
+
<p style="color: #8795a1; margin: 0; font-size: 0.9em;">⚡ Energy Efficiency</p>
|
292 |
<p style="color: #22c55e; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
293 |
{savings['energy_saved_percent']:.1f}% saved
|
294 |
</p>
|
|
|
300 |
</p>
|
301 |
</div>
|
302 |
<div>
|
303 |
+
<p style="color: #8795a1; margin: 0; font-size: 0.9em;">💰 Cost Optimization</p>
|
304 |
<p style="color: #3b82f6; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
305 |
{savings['cost_saved_percent']:.1f}% saved
|
306 |
</p>
|
|
|
334 |
return """
|
335 |
<div style="background: #f8fafc; border-radius: 12px; padding: 30px; text-align: center; color: #64748b;">
|
336 |
<p style="margin: 0;">No queries processed yet</p>
|
337 |
+
<p style="margin: 10px 0 0 0; font-size: 0.9em;">💬 Start a conversation to see your impact metrics</p>
|
338 |
</div>
|
339 |
"""
|
340 |
|
|
|
351 |
stats = f"""
|
352 |
<div style="background: #ffffff; border: 1px solid #e2e8f0; border-radius: 12px; padding: 25px;">
|
353 |
<div style="text-align: center; margin-bottom: 20px;">
|
354 |
+
<h4 style="color: #1e293b; font-size: 1.1em; margin: 0; font-weight: 600;">π Your Total Impact</h4>
|
355 |
</div>
|
356 |
|
357 |
<div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 15px; margin-bottom: 15px;">
|
358 |
<div style="background: #f0fdf4; border-radius: 8px; padding: 15px; text-align: center;">
|
359 |
+
<p style="color: #166534; font-size: 0.9em; margin: 0;">🌱 Energy Saved</p>
|
360 |
<p style="color: #15803d; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
361 |
{user_total_energy_saved:.1f}
|
362 |
</p>
|
|
|
364 |
</div>
|
365 |
|
366 |
<div style="background: #eff6ff; border-radius: 8px; padding: 15px; text-align: center;">
|
367 |
+
<p style="color: #1e40af; font-size: 0.9em; margin: 0;">💵 Money Saved</p>
|
368 |
<p style="color: #2563eb; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
|
369 |
${user_total_cost_saved:.6f}
|
370 |
</p>
|
|
|
409 |
gr.Markdown("""
|
410 |
<div style="margin-bottom: 30px;">
|
411 |
<h1 style="margin: 0; font-size: 2em; font-weight: 600; color: #0f172a;">
|
412 |
+
π€ Do I *really* need a huge LLM?
|
413 |
</h1>
|
414 |
<p style="margin: 10px 0 0 0; color: #64748b; font-size: 1.1em;">
|
415 |
+
Let's find out! This tool automatically routes your queries to the right-sized model. 🎯
|
416 |
</p>
|
417 |
</div>
|
418 |
""")
|
|
|
428 |
|
429 |
with gr.Row():
|
430 |
msg = gr.Textbox(
|
431 |
+
placeholder="π Type your message here...",
|
432 |
show_label=False,
|
433 |
scale=9,
|
434 |
container=False,
|
435 |
elem_classes=["message-input"]
|
436 |
)
|
437 |
submit = gr.Button(
|
438 |
+
"Send π",
|
439 |
variant="primary",
|
440 |
scale=1,
|
441 |
min_width=100
|
|
|
446 |
model_display = gr.HTML(
|
447 |
value="""
|
448 |
<div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
|
449 |
+
<p style="margin: 0;">π€ Model selection will appear here</p>
|
450 |
</div>
|
451 |
""",
|
452 |
label="Selected Model"
|
|
|
456 |
savings_display = gr.HTML(
|
457 |
value="""
|
458 |
<div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
|
459 |
+
<p style="margin: 0;">π Efficiency metrics will appear here</p>
|
460 |
</div>
|
461 |
""",
|
462 |
label="Efficiency Metrics"
|
|
|
472 |
with gr.Row():
|
473 |
gr.Markdown("""
|
474 |
<div style="margin-top: 40px; padding-top: 20px; border-top: 1px solid #e2e8f0; text-align: center; color: #94a3b8; font-size: 0.85em;">
|
475 |
+
<p style="margin: 5px 0;">π Comparing small vs large model efficiency β’ π Real-time tracking β’ π Environmental impact monitoring</p>
|
476 |
</div>
|
477 |
""")
|
478 |
|