Claude AI committed on
Commit
ce91b95
·
1 Parent(s): 1ec5d3f

Update to Llama 3.1 405B and improve UI

Browse files

- Replace Claude Opus 4 with Llama 3.1 405B as large model
- Update energy consumption: 11.9 Wh per 50 tokens
- Add real-time token calculation after API response
- Fix token estimation for short prompts
- Add emojis throughout interface
- Show actual consumption vs savings based on model selection
- Improve accuracy of energy and cost calculations

Files changed (1) hide show
  1. app.py +40 -27
app.py CHANGED
@@ -18,11 +18,11 @@ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
18
  # Model configurations with energy consumption and cost estimates
19
  MODEL_CONFIGS = {
20
  "large": {
21
- "name": "Claude Opus 4",
22
- "energy_per_token": 1.356, # Wh per token (67.8 Wh / 50 tokens)
23
- "cost_per_input_token": 0.000015, # $15/M tokens
24
- "cost_per_output_token": 0.000075, # $75/M tokens
25
- "icon": "🧠"
26
  },
27
  "small": {
28
  "name": "Mistral Small 24B",
@@ -93,13 +93,20 @@ class ModelRouter:
93
  large_config = MODEL_CONFIGS["large"]
94
  return tokens * large_config["energy_per_token"]
95
 
96
- def calculate_savings(self, selected_model: str, prompt: str) -> Dict:
97
  """Calculate energy and cost savings compared to using the large model"""
98
  print(f"[SAVINGS] Calculating for model: {selected_model}")
99
 
100
  # Calculate input and output tokens separately
101
  input_tokens = max(1, len(prompt) // 4) # Minimum 1 token
102
- output_tokens = max(1, input_tokens) # Estimate same length response, minimum 1
 
 
 
 
 
 
 
103
  total_tokens = input_tokens + output_tokens
104
 
105
  print(f"[SAVINGS] Input tokens: {input_tokens}, Output tokens: {output_tokens}")
@@ -166,13 +173,13 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
166
  model_config = MODEL_CONFIGS[selected_model]
167
  print(f"[PROCESS] Using model config: {model_config['name']}")
168
 
169
- # Calculate savings
170
- print(f"[PROCESS] Calculating savings...")
171
- savings = router.calculate_savings(selected_model, message)
172
- print(f"[PROCESS] Savings calculated: {savings['energy_saved_percent']:.1f}% energy, {savings['cost_saved_percent']:.1f}% cost")
173
 
174
  open_router_model_dict = {
175
- "large": "anthropic/claude-opus-4",
176
  "small": "mistralai/mistral-small-24b-instruct-2501"
177
  }
178
  # Check if API key is available
@@ -228,6 +235,12 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
228
  import traceback
229
  print(f"[API EXCEPTION] Traceback:\n{traceback.format_exc()}")
230
  answer = f"[Error] Failed to get response from {model_config['name']}. Error: {str(e)}"
 
 
 
 
 
 
231
  # Format the response with model info
232
  response = f"{answer}\n\n<div style='background: #f0f9ff; border-left: 3px solid #0ea5e9; padding: 8px 12px; margin-top: 10px; border-radius: 4px;'><small style='color: #0369a1; font-weight: 500;'>{model_config['icon']} Answered by {model_config['name']}</small></div>"
233
 
@@ -249,7 +262,7 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
249
  <div style="background: #ffffff; border: 1px solid #fed7aa; border-radius: 12px; padding: 20px;">
250
  <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
251
  <div>
252
- <p style="color: #8795a1; margin: 0; font-size: 0.9em;">Energy Consumption</p>
253
  <p style="color: #ea580c; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
254
  {savings['actual_energy']:.1f} Wh
255
  </p>
@@ -258,7 +271,7 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
258
  </p>
259
  </div>
260
  <div>
261
- <p style="color: #8795a1; margin: 0; font-size: 0.9em;">Cost Impact</p>
262
  <p style="color: #dc2626; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
263
  ${savings['actual_cost']:.6f}
264
  </p>
@@ -275,7 +288,7 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
275
  <div style="background: #ffffff; border: 1px solid #e1e8ed; border-radius: 12px; padding: 20px;">
276
  <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
277
  <div>
278
- <p style="color: #8795a1; margin: 0; font-size: 0.9em;">Energy Efficiency</p>
279
  <p style="color: #22c55e; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
280
  {savings['energy_saved_percent']:.1f}% saved
281
  </p>
@@ -287,7 +300,7 @@ def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, s
287
  </p>
288
  </div>
289
  <div>
290
- <p style="color: #8795a1; margin: 0; font-size: 0.9em;">Cost Optimization</p>
291
  <p style="color: #3b82f6; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
292
  {savings['cost_saved_percent']:.1f}% saved
293
  </p>
@@ -321,7 +334,7 @@ def get_statistics() -> str:
321
  return """
322
  <div style="background: #f8fafc; border-radius: 12px; padding: 30px; text-align: center; color: #64748b;">
323
  <p style="margin: 0;">No queries processed yet</p>
324
- <p style="margin: 10px 0 0 0; font-size: 0.9em;">Start a conversation to see your impact metrics</p>
325
  </div>
326
  """
327
 
@@ -338,12 +351,12 @@ def get_statistics() -> str:
338
  stats = f"""
339
  <div style="background: #ffffff; border: 1px solid #e2e8f0; border-radius: 12px; padding: 25px;">
340
  <div style="text-align: center; margin-bottom: 20px;">
341
- <h4 style="color: #1e293b; font-size: 1.1em; margin: 0; font-weight: 600;">Your Total Impact</h4>
342
  </div>
343
 
344
  <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 15px; margin-bottom: 15px;">
345
  <div style="background: #f0fdf4; border-radius: 8px; padding: 15px; text-align: center;">
346
- <p style="color: #166534; font-size: 0.9em; margin: 0;">Energy Saved</p>
347
  <p style="color: #15803d; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
348
  {user_total_energy_saved:.1f}
349
  </p>
@@ -351,7 +364,7 @@ def get_statistics() -> str:
351
  </div>
352
 
353
  <div style="background: #eff6ff; border-radius: 8px; padding: 15px; text-align: center;">
354
- <p style="color: #1e40af; font-size: 0.9em; margin: 0;">Money Saved</p>
355
  <p style="color: #2563eb; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
356
  ${user_total_cost_saved:.6f}
357
  </p>
@@ -396,10 +409,10 @@ with gr.Blocks(
396
  gr.Markdown("""
397
  <div style="margin-bottom: 30px;">
398
  <h1 style="margin: 0; font-size: 2em; font-weight: 600; color: #0f172a;">
399
- Do I really need a huge LLM?
400
  </h1>
401
  <p style="margin: 10px 0 0 0; color: #64748b; font-size: 1.1em;">
402
- Let's find out! This tool automatically routes your queries to the right-sized model.
403
  </p>
404
  </div>
405
  """)
@@ -415,14 +428,14 @@ with gr.Blocks(
415
 
416
  with gr.Row():
417
  msg = gr.Textbox(
418
- placeholder="Type your message here...",
419
  show_label=False,
420
  scale=9,
421
  container=False,
422
  elem_classes=["message-input"]
423
  )
424
  submit = gr.Button(
425
- "Send",
426
  variant="primary",
427
  scale=1,
428
  min_width=100
@@ -433,7 +446,7 @@ with gr.Blocks(
433
  model_display = gr.HTML(
434
  value="""
435
  <div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
436
- <p style="margin: 0;">Model selection will appear here</p>
437
  </div>
438
  """,
439
  label="Selected Model"
@@ -443,7 +456,7 @@ with gr.Blocks(
443
  savings_display = gr.HTML(
444
  value="""
445
  <div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
446
- <p style="margin: 0;">Efficiency metrics will appear here</p>
447
  </div>
448
  """,
449
  label="Efficiency Metrics"
@@ -459,7 +472,7 @@ with gr.Blocks(
459
  with gr.Row():
460
  gr.Markdown("""
461
  <div style="margin-top: 40px; padding-top: 20px; border-top: 1px solid #e2e8f0; text-align: center; color: #94a3b8; font-size: 0.85em;">
462
- <p style="margin: 5px 0;">Comparing small vs large model efficiency • Real-time tracking • Environmental impact monitoring</p>
463
  </div>
464
  """)
465
 
 
18
  # Model configurations with energy consumption and cost estimates
19
  MODEL_CONFIGS = {
20
  "large": {
21
+ "name": "Llama 3.1 405B",
22
+ "energy_per_token": 0.238, # Wh per token (11.9 Wh / 50 tokens)
23
+ "cost_per_input_token": 0.000003, # $3/M tokens (OpenRouter pricing)
24
+ "cost_per_output_token": 0.000003, # $3/M tokens (OpenRouter pricing)
25
+ "icon": "🦙"
26
  },
27
  "small": {
28
  "name": "Mistral Small 24B",
 
93
  large_config = MODEL_CONFIGS["large"]
94
  return tokens * large_config["energy_per_token"]
95
 
96
+ def calculate_savings(self, selected_model: str, prompt: str, response: str = None) -> Dict:
97
  """Calculate energy and cost savings compared to using the large model"""
98
  print(f"[SAVINGS] Calculating for model: {selected_model}")
99
 
100
  # Calculate input and output tokens separately
101
  input_tokens = max(1, len(prompt) // 4) # Minimum 1 token
102
+
103
+ if response:
104
+ # Use actual response length if available
105
+ output_tokens = max(1, len(response) // 4)
106
+ else:
107
+ # Estimate if no response yet (for preview)
108
+ output_tokens = max(10, input_tokens) # Assume at least 10 tokens response
109
+
110
  total_tokens = input_tokens + output_tokens
111
 
112
  print(f"[SAVINGS] Input tokens: {input_tokens}, Output tokens: {output_tokens}")
 
173
  model_config = MODEL_CONFIGS[selected_model]
174
  print(f"[PROCESS] Using model config: {model_config['name']}")
175
 
176
+ # Initial savings estimate (will be recalculated after getting response)
177
+ print(f"[PROCESS] Calculating initial savings estimate...")
178
+ initial_savings = router.calculate_savings(selected_model, message)
179
+ print(f"[PROCESS] Initial estimate: {initial_savings['energy_saved_percent']:.1f}% energy, {initial_savings['cost_saved_percent']:.1f}% cost")
180
 
181
  open_router_model_dict = {
182
+ "large": "meta-llama/llama-3.1-405b-instruct",
183
  "small": "mistralai/mistral-small-24b-instruct-2501"
184
  }
185
  # Check if API key is available
 
235
  import traceback
236
  print(f"[API EXCEPTION] Traceback:\n{traceback.format_exc()}")
237
  answer = f"[Error] Failed to get response from {model_config['name']}. Error: {str(e)}"
238
+
239
+ # Recalculate savings with actual response
240
+ print(f"[PROCESS] Recalculating savings with actual response...")
241
+ savings = router.calculate_savings(selected_model, message, answer)
242
+ print(f"[PROCESS] Final savings: {savings['energy_saved_percent']:.1f}% energy, {savings['cost_saved_percent']:.1f}% cost")
243
+
244
  # Format the response with model info
245
  response = f"{answer}\n\n<div style='background: #f0f9ff; border-left: 3px solid #0ea5e9; padding: 8px 12px; margin-top: 10px; border-radius: 4px;'><small style='color: #0369a1; font-weight: 500;'>{model_config['icon']} Answered by {model_config['name']}</small></div>"
246
 
 
262
  <div style="background: #ffffff; border: 1px solid #fed7aa; border-radius: 12px; padding: 20px;">
263
  <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
264
  <div>
265
+ <p style="color: #8795a1; margin: 0; font-size: 0.9em;">🔥 Energy Consumption</p>
266
  <p style="color: #ea580c; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
267
  {savings['actual_energy']:.1f} Wh
268
  </p>
 
271
  </p>
272
  </div>
273
  <div>
274
+ <p style="color: #8795a1; margin: 0; font-size: 0.9em;">💸 Cost Impact</p>
275
  <p style="color: #dc2626; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
276
  ${savings['actual_cost']:.6f}
277
  </p>
 
288
  <div style="background: #ffffff; border: 1px solid #e1e8ed; border-radius: 12px; padding: 20px;">
289
  <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
290
  <div>
291
+ <p style="color: #8795a1; margin: 0; font-size: 0.9em;">⚡ Energy Efficiency</p>
292
  <p style="color: #22c55e; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
293
  {savings['energy_saved_percent']:.1f}% saved
294
  </p>
 
300
  </p>
301
  </div>
302
  <div>
303
+ <p style="color: #8795a1; margin: 0; font-size: 0.9em;">💰 Cost Optimization</p>
304
  <p style="color: #3b82f6; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
305
  {savings['cost_saved_percent']:.1f}% saved
306
  </p>
 
334
  return """
335
  <div style="background: #f8fafc; border-radius: 12px; padding: 30px; text-align: center; color: #64748b;">
336
  <p style="margin: 0;">No queries processed yet</p>
337
+ <p style="margin: 10px 0 0 0; font-size: 0.9em;">💬 Start a conversation to see your impact metrics</p>
338
  </div>
339
  """
340
 
 
351
  stats = f"""
352
  <div style="background: #ffffff; border: 1px solid #e2e8f0; border-radius: 12px; padding: 25px;">
353
  <div style="text-align: center; margin-bottom: 20px;">
354
+ <h4 style="color: #1e293b; font-size: 1.1em; margin: 0; font-weight: 600;">🌍 Your Total Impact</h4>
355
  </div>
356
 
357
  <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 15px; margin-bottom: 15px;">
358
  <div style="background: #f0fdf4; border-radius: 8px; padding: 15px; text-align: center;">
359
+ <p style="color: #166534; font-size: 0.9em; margin: 0;">🌱 Energy Saved</p>
360
  <p style="color: #15803d; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
361
  {user_total_energy_saved:.1f}
362
  </p>
 
364
  </div>
365
 
366
  <div style="background: #eff6ff; border-radius: 8px; padding: 15px; text-align: center;">
367
+ <p style="color: #1e40af; font-size: 0.9em; margin: 0;">💵 Money Saved</p>
368
  <p style="color: #2563eb; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
369
  ${user_total_cost_saved:.6f}
370
  </p>
 
409
  gr.Markdown("""
410
  <div style="margin-bottom: 30px;">
411
  <h1 style="margin: 0; font-size: 2em; font-weight: 600; color: #0f172a;">
412
+ 🤔 Do I *really* need a huge LLM?
413
  </h1>
414
  <p style="margin: 10px 0 0 0; color: #64748b; font-size: 1.1em;">
415
+ Let's find out! This tool automatically routes your queries to the right-sized model. 🎯
416
  </p>
417
  </div>
418
  """)
 
428
 
429
  with gr.Row():
430
  msg = gr.Textbox(
431
+ placeholder="💭 Type your message here...",
432
  show_label=False,
433
  scale=9,
434
  container=False,
435
  elem_classes=["message-input"]
436
  )
437
  submit = gr.Button(
438
+ "Send 🚀",
439
  variant="primary",
440
  scale=1,
441
  min_width=100
 
446
  model_display = gr.HTML(
447
  value="""
448
  <div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
449
+ <p style="margin: 0;">🤖 Model selection will appear here</p>
450
  </div>
451
  """,
452
  label="Selected Model"
 
456
  savings_display = gr.HTML(
457
  value="""
458
  <div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
459
+ <p style="margin: 0;">📊 Efficiency metrics will appear here</p>
460
  </div>
461
  """,
462
  label="Efficiency Metrics"
 
472
  with gr.Row():
473
  gr.Markdown("""
474
  <div style="margin-top: 40px; padding-top: 20px; border-top: 1px solid #e2e8f0; text-align: center; color: #94a3b8; font-size: 0.85em;">
475
+ <p style="margin: 5px 0;">🔍 Comparing small vs large model efficiency • 📈 Real-time tracking • 🌎 Environmental impact monitoring</p>
476
  </div>
477
  """)
478