Claude AI committed on
Commit
1ec5d3f
·
1 Parent(s): f049b37

Add application files with improved energy tracking

Browse files

- Add Gradio app with model routing based on ModernBERT classification
- Show energy and cost savings when using small model
- Display actual consumption when using large model
- Support for OpenRouter API integration
- Fix token estimation for short prompts

Files changed (3) hide show
  1. app.py +500 -0
  2. bertmodel.py +68 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import time
4
+ from typing import Dict, Tuple, List
5
+ from bertmodel import predict_label
6
+ # from ecologits import EcoLogits # Removed - using OpenRouter instead
7
+ # from openai import OpenAI # Removed - using OpenRouter instead
8
+ from dotenv import load_dotenv
9
+ import os
10
+ import requests
11
+ import json
12
+
13
# Silence the tokenizers parallelism warning before any tokenizer is used.
os.environ.update(TOKENIZERS_PARALLELISM="false")

# Read variables from a .env file (if present), then pick up the API key.
# An empty key switches the app into demo mode further down.
load_dotenv()
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
18
# Display metadata plus per-token energy and price estimates for each routing
# target. The keys ("large", "small") are the values returned by the router.
MODEL_CONFIGS = dict(
    large=dict(
        name="Claude Opus 4",
        energy_per_token=1.356,  # Wh per token (67.8 Wh / 50 tokens)
        cost_per_input_token=0.000015,  # $15/M tokens
        cost_per_output_token=0.000075,  # $75/M tokens
        icon="🧠",
    ),
    small=dict(
        name="Mistral Small 24B",
        energy_per_token=0.00596,  # Wh per token (0.298 Wh / 50 tokens)
        cost_per_input_token=0.00000005,  # $0.05/M tokens
        cost_per_output_token=0.00000012,  # $0.12/M tokens
        icon="⚡",
    ),
)
35
+
36
+ class ModelRouter:
37
+ def __init__(self):
38
+ self.routing_history = []
39
+ print("[INIT] ModelRouter initialized")
40
+
41
+ def classify_prompt(self, prompt: str) -> str:
42
+ print(f"\n[CLASSIFY] Classifying prompt: '{prompt[:50]}...'")
43
+ label = predict_label(prompt)
44
+ print(f"[CLASSIFY] ModernBERT returned label: '{label}'")
45
+ return label
46
+
47
+ def select_model(self, prompt: str) -> str:
48
+ """Select the most efficient model based on prompt classification."""
49
+ prompt_type = self.classify_prompt(prompt)
50
+ # Normalize
51
+ key = prompt_type.strip().lower()
52
+ print(f"[SELECT] Normalized label: '{key}'")
53
+
54
+ # Map normalized labels to actual MODEL_CONFIGS keys
55
+ if "small" in key:
56
+ print(f"[SELECT] Selected: SMALL model (Mistral Small 24B)")
57
+ return "small"
58
+ else:
59
+ print(f"[SELECT] Selected: LARGE model (Claude Opus 4)")
60
+ return "large"
61
+
62
+
63
+ def estimate_tokens(self,
64
+ prompt: str,
65
+ response: str | None = None,
66
+ max_response_tokens: int | None = None) -> int:
67
+ """
68
+ Estimate total token count: exact prompt tokens +
69
+ a target number of response tokens.
70
+ """
71
+ # Simple estimation: 4 characters = 1 token
72
+ prompt_tokens = len(prompt) // 4
73
+ print(f"[TOKENS] Prompt tokens: {prompt_tokens} (from {len(prompt)} chars)")
74
+
75
+ if response is not None:
76
+ response_tokens = len(response) // 4
77
+ elif max_response_tokens is not None:
78
+ # you’re reserving this many tokens for the model’s reply
79
+ response_tokens = max_response_tokens
80
+ else:
81
+ # Estimate response will be similar length to prompt
82
+ response_tokens = prompt_tokens
83
+
84
+ total_tokens = prompt_tokens + response_tokens
85
+ print(f"[TOKENS] Response tokens: {response_tokens}, Total: {total_tokens}")
86
+ return total_tokens
87
+
88
+ def estimate_large_model_energy(self, tokens: int) -> float:
89
+ """
90
+ Estimate large model energy consumption based on tokens.
91
+ Using empirical estimates for energy consumption.
92
+ """
93
+ large_config = MODEL_CONFIGS["large"]
94
+ return tokens * large_config["energy_per_token"]
95
+
96
+ def calculate_savings(self, selected_model: str, prompt: str) -> Dict:
97
+ """Calculate energy and cost savings compared to using the large model"""
98
+ print(f"[SAVINGS] Calculating for model: {selected_model}")
99
+
100
+ # Calculate input and output tokens separately
101
+ input_tokens = max(1, len(prompt) // 4) # Minimum 1 token
102
+ output_tokens = max(1, input_tokens) # Estimate same length response, minimum 1
103
+ total_tokens = input_tokens + output_tokens
104
+
105
+ print(f"[SAVINGS] Input tokens: {input_tokens}, Output tokens: {output_tokens}")
106
+
107
+ selected_config = MODEL_CONFIGS[selected_model]
108
+ large_config = MODEL_CONFIGS["large"]
109
+
110
+ # Calculate actual usage
111
+ actual_energy = total_tokens * selected_config["energy_per_token"]
112
+ actual_cost = (input_tokens * selected_config["cost_per_input_token"] +
113
+ output_tokens * selected_config["cost_per_output_token"])
114
+
115
+ # Calculate large model usage
116
+ large_energy = self.estimate_large_model_energy(total_tokens)
117
+ large_cost = (input_tokens * large_config["cost_per_input_token"] +
118
+ output_tokens * large_config["cost_per_output_token"])
119
+
120
+ # Calculate savings (only positive if small model is selected)
121
+ if selected_model == "small":
122
+ energy_saved = large_energy - actual_energy
123
+ cost_saved = large_cost - actual_cost
124
+ energy_saved_percent = (energy_saved / large_energy) * 100 if large_energy > 0 else 0
125
+ cost_saved_percent = (cost_saved / large_cost) * 100 if large_cost > 0 else 0
126
+ else:
127
+ # No savings if using the large model
128
+ energy_saved = 0
129
+ cost_saved = 0
130
+ energy_saved_percent = 0
131
+ cost_saved_percent = 0
132
+
133
+ print(f"[SAVINGS] Selected: {selected_model}")
134
+ print(f"[SAVINGS] Actual energy: {actual_energy:.4f} Wh, Large energy: {large_energy:.4f} Wh")
135
+ print(f"[SAVINGS] Actual cost: ${actual_cost:.8f}, Large cost: ${large_cost:.8f}")
136
+ print(f"[SAVINGS] Energy saved: {energy_saved:.4f} Wh ({energy_saved_percent:.1f}%)")
137
+ print(f"[SAVINGS] Cost saved: ${cost_saved:.8f} ({cost_saved_percent:.1f}%)")
138
+
139
+ return {
140
+ "selected_model": selected_config["name"],
141
+ "tokens": total_tokens,
142
+ "actual_energy": actual_energy,
143
+ "actual_cost": actual_cost,
144
+ "large_energy": large_energy,
145
+ "large_cost": large_cost,
146
+ "energy_saved": energy_saved,
147
+ "cost_saved": cost_saved,
148
+ "energy_saved_percent": energy_saved_percent,
149
+ "cost_saved_percent": cost_saved_percent,
150
+ "is_large_model": selected_model == "large" # Add flag for template
151
+ }
152
+
153
# Build the single router instance used by the request handlers, and log
# what the app was configured with (never the key itself, only its presence).
print("[STARTUP] Initializing ModelRouter...")
router = ModelRouter()
print("[STARTUP] ModelRouter ready")
print("[STARTUP] Available models:", list(MODEL_CONFIGS))
print("[STARTUP] OpenRouter API Key:", "SET" if OPENROUTER_API_KEY else "NOT SET")
158
+
159
# Seconds to wait on OpenRouter before giving up; without a timeout a stalled
# connection would block the request handler indefinitely.
_OPENROUTER_TIMEOUT_SECONDS = 120

# OpenRouter model identifier for each MODEL_CONFIGS key.
_OPENROUTER_MODEL_IDS = {
    "large": "anthropic/claude-opus-4",
    "small": "mistralai/mistral-small-24b-instruct-2501",
}


def _query_openrouter(model_id: str, message: str) -> str:
    """Send *message* to OpenRouter and return the reply or an error string.

    HTTP errors and unexpected payloads are reported as bracketed text so the
    chat UI can show them; network-level exceptions propagate to the caller.
    """
    request_data = {
        "model": model_id,
        "messages": [{"role": "user", "content": message}],
    }
    print(f"[API] Request data: {json.dumps(request_data, indent=2)[:200]}...")

    api_response = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={"Authorization": f"Bearer {OPENROUTER_API_KEY}"},
        json=request_data,  # requests serializes and sets Content-Type
        timeout=_OPENROUTER_TIMEOUT_SECONDS,
    )

    # Debug: print response status and headers
    print(f"[API] Response Status Code: {api_response.status_code}")
    print(f"[API] Response Headers: {dict(api_response.headers)}")

    if api_response.status_code != 200:
        print(f"[API ERROR] Full response: {api_response.text}")
        return f"[API Error {api_response.status_code}] {api_response.text[:200]}..."

    data = api_response.json()
    print(f"[API] Response keys: {list(data.keys())}")

    if "choices" in data and len(data["choices"]) > 0:
        answer = data["choices"][0]["message"]["content"]
        print(f"[API] Successfully got response: {answer[:100]}...")
        return answer

    print(f"[API ERROR] Unexpected response format: {json.dumps(data, indent=2)}")
    return "[Error] Unexpected response format from OpenRouter API"


def _render_model_info_html(model_config: Dict) -> str:
    """Return the HTML card naming the model that handled the query."""
    return f"""
    <div style="background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); padding: 20px; border-radius: 12px; margin-bottom: 20px;">
        <div style="display: flex; align-items: center; margin-bottom: 10px;">
            <span style="font-size: 2em; margin-right: 10px;">{model_config['icon']}</span>
            <h3 style="margin: 0; color: #2c3e50;">{model_config['name']}</h3>
        </div>
        <p style="color: #5a6c7d; margin: 5px 0;">Optimal model selected for your query</p>
    </div>
    """


def _render_savings_html(savings: Dict) -> str:
    """Return the metrics card: consumption for large, savings for small."""
    if savings['is_large_model']:
        # Show actual consumption for large model with warning colors
        return f"""
        <div style="background: #ffffff; border: 1px solid #fed7aa; border-radius: 12px; padding: 20px;">
            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
                <div>
                    <p style="color: #8795a1; margin: 0; font-size: 0.9em;">Energy Consumption</p>
                    <p style="color: #ea580c; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                        {savings['actual_energy']:.1f} Wh
                    </p>
                    <p style="color: #7c2d12; font-size: 0.85em; margin: 0;">
                        High energy usage
                    </p>
                </div>
                <div>
                    <p style="color: #8795a1; margin: 0; font-size: 0.9em;">Cost Impact</p>
                    <p style="color: #dc2626; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                        ${savings['actual_cost']:.6f}
                    </p>
                    <p style="color: #991b1b; font-size: 0.85em; margin: 0;">
                        Premium pricing
                    </p>
                </div>
            </div>
        </div>
        """

    # Show savings for small model with positive colors
    return f"""
    <div style="background: #ffffff; border: 1px solid #e1e8ed; border-radius: 12px; padding: 20px;">
        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
            <div>
                <p style="color: #8795a1; margin: 0; font-size: 0.9em;">Energy Efficiency</p>
                <p style="color: #22c55e; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                    {savings['energy_saved_percent']:.1f}% saved
                </p>
                <p style="color: #5a6c7d; font-size: 0.85em; margin: 0;">
                    {savings['energy_saved']:.1f} Wh reduction
                </p>
                <p style="color: #8795a1; font-size: 0.75em; margin: 3px 0 0 0; font-style: italic;">
                    vs. using large model
                </p>
            </div>
            <div>
                <p style="color: #8795a1; margin: 0; font-size: 0.9em;">Cost Optimization</p>
                <p style="color: #3b82f6; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                    {savings['cost_saved_percent']:.1f}% saved
                </p>
                <p style="color: #5a6c7d; font-size: 0.85em; margin: 0;">
                    ${savings['cost_saved']:.8f} reduction
                </p>
                <p style="color: #8795a1; font-size: 0.75em; margin: 3px 0 0 0; font-style: italic;">
                    vs. using large model
                </p>
            </div>
        </div>
    </div>
    """


def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, str]:
    """Process the user message and return response with savings info.

    Args:
        message: The user's prompt.
        history: Prior chat turns; accepted for the UI callback signature
            but not sent to the model (each prompt is answered on its own).

    Returns:
        A ``(response, model_info, savings_info)`` tuple: the answer text
        with an attribution footer, the selected-model card, and the
        metrics card, the last two as HTML.
    """
    print(f"\n{'='*60}")
    print(f"[PROCESS] New message received: '{message[:100]}...'")

    # Route to appropriate model
    selected_model = router.select_model(message)
    model_config = MODEL_CONFIGS[selected_model]
    print(f"[PROCESS] Using model config: {model_config['name']}")

    # Calculate savings
    print("[PROCESS] Calculating savings...")
    savings = router.calculate_savings(selected_model, message)
    print(f"[PROCESS] Savings calculated: {savings['energy_saved_percent']:.1f}% energy, {savings['cost_saved_percent']:.1f}% cost")

    if not OPENROUTER_API_KEY:
        print("[API] No OpenRouter API key found - running in DEMO MODE")
        answer = f"[Demo Mode] This would be a response from {model_config['name']} to: {message[:50]}..."
    else:
        # Only report that a key exists; never write any part of it to logs.
        print("[API] OpenRouter API key found")
        try:
            model_id = _OPENROUTER_MODEL_IDS[selected_model]
            print(f"[API] Calling OpenRouter with model: {model_id}")
            answer = _query_openrouter(model_id, message)
        except Exception as e:
            # UI boundary: log everything and surface the failure as chat
            # text instead of crashing the request handler.
            import traceback
            print(f"[API EXCEPTION] Error type: {type(e).__name__}")
            print(f"[API EXCEPTION] Error message: {str(e)}")
            print(f"[API EXCEPTION] Traceback:\n{traceback.format_exc()}")
            answer = f"[Error] Failed to get response from {model_config['name']}. Error: {str(e)}"

    # Format the response with model info
    response = (
        f"{answer}\n\n"
        "<div style='background: #f0f9ff; border-left: 3px solid #0ea5e9; padding: 8px 12px; margin-top: 10px; border-radius: 4px;'>"
        f"<small style='color: #0369a1; font-weight: 500;'>{model_config['icon']} Answered by {model_config['name']}</small></div>"
    )

    model_info = _render_model_info_html(model_config)
    savings_info = _render_savings_html(savings)

    # Add to routing history
    router.routing_history.append({
        "timestamp": time.time(),
        "prompt": message,
        "model": selected_model,
        "savings": savings,
    })

    print("[PROCESS] Response formatted, returning to UI")
    print(f"{'='*60}\n")

    return response, model_info, savings_info
317
+
318
def get_statistics() -> str:
    """Build the HTML dashboard of cumulative savings and model usage.

    Reads every entry of ``router.routing_history``; returns a placeholder
    panel when nothing has been routed yet.
    """
    history = router.routing_history
    if len(history) == 0:
        return """
        <div style="background: #f8fafc; border-radius: 12px; padding: 30px; text-align: center; color: #64748b;">
            <p style="margin: 0;">No queries processed yet</p>
            <p style="margin: 10px 0 0 0; font-size: 0.9em;">Start a conversation to see your impact metrics</p>
        </div>
        """

    # Accumulate totals and per-model usage in one pass over the history.
    user_total_energy_saved = 0
    user_total_cost_saved = 0
    small_model_count = 0
    large_model_count = 0
    for record in history:
        user_total_energy_saved += record["savings"]["energy_saved"]
        user_total_cost_saved += record["savings"]["cost_saved"]
        if record["model"] == "small":
            small_model_count += 1
        if record["model"] == "large":
            large_model_count += 1

    return f"""
    <div style="background: #ffffff; border: 1px solid #e2e8f0; border-radius: 12px; padding: 25px;">
        <div style="text-align: center; margin-bottom: 20px;">
            <h4 style="color: #1e293b; font-size: 1.1em; margin: 0; font-weight: 600;">Your Total Impact</h4>
        </div>

        <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 15px; margin-bottom: 15px;">
            <div style="background: #f0fdf4; border-radius: 8px; padding: 15px; text-align: center;">
                <p style="color: #166534; font-size: 0.9em; margin: 0;">Energy Saved</p>
                <p style="color: #15803d; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                    {user_total_energy_saved:.1f}
                </p>
                <p style="color: #166534; font-size: 0.8em; margin: 0;">Wh</p>
            </div>

            <div style="background: #eff6ff; border-radius: 8px; padding: 15px; text-align: center;">
                <p style="color: #1e40af; font-size: 0.9em; margin: 0;">Money Saved</p>
                <p style="color: #2563eb; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                    ${user_total_cost_saved:.6f}
                </p>
                <p style="color: #1e40af; font-size: 0.8em; margin: 0;">USD</p>
            </div>
        </div>

        <div style="background: #fefce8; border-radius: 8px; padding: 12px; text-align: center;">
            <p style="color: #713f12; font-size: 0.9em; margin: 0;">
                <span style="font-weight: 600;">Model Usage:</span> Small model {small_model_count}x, Large model {large_model_count}x
            </p>
        </div>
    </div>
    """
371
+
372
# Custom CSS for a more professional look
custom_css = """
.gradio-container {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica', 'Arial', sans-serif;
}
.message {
    padding: 12px 16px !important;
    border-radius: 8px !important;
}
"""

# Placeholder panels shown before the first query and again after "Clear".
_MODEL_PLACEHOLDER_HTML = """
<div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
    <p style="margin: 0;">Model selection will appear here</p>
</div>
"""
_SAVINGS_PLACEHOLDER_HTML = """
<div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
    <p style="margin: 0;">Efficiency metrics will appear here</p>
</div>
"""

# Create Gradio interface
with gr.Blocks(
    title="Do I really need a huge LLM?",
    theme=gr.themes.Base(
        primary_hue="blue",
        secondary_hue="gray",
        neutral_hue="gray",
        font=["Inter", "system-ui", "sans-serif"],
    ),
    css=custom_css,
) as demo:
    # Header
    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("""
            <div style="margin-bottom: 30px;">
                <h1 style="margin: 0; font-size: 2em; font-weight: 600; color: #0f172a;">
                    Do I really need a huge LLM?
                </h1>
                <p style="margin: 10px 0 0 0; color: #64748b; font-size: 1.1em;">
                    Let's find out! This tool automatically routes your queries to the right-sized model.
                </p>
            </div>
            """)

    with gr.Row():
        # Left column: conversation and input controls
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                container=True,
                elem_classes=["chat-container"],
            )

            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Type your message here...",
                    show_label=False,
                    scale=9,
                    container=False,
                    elem_classes=["message-input"],
                )
                submit = gr.Button(
                    "Send",
                    variant="primary",
                    scale=1,
                    min_width=100,
                )
                clear = gr.Button(
                    "Clear",
                    scale=1,
                    min_width=100,
                )

        # Right column: routing decision and metrics
        with gr.Column(scale=2):
            # Model selection display
            model_display = gr.HTML(
                value=_MODEL_PLACEHOLDER_HTML,
                label="Selected Model",
            )

            # Savings metrics
            savings_display = gr.HTML(
                value=_SAVINGS_PLACEHOLDER_HTML,
                label="Efficiency Metrics",
            )

            # Cumulative stats
            stats_display = gr.HTML(
                value=get_statistics(),
                label="Your Impact Dashboard",
            )

    # Footer with minimal info
    with gr.Row():
        gr.Markdown("""
        <div style="margin-top: 40px; padding-top: 20px; border-top: 1px solid #e2e8f0; text-align: center; color: #94a3b8; font-size: 0.85em;">
            <p style="margin: 5px 0;">Comparing small vs large model efficiency • Real-time tracking • Environmental impact monitoring</p>
        </div>
        """)

    def respond(message, chat_history):
        """Answer *message*, extend the chat, and refresh the side panels.

        Returns new values for the textbox (always cleared), the chatbot,
        the model card, the metrics card and the statistics dashboard.
        """
        # The chatbot value is None after it has been cleared.
        chat_history = chat_history or []

        if not message or not message.strip():
            # Blank input: skip classification and the (billable) API call
            # and leave the model and metrics cards as they are.
            return "", chat_history, gr.update(), gr.update(), get_statistics()

        response, model_info, savings = process_message(message, chat_history)
        chat_history.append((message, response))
        return "", chat_history, model_info, savings, get_statistics()

    def clear_chat():
        """Empty the conversation and restore the placeholder panels.

        Cumulative statistics are kept; only the visible chat is reset.
        """
        return None, _MODEL_PLACEHOLDER_HTML, _SAVINGS_PLACEHOLDER_HTML, get_statistics()

    respond_outputs = [msg, chatbot, model_display, savings_display, stats_display]
    # ``respond`` already clears the textbox, so one listener per trigger
    # is enough for both the Enter key and the Send button.
    msg.submit(respond, [msg, chatbot], respond_outputs)
    submit.click(respond, [msg, chatbot], respond_outputs)

    # Clear button functionality
    clear.click(
        clear_chat,
        outputs=[chatbot, model_display, savings_display, stats_display],
    )
488
+
489
if __name__ == "__main__":
    # Print a startup banner summarizing the configuration, then serve the UI.
    banner = "=" * 60
    print(f"\n{banner}")
    print(" DO I REALLY NEED A HUGE LLM? - STARTUP")
    print(banner)
    print("[LAUNCH] Starting Gradio app...")
    print(f"[LAUNCH] Environment: TOKENIZERS_PARALLELISM={os.environ.get('TOKENIZERS_PARALLELISM')}")
    print("[LAUNCH] Models configured:")
    for model_key, model_cfg in MODEL_CONFIGS.items():
        print(f" - {model_key}: {model_cfg['name']} ({model_cfg['icon']})")
    key_status = '✓ SET' if OPENROUTER_API_KEY else '✗ NOT SET (Demo Mode)'
    print(f"[LAUNCH] OpenRouter API Key: {key_status}")
    print(f"{banner}\n")
    demo.launch(share=False)
bertmodel.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # bertmodel.py
2
+
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+ import torch
5
+ from typing import Optional
6
+ from pathlib import Path
7
+
8
# Use the model from Hugging Face
_MODEL_NAME = "monsimas/ModernBERT-ecoRouter"

# Load tokenizer and model at import time so we don't pay the I/O cost on
# every call. Failures are re-raised as RuntimeError with the original
# exception chained, so the root cause stays visible in the traceback.
try:
    _TOKENIZER = AutoTokenizer.from_pretrained(_MODEL_NAME)
except Exception as e:
    raise RuntimeError(f"Failed to load tokenizer from {_MODEL_NAME}: {e}") from e

try:
    _MODEL = AutoModelForSequenceClassification.from_pretrained(_MODEL_NAME)
    _MODEL.eval()  # evaluation mode
except Exception as e:
    raise RuntimeError(f"Failed to load model from {_MODEL_NAME}: {e}") from e
22
+
23
+
24
def predict_label(
    text: str,
    tokenizer: Optional[AutoTokenizer] = None,
    model: Optional[AutoModelForSequenceClassification] = None
) -> str:
    """
    Classify a single string and return the predicted label.

    Args:
        text: The input text to classify.
        tokenizer: (optional) tokenizer instance; defaults to the module-level tokenizer.
        model: (optional) model instance; defaults to the module-level model.

    Returns:
        The name of the predicted class (as per `model.config.id2label`).

    Raises:
        ValueError: if the model's config has no (or an empty) `id2label` mapping.
    """
    # Compare against None explicitly: an object passed by the caller must
    # be used even if it happens to be falsy (e.g. defines __len__ == 0).
    tok = _TOKENIZER if tokenizer is None else tokenizer
    mdl = _MODEL if model is None else model

    # Validate the label mapping up front so a misconfigured model fails
    # before any inference work is done.
    id2label = getattr(mdl.config, "id2label", None)
    if not id2label:
        raise ValueError("Model config does not contain an id2label mapping.")

    # Tokenize (as a batch of one)
    inputs = tok([text], padding=True, truncation=True, return_tensors="pt")

    # Inference without gradient tracking
    with torch.no_grad():
        outputs = mdl(**inputs)

    pred_id = torch.argmax(outputs.logits, dim=-1).item()

    # Map to label name
    return id2label[pred_id]


# Example usage (kept as a comment so importing or running this module
# never triggers an extra inference):
#
#     if __name__ == "__main__":
#         sample = "This is an example sentence to classify."
#         label = predict_label(sample)
#         print(f"Input: {sample}\nPredicted label: {label}")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==4.16.0
2
+ python-dotenv
3
+ transformers
4
+ torch
5
+ requests