ysharma (HF Staff) committed
Commit 244107e · verified · 1 Parent(s): 84f8ce7

Update config.py

Files changed (1)
  1. config.py +256 -120
config.py CHANGED
@@ -1,6 +1,5 @@
  """
- Configuration module for Universal MCP Client
- Enhanced with HuggingFace Inference Provider support
  """
  import os
  from dataclasses import dataclass
@@ -22,112 +21,180 @@ class MCPServerConfig:
  class AppConfig:
      """Application configuration settings"""

-     # API Configuration
-     ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
      HF_TOKEN = os.getenv("HF_TOKEN")

-     # Model Configuration
-     CLAUDE_MODEL = "claude-sonnet-4-20250514"
-     MAX_TOKENS = 2048
-
-     # MCP Configuration
-     MCP_BETA_VERSION = "mcp-client-2025-04-04"
-     MCP_TIMEOUT_SECONDS = 20.0
-
-     # UI Configuration
-     GRADIO_THEME = "citrus"
-     DEBUG_MODE = True
-
-     # File Support
-     SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp']
-     SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac']
-     SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov']
-     SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx']

-     # Inference Providers Configuration
      INFERENCE_PROVIDERS = {
-         "sambanova": {
-             "name": "SambaNova",
-             "description": "Ultra-fast inference on optimized hardware",
-             "supports_tools": True,
-             "models": [
-                 "meta-llama/Llama-3.3-70B-Instruct",
-                 "deepseek-ai/DeepSeek-R1-0528",
-                 "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
-                 "intfloat/e5-mistral-7b-instruct"
-             ]
-         },
-         "together": {
-             "name": "Together AI",
-             "description": "High-performance inference for open models",
-             "supports_tools": True,
-             "models": [
-                 "deepseek-ai/DeepSeek-V3-0324",
-                 "Qwen/Qwen2.5-72B-Instruct",
-                 "meta-llama/Llama-3.1-8B-Instruct",
-                 "black-forest-labs/FLUX.1-dev"
-             ]
-         },
-         "replicate": {
-             "name": "Replicate",
-             "description": "Run AI models in the cloud",
-             "supports_tools": True,
-             "models": [
-                 "meta/llama-2-70b-chat",
-                 "mistralai/mixtral-8x7b-instruct-v0.1",
-                 "black-forest-labs/flux-schnell"
-             ]
-         },
-         "groq": {
-             "name": "Groq",
-             "description": "Ultra-low latency LPU inference",
-             "supports_tools": True,
-             "models": [
-                 "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-                 "llama-3.1-70b-versatile",
-                 "mixtral-8x7b-32768"
-             ]
-         },
-         "fal-ai": {
-             "name": "fal.ai",
-             "description": "Fast AI model inference",
-             "supports_tools": True,
-             "models": [
-                 "meta-llama/Llama-3.1-8B-Instruct",
-                 "black-forest-labs/flux-pro"
-             ]
          },
          "fireworks-ai": {
              "name": "Fireworks AI",
-             "description": "Production-ready inference platform",
-             "supports_tools": True,
-             "models": [
-                 "accounts/fireworks/models/llama-v3p1-70b-instruct",
-                 "accounts/fireworks/models/mixtral-8x7b-instruct"
-             ]
          },
-         "cohere": {
-             "name": "Cohere",
-             "description": "Enterprise-grade language AI",
-             "supports_tools": True,
-             "models": [
-                 "command-r-plus",
-                 "command-r",
-                 "command"
-             ]
          },
-         "hf-inference": {
-             "name": "HF Inference",
-             "description": "Hugging Face serverless inference",
-             "supports_tools": True,
-             "models": [
-                 "meta-llama/Llama-3.2-11B-Vision-Instruct",
-                 "microsoft/DialoGPT-medium",
-                 "intfloat/multilingual-e5-large"
-             ]
          }
      }

      @classmethod
      def get_all_media_extensions(cls):
          """Get all supported media file extensions"""
@@ -138,32 +205,43 @@ class AppConfig:
      @classmethod
      def is_image_file(cls, file_path: str) -> bool:
          """Check if file is an image"""
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)

      @classmethod
      def is_audio_file(cls, file_path: str) -> bool:
          """Check if file is an audio file"""
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)

      @classmethod
      def is_video_file(cls, file_path: str) -> bool:
          """Check if file is a video file"""
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)

      @classmethod
      def is_media_file(cls, file_path: str) -> bool:
          """Check if file is any supported media type"""
          return any(ext in file_path.lower() for ext in cls.get_all_media_extensions())
-
-     @classmethod
-     def get_provider_models(cls, provider: str) -> List[str]:
-         """Get available models for a specific provider"""
-         return cls.INFERENCE_PROVIDERS.get(provider, {}).get("models", [])

      @classmethod
-     def get_all_providers(cls) -> Dict[str, Dict]:
-         """Get all available inference providers"""
-         return cls.INFERENCE_PROVIDERS

  # Check for dependencies
  try:
@@ -174,13 +252,13 @@ except ImportError:
      logger.warning("httpx not available - file upload functionality limited")

  try:
-     from huggingface_hub import InferenceClient
-     HF_INFERENCE_AVAILABLE = True
  except ImportError:
-     HF_INFERENCE_AVAILABLE = False
-     logger.warning("huggingface_hub not available - inference provider functionality limited")

- # CSS Configuration
  CUSTOM_CSS = """
  /* Hide Gradio footer */
  footer {
@@ -208,19 +286,77 @@ footer {
  margin-bottom: 0 !important;
  padding-bottom: 0 !important;
  }
- /* Provider selection styling */
- .provider-selection {
- border: 1px solid #e0e0e0;
  border-radius: 8px;
  padding: 10px;
- margin: 5px 0;
  }
- .anthropic-config {
- background-color: #f8f9fa;
- border-left: 4px solid #28a745;
  }
- .hf-config {
- background-color: #fff8e1;
- border-left: 4px solid #ff9800;
  }
- """
  """
+ Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support
  """
  import os
  from dataclasses import dataclass
  class AppConfig:
      """Application configuration settings"""

+     # HuggingFace Configuration
      HF_TOKEN = os.getenv("HF_TOKEN")

+     # OpenAI GPT OSS Models with enhanced configurations
+     AVAILABLE_MODELS = {
+         "openai/gpt-oss-120b": {
+             "name": "GPT OSS 120B",
+             "description": "117B parameters, 5.1B active - Production use with reasoning",
+             "size": "120B",
+             "context_length": 128000,  # Full 128k context length
+             "supports_reasoning": True,
+             "supports_tool_calling": True,
+             "active_params": "5.1B"
+         },
+         "openai/gpt-oss-20b": {
+             "name": "GPT OSS 20B",
+             "description": "21B parameters, 3.6B active - Lower latency with reasoning",
+             "size": "20B",
+             "context_length": 128000,  # Full 128k context length
+             "supports_reasoning": True,
+             "supports_tool_calling": True,
+             "active_params": "3.6B"
+         }
+     }

+     # Enhanced Inference Providers supporting GPT OSS models
      INFERENCE_PROVIDERS = {
+         "cerebras": {
+             "name": "Cerebras",
+             "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "cerebras",
+             "speed": "Very Fast",
+             "recommended_for": ["production", "high-throughput"],
+             "max_context_support": 128000  # Full context support
          },
          "fireworks-ai": {
              "name": "Fireworks AI",
+             "description": "Fast and reliable inference with excellent reliability",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "fireworks-ai",
+             "speed": "Fast",
+             "recommended_for": ["production", "general-use"],
+             "max_context_support": 128000  # Full context support
          },
+         "together-ai": {
+             "name": "Together AI",
+             "description": "Collaborative AI inference with good performance",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "together-ai",
+             "speed": "Fast",
+             "recommended_for": ["development", "experimentation"],
+             "max_context_support": 128000  # Full context support
          },
+         "replicate": {
+             "name": "Replicate",
+             "description": "Machine learning deployment platform",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "replicate",
+             "speed": "Medium",
+             "recommended_for": ["prototyping", "low-volume"],
+             "max_context_support": 128000  # Full context support
          }
      }

+     # Enhanced Model Configuration for GPT-OSS - Utilizing full context
+     MAX_TOKENS = 128000  # Full context length for GPT-OSS models
+
+     # Response token allocation - increased for longer responses
+     DEFAULT_MAX_RESPONSE_TOKENS = 16384  # Increased from 8192 for longer responses
+     MIN_RESPONSE_TOKENS = 4096  # Minimum response size
+
+     # Context management - optimized for full 128k usage
+     SYSTEM_PROMPT_RESERVE = 3000  # Reserve for system prompt (includes MCP tool descriptions)
+     MCP_TOOLS_RESERVE = 2000  # Additional reserve when MCP servers are enabled
+
+     # History management - much larger with 128k context
+     MAX_HISTORY_MESSAGES = 100  # Increased from 50 for better context retention
+     DEFAULT_HISTORY_MESSAGES = 50  # Default for good performance
+
+     # Reasoning configuration
+     DEFAULT_REASONING_EFFORT = "medium"  # low, medium, high
+
+     # UI Configuration
+     GRADIO_THEME = "ocean"
+     DEBUG_MODE = True
+
+     # MCP Server recommendations
+     OPTIMAL_MCP_SERVER_COUNT = 6  # Recommended maximum for good performance
+     WARNING_MCP_SERVER_COUNT = 10  # Show warning if more than this
+
+     # File Support
+     SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
+     SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
+     SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
+     SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']
+
+     @classmethod
+     def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
+         """Get models available for a specific provider"""
+         if provider_id not in cls.INFERENCE_PROVIDERS:
+             return []
+
+         provider = cls.INFERENCE_PROVIDERS[provider_id]
+         available_models = []
+
+         for model_id, model_info in cls.AVAILABLE_MODELS.items():
+             if model_info["size"] == "120B" and provider["supports_120b"]:
+                 available_models.append(model_id)
+             elif model_info["size"] == "20B" and provider["supports_20b"]:
+                 available_models.append(model_id)
+
+         return available_models
+
+     @classmethod
+     def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
+         """Get the full model endpoint for HF Inference Providers"""
+         if provider_id not in cls.INFERENCE_PROVIDERS:
+             raise ValueError(f"Unknown provider: {provider_id}")
+
+         provider = cls.INFERENCE_PROVIDERS[provider_id]
+         return f"{model_id}:{provider['endpoint_suffix']}"
+
+     @classmethod
+     def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, int]:
+         """Get optimal context settings for a model/provider combination"""
+         model_info = cls.AVAILABLE_MODELS.get(model_id, {})
+         provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})
+
+         # Get the minimum of model and provider context support
+         model_context = model_info.get("context_length", 128000)
+         provider_context = provider_info.get("max_context_support", 128000)
+         context_length = min(model_context, provider_context)
+
+         # Calculate reserves based on MCP server count
+         system_reserve = cls.SYSTEM_PROMPT_RESERVE
+         if mcp_servers_count > 0:
+             # Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
+             system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)
+
+         # Dynamic response token allocation based on available context
+         if context_length >= 100000:
+             max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS  # 16384
+         elif context_length >= 50000:
+             max_response_tokens = 12288
+         elif context_length >= 20000:
+             max_response_tokens = 8192
+         else:
+             max_response_tokens = cls.MIN_RESPONSE_TOKENS  # 4096
+
+         # Calculate available context for history
+         available_context = context_length - system_reserve - max_response_tokens
+
+         # Calculate recommended history limit
+         # Assume average message is ~200 tokens
+         avg_message_tokens = 200
+         recommended_history = min(
+             cls.MAX_HISTORY_MESSAGES,
+             available_context // avg_message_tokens
+         )
+
+         return {
+             "max_context": context_length,
+             "available_context": available_context,
+             "max_response_tokens": max_response_tokens,
+             "system_reserve": system_reserve,
+             "recommended_history_limit": max(10, recommended_history),  # At least 10 messages
+             "context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
+         }
+
      @classmethod
      def get_all_media_extensions(cls):
          """Get all supported media file extensions"""
      @classmethod
      def is_image_file(cls, file_path: str) -> bool:
          """Check if file is an image"""
+         if not file_path:
+             return False
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)

      @classmethod
      def is_audio_file(cls, file_path: str) -> bool:
          """Check if file is an audio file"""
+         if not file_path:
+             return False
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)

      @classmethod
      def is_video_file(cls, file_path: str) -> bool:
          """Check if file is a video file"""
+         if not file_path:
+             return False
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)

      @classmethod
      def is_media_file(cls, file_path: str) -> bool:
          """Check if file is any supported media type"""
+         if not file_path:
+             return False
          return any(ext in file_path.lower() for ext in cls.get_all_media_extensions())

      @classmethod
+     def get_provider_recommendation(cls, use_case: str) -> List[str]:
+         """Get recommended providers for specific use cases"""
+         recommendations = {
+             "production": ["cerebras", "fireworks-ai"],
+             "development": ["together-ai", "fireworks-ai"],
+             "experimentation": ["together-ai", "replicate"],
+             "high-throughput": ["cerebras"],
+             "cost-effective": ["together-ai", "replicate"],
+             "maximum-context": ["cerebras", "fireworks-ai"]  # Providers with best context support
+         }
+         return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))

  # Check for dependencies
  try:
      logger.warning("httpx not available - file upload functionality limited")

  try:
+     import huggingface_hub
+     HF_HUB_AVAILABLE = True
  except ImportError:
+     HF_HUB_AVAILABLE = False
+     logger.warning("huggingface_hub not available - login functionality disabled")

+ # Enhanced CSS Configuration with better media display
  CUSTOM_CSS = """
  /* Hide Gradio footer */
  footer {
  margin-bottom: 0 !important;
  padding-bottom: 0 !important;
  }
+ /* Provider and model selection styling */
+ .provider-model-selection {
+ padding: 10px;
  border-radius: 8px;
+ margin-bottom: 10px;
+ border-left: 4px solid #007bff;
+ }
+ /* Login section styling */
+ .login-section {
  padding: 10px;
+ border-radius: 8px;
+ margin-bottom: 10px;
+ border-left: 4px solid #4caf50;
  }
+ /* Tool usage indicator */
+ .tool-usage {
+ background: #fff3cd;
+ border: 1px solid #ffeaa7;
+ border-radius: 4px;
+ padding: 8px;
+ margin: 4px 0;
  }
+ /* Media display improvements */
+ .media-container {
+ max-width: 100%;
+ border-radius: 8px;
+ overflow: hidden;
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+ }
+ /* Enhanced audio player styling */
+ audio {
+ width: 100%;
+ max-width: 500px;
+ height: 54px;
+ border-radius: 27px;
+ outline: none;
+ margin: 10px 0;
  }
+ /* Enhanced video player styling */
+ video {
+ width: 100%;
+ max-width: 700px;
+ height: auto;
+ object-fit: contain;
+ border-radius: 8px;
+ margin: 10px 0;
+ box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+ }
+ /* Server status indicators */
+ .server-status {
+ display: inline-block;
+ padding: 2px 8px;
+ border-radius: 12px;
+ font-size: 12px;
+ font-weight: bold;
+ }
+ .server-status.online {
+ background: #d4edda;
+ color: #155724;
+ }
+ .server-status.offline {
+ background: #f8d7da;
+ color: #721c24;
+ }
+ /* Message metadata styling */
+ .message-metadata {
+ font-size: 0.85em;
+ color: #666;
+ margin-top: 4px;
+ padding: 4px 8px;
+ background: #f0f0f0;
+ border-radius: 4px;
+ }
+ """