ysharma (HF Staff) committed
Commit 244107e · verified · 1 Parent(s): 84f8ce7

Update config.py

Files changed (1)
  1. config.py +256 -120
config.py CHANGED
@@ -1,6 +1,5 @@
  """
- Configuration module for Universal MCP Client
- Enhanced with HuggingFace Inference Provider support
  """
  import os
  from dataclasses import dataclass
@@ -22,112 +21,180 @@ class MCPServerConfig:
  class AppConfig:
      """Application configuration settings"""

-     # API Configuration
-     ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
      HF_TOKEN = os.getenv("HF_TOKEN")

-     # Model Configuration
-     CLAUDE_MODEL = "claude-sonnet-4-20250514"
-     MAX_TOKENS = 2048
-
-     # MCP Configuration
-     MCP_BETA_VERSION = "mcp-client-2025-04-04"
-     MCP_TIMEOUT_SECONDS = 20.0
-
-     # UI Configuration
-     GRADIO_THEME = "citrus"
-     DEBUG_MODE = True
-
-     # File Support
-     SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp']
-     SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac']
-     SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov']
-     SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx']

-     # Inference Providers Configuration
      INFERENCE_PROVIDERS = {
-         "sambanova": {
-             "name": "SambaNova",
-             "description": "Ultra-fast inference on optimized hardware",
-             "supports_tools": True,
-             "models": [
-                 "meta-llama/Llama-3.3-70B-Instruct",
-                 "deepseek-ai/DeepSeek-R1-0528",
-                 "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
-                 "intfloat/e5-mistral-7b-instruct"
-             ]
-         },
-         "together": {
-             "name": "Together AI",
-             "description": "High-performance inference for open models",
-             "supports_tools": True,
-             "models": [
-                 "deepseek-ai/DeepSeek-V3-0324",
-                 "Qwen/Qwen2.5-72B-Instruct",
-                 "meta-llama/Llama-3.1-8B-Instruct",
-                 "black-forest-labs/FLUX.1-dev"
-             ]
-         },
-         "replicate": {
-             "name": "Replicate",
-             "description": "Run AI models in the cloud",
-             "supports_tools": True,
-             "models": [
-                 "meta/llama-2-70b-chat",
-                 "mistralai/mixtral-8x7b-instruct-v0.1",
-                 "black-forest-labs/flux-schnell"
-             ]
-         },
-         "groq": {
-             "name": "Groq",
-             "description": "Ultra-low latency LPU inference",
-             "supports_tools": True,
-             "models": [
-                 "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-                 "llama-3.1-70b-versatile",
-                 "mixtral-8x7b-32768"
-             ]
-         },
-         "fal-ai": {
-             "name": "fal.ai",
-             "description": "Fast AI model inference",
-             "supports_tools": True,
-             "models": [
-                 "meta-llama/Llama-3.1-8B-Instruct",
-                 "black-forest-labs/flux-pro"
-             ]
          },
          "fireworks-ai": {
              "name": "Fireworks AI",
-             "description": "Production-ready inference platform",
-             "supports_tools": True,
-             "models": [
-                 "accounts/fireworks/models/llama-v3p1-70b-instruct",
-                 "accounts/fireworks/models/mixtral-8x7b-instruct"
-             ]
          },
-         "cohere": {
-             "name": "Cohere",
-             "description": "Enterprise-grade language AI",
-             "supports_tools": True,
-             "models": [
-                 "command-r-plus",
-                 "command-r",
-                 "command"
-             ]
          },
-         "hf-inference": {
-             "name": "HF Inference",
-             "description": "Hugging Face serverless inference",
-             "supports_tools": True,
-             "models": [
-                 "meta-llama/Llama-3.2-11B-Vision-Instruct",
-                 "microsoft/DialoGPT-medium",
-                 "intfloat/multilingual-e5-large"
-             ]
          }
      }

      @classmethod
      def get_all_media_extensions(cls):
          """Get all supported media file extensions"""
@@ -138,32 +205,43 @@ class AppConfig:
      @classmethod
      def is_image_file(cls, file_path: str) -> bool:
          """Check if file is an image"""
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)

      @classmethod
      def is_audio_file(cls, file_path: str) -> bool:
          """Check if file is an audio file"""
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)

      @classmethod
      def is_video_file(cls, file_path: str) -> bool:
          """Check if file is a video file"""
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)

      @classmethod
      def is_media_file(cls, file_path: str) -> bool:
          """Check if file is any supported media type"""
          return any(ext in file_path.lower() for ext in cls.get_all_media_extensions())
-
-     @classmethod
-     def get_provider_models(cls, provider: str) -> List[str]:
-         """Get available models for a specific provider"""
-         return cls.INFERENCE_PROVIDERS.get(provider, {}).get("models", [])

      @classmethod
-     def get_all_providers(cls) -> Dict[str, Dict]:
-         """Get all available inference providers"""
-         return cls.INFERENCE_PROVIDERS

  # Check for dependencies
  try:
@@ -174,13 +252,13 @@ except ImportError:
      logger.warning("httpx not available - file upload functionality limited")

  try:
-     from huggingface_hub import InferenceClient
-     HF_INFERENCE_AVAILABLE = True
  except ImportError:
-     HF_INFERENCE_AVAILABLE = False
-     logger.warning("huggingface_hub not available - inference provider functionality limited")

- # CSS Configuration
  CUSTOM_CSS = """
  /* Hide Gradio footer */
  footer {
@@ -208,19 +286,77 @@ footer {
  margin-bottom: 0 !important;
  padding-bottom: 0 !important;
  }
- /* Provider selection styling */
- .provider-selection {
- border: 1px solid #e0e0e0;
  border-radius: 8px;
  padding: 10px;
- margin: 5px 0;
  }
- .anthropic-config {
- background-color: #f8f9fa;
- border-left: 4px solid #28a745;
  }
- .hf-config {
- background-color: #fff8e1;
- border-left: 4px solid #ff9800;
  }
- """
  """
+ Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support
  """
  import os
  from dataclasses import dataclass
  class AppConfig:
      """Application configuration settings"""

+     # HuggingFace Configuration
      HF_TOKEN = os.getenv("HF_TOKEN")

+     # OpenAI GPT OSS Models with enhanced configurations
+     AVAILABLE_MODELS = {
+         "openai/gpt-oss-120b": {
+             "name": "GPT OSS 120B",
+             "description": "117B parameters, 5.1B active - Production use with reasoning",
+             "size": "120B",
+             "context_length": 128000,  # Full 128k context length
+             "supports_reasoning": True,
+             "supports_tool_calling": True,
+             "active_params": "5.1B"
+         },
+         "openai/gpt-oss-20b": {
+             "name": "GPT OSS 20B",
+             "description": "21B parameters, 3.6B active - Lower latency with reasoning",
+             "size": "20B",
+             "context_length": 128000,  # Full 128k context length
+             "supports_reasoning": True,
+             "supports_tool_calling": True,
+             "active_params": "3.6B"
+         }
+     }

+     # Enhanced Inference Providers supporting GPT OSS models
      INFERENCE_PROVIDERS = {
+         "cerebras": {
+             "name": "Cerebras",
+             "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "cerebras",
+             "speed": "Very Fast",
+             "recommended_for": ["production", "high-throughput"],
+             "max_context_support": 128000  # Full context support
          },
          "fireworks-ai": {
              "name": "Fireworks AI",
+             "description": "Fast and reliable inference with excellent reliability",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "fireworks-ai",
+             "speed": "Fast",
+             "recommended_for": ["production", "general-use"],
+             "max_context_support": 128000  # Full context support
          },
+         "together-ai": {
+             "name": "Together AI",
+             "description": "Collaborative AI inference with good performance",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "together-ai",
+             "speed": "Fast",
+             "recommended_for": ["development", "experimentation"],
+             "max_context_support": 128000  # Full context support
          },
+         "replicate": {
+             "name": "Replicate",
+             "description": "Machine learning deployment platform",
+             "supports_120b": True,
+             "supports_20b": True,
+             "endpoint_suffix": "replicate",
+             "speed": "Medium",
+             "recommended_for": ["prototyping", "low-volume"],
+             "max_context_support": 128000  # Full context support
          }
      }

+     # Enhanced Model Configuration for GPT-OSS - Utilizing full context
+     MAX_TOKENS = 128000  # Full context length for GPT-OSS models
+
+     # Response token allocation - increased for longer responses
+     DEFAULT_MAX_RESPONSE_TOKENS = 16384  # Increased from 8192 for longer responses
+     MIN_RESPONSE_TOKENS = 4096  # Minimum response size
+
+     # Context management - optimized for full 128k usage
+     SYSTEM_PROMPT_RESERVE = 3000  # Reserve for system prompt (includes MCP tool descriptions)
+     MCP_TOOLS_RESERVE = 2000  # Additional reserve when MCP servers are enabled
+
+     # History management - much larger with 128k context
+     MAX_HISTORY_MESSAGES = 100  # Increased from 50 for better context retention
+     DEFAULT_HISTORY_MESSAGES = 50  # Default for good performance
+
+     # Reasoning configuration
+     DEFAULT_REASONING_EFFORT = "medium"  # low, medium, high
+
+     # UI Configuration
+     GRADIO_THEME = "ocean"
+     DEBUG_MODE = True
+
+     # MCP Server recommendations
+     OPTIMAL_MCP_SERVER_COUNT = 6  # Recommended maximum for good performance
+     WARNING_MCP_SERVER_COUNT = 10  # Show warning if more than this
+
+     # File Support
+     SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
+     SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
+     SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
+     SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']
+
+     @classmethod
+     def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
+         """Get models available for a specific provider"""
+         if provider_id not in cls.INFERENCE_PROVIDERS:
+             return []
+
+         provider = cls.INFERENCE_PROVIDERS[provider_id]
+         available_models = []
+
+         for model_id, model_info in cls.AVAILABLE_MODELS.items():
+             if model_info["size"] == "120B" and provider["supports_120b"]:
+                 available_models.append(model_id)
+             elif model_info["size"] == "20B" and provider["supports_20b"]:
+                 available_models.append(model_id)
+
+         return available_models
+
+     @classmethod
+     def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
+         """Get the full model endpoint for HF Inference Providers"""
+         if provider_id not in cls.INFERENCE_PROVIDERS:
+             raise ValueError(f"Unknown provider: {provider_id}")
+
+         provider = cls.INFERENCE_PROVIDERS[provider_id]
+         return f"{model_id}:{provider['endpoint_suffix']}"
+
+     @classmethod
+     def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, int]:
+         """Get optimal context settings for a model/provider combination"""
+         model_info = cls.AVAILABLE_MODELS.get(model_id, {})
+         provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})
+
+         # Get the minimum of model and provider context support
+         model_context = model_info.get("context_length", 128000)
+         provider_context = provider_info.get("max_context_support", 128000)
+         context_length = min(model_context, provider_context)
+
+         # Calculate reserves based on MCP server count
+         system_reserve = cls.SYSTEM_PROMPT_RESERVE
+         if mcp_servers_count > 0:
+             # Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
+             system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)
+
+         # Dynamic response token allocation based on available context
+         if context_length >= 100000:
+             max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS  # 16384
+         elif context_length >= 50000:
+             max_response_tokens = 12288
+         elif context_length >= 20000:
+             max_response_tokens = 8192
+         else:
+             max_response_tokens = cls.MIN_RESPONSE_TOKENS  # 4096
+
+         # Calculate available context for history
+         available_context = context_length - system_reserve - max_response_tokens
+
+         # Calculate recommended history limit
+         # Assume average message is ~200 tokens
+         avg_message_tokens = 200
+         recommended_history = min(
+             cls.MAX_HISTORY_MESSAGES,
+             available_context // avg_message_tokens
+         )
+
+         return {
+             "max_context": context_length,
+             "available_context": available_context,
+             "max_response_tokens": max_response_tokens,
+             "system_reserve": system_reserve,
+             "recommended_history_limit": max(10, recommended_history),  # At least 10 messages
+             "context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
+         }
+
      @classmethod
      def get_all_media_extensions(cls):
          """Get all supported media file extensions"""
      @classmethod
      def is_image_file(cls, file_path: str) -> bool:
          """Check if file is an image"""
+         if not file_path:
+             return False
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)

      @classmethod
      def is_audio_file(cls, file_path: str) -> bool:
          """Check if file is an audio file"""
+         if not file_path:
+             return False
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)

      @classmethod
      def is_video_file(cls, file_path: str) -> bool:
          """Check if file is a video file"""
+         if not file_path:
+             return False
          return any(ext in file_path.lower() for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)

      @classmethod
      def is_media_file(cls, file_path: str) -> bool:
          """Check if file is any supported media type"""
+         if not file_path:
+             return False
          return any(ext in file_path.lower() for ext in cls.get_all_media_extensions())

      @classmethod
+     def get_provider_recommendation(cls, use_case: str) -> List[str]:
+         """Get recommended providers for specific use cases"""
+         recommendations = {
+             "production": ["cerebras", "fireworks-ai"],
+             "development": ["together-ai", "fireworks-ai"],
+             "experimentation": ["together-ai", "replicate"],
+             "high-throughput": ["cerebras"],
+             "cost-effective": ["together-ai", "replicate"],
+             "maximum-context": ["cerebras", "fireworks-ai"]  # Providers with best context support
+         }
+         return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))

  # Check for dependencies
  try:
      logger.warning("httpx not available - file upload functionality limited")

  try:
+     import huggingface_hub
+     HF_HUB_AVAILABLE = True
  except ImportError:
+     HF_HUB_AVAILABLE = False
+     logger.warning("huggingface_hub not available - login functionality disabled")

+ # Enhanced CSS Configuration with better media display
  CUSTOM_CSS = """
  /* Hide Gradio footer */
  footer {
  margin-bottom: 0 !important;
  padding-bottom: 0 !important;
  }
+ /* Provider and model selection styling */
+ .provider-model-selection {
+ padding: 10px;
  border-radius: 8px;
+ margin-bottom: 10px;
+ border-left: 4px solid #007bff;
+ }
+ /* Login section styling */
+ .login-section {
  padding: 10px;
+ border-radius: 8px;
+ margin-bottom: 10px;
+ border-left: 4px solid #4caf50;
  }
+ /* Tool usage indicator */
+ .tool-usage {
+ background: #fff3cd;
+ border: 1px solid #ffeaa7;
+ border-radius: 4px;
+ padding: 8px;
+ margin: 4px 0;
  }
+ /* Media display improvements */
+ .media-container {
+ max-width: 100%;
+ border-radius: 8px;
+ overflow: hidden;
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+ }
+ /* Enhanced audio player styling */
+ audio {
+ width: 100%;
+ max-width: 500px;
+ height: 54px;
+ border-radius: 27px;
+ outline: none;
+ margin: 10px 0;
  }
+ /* Enhanced video player styling */
+ video {
+ width: 100%;
+ max-width: 700px;
+ height: auto;
+ object-fit: contain;
+ border-radius: 8px;
+ margin: 10px 0;
+ box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+ }
+ /* Server status indicators */
+ .server-status {
+ display: inline-block;
+ padding: 2px 8px;
+ border-radius: 12px;
+ font-size: 12px;
+ font-weight: bold;
+ }
+ .server-status.online {
+ background: #d4edda;
+ color: #155724;
+ }
+ .server-status.offline {
+ background: #f8d7da;
+ color: #721c24;
+ }
+ /* Message metadata styling */
+ .message-metadata {
+ font-size: 0.85em;
+ color: #666;
+ margin-top: 4px;
+ padding: 4px 8px;
+ background: #f0f0f0;
+ border-radius: 4px;
+ }
+ """