ysharma HF Staff commited on
Commit
23f3cfe
·
verified ·
1 Parent(s): 244107e

Update mcp_client.py

Browse files
Files changed (1) hide show
  1. mcp_client.py +499 -699
mcp_client.py CHANGED
@@ -1,85 +1,75 @@
1
  """
2
- MCP Client implementation for Universal MCP Client
3
- Enhanced with HuggingFace Inference Provider support
4
  """
5
  import asyncio
6
  import json
7
  import re
8
- import base64
9
- from typing import Dict, Optional, Tuple, List, Any
10
- import anthropic
11
  import logging
12
  import traceback
 
 
13
 
14
  # Import the proper MCP client components
15
  from mcp import ClientSession
16
  from mcp.client.sse import sse_client
17
 
18
- from config import MCPServerConfig, AppConfig, HTTPX_AVAILABLE, HF_INFERENCE_AVAILABLE
19
 
20
  logger = logging.getLogger(__name__)
21
 
22
- if HF_INFERENCE_AVAILABLE:
23
- from huggingface_hub import InferenceClient
24
-
25
  class UniversalMCPClient:
26
- """Universal MCP Client for connecting to various MCP servers with multiple LLM backends"""
27
 
28
  def __init__(self):
29
  self.servers: Dict[str, MCPServerConfig] = {}
30
- self.anthropic_client = None
31
  self.hf_client = None
32
  self.current_provider = None
33
  self.current_model = None
 
34
 
35
- # Initialize Anthropic client if API key is available
36
- if AppConfig.ANTHROPIC_API_KEY:
37
- self.anthropic_client = anthropic.Anthropic(
38
- api_key=AppConfig.ANTHROPIC_API_KEY
 
39
  )
40
- logger.info("✅ Anthropic client initialized")
41
- else:
42
- logger.warning("⚠️ ANTHROPIC_API_KEY not found")
43
-
44
- # Initialize HuggingFace client if available
45
- if HF_INFERENCE_AVAILABLE and AppConfig.HF_TOKEN:
46
- logger.info("✅ HuggingFace Hub available")
47
  else:
48
- logger.warning("⚠️ HF_TOKEN not found or huggingface_hub not available")
49
 
50
- def configure_inference_provider(self, provider: str, model: str) -> bool:
51
- """Configure the inference provider and model"""
52
- try:
53
- if not HF_INFERENCE_AVAILABLE:
54
- logger.error("HuggingFace Hub not available")
55
- return False
56
-
57
- if not AppConfig.HF_TOKEN:
58
- logger.error("HF_TOKEN not configured")
59
- return False
60
-
61
- self.hf_client = InferenceClient(
62
- provider=provider,
63
- api_key=AppConfig.HF_TOKEN
64
- )
65
- self.current_provider = provider
66
- self.current_model = model
67
-
68
- logger.info(f"✅ Configured inference provider: {provider} with model: {model}")
69
- return True
70
-
71
- except Exception as e:
72
- logger.error(f"Failed to configure inference provider: {e}")
73
- return False
74
 
75
- def get_current_llm_backend(self) -> str:
76
- """Get the currently configured LLM backend"""
77
- if self.current_provider and self.hf_client:
78
- return f"HF Inference Provider: {self.current_provider}"
79
- elif self.anthropic_client:
80
- return "Anthropic Claude Sonnet 4"
81
- else:
82
- return "No LLM backend configured"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  async def add_server_async(self, config: MCPServerConfig) -> Tuple[bool, str]:
85
  """Add an MCP server using pure MCP protocol"""
@@ -121,11 +111,12 @@ class UniversalMCPClient:
121
  # Update config with proper MCP URL
122
  config.url = mcp_url
123
 
124
- # Test MCP connection
125
  success, message = await self._test_mcp_connection(config)
126
 
127
  if success:
128
  self.servers[config.name] = config
 
129
  logger.info(f"✅ MCP Server {config.name} added successfully")
130
  return True, f"✅ Successfully added MCP server: {config.name}\n{message}"
131
  else:
@@ -139,11 +130,11 @@ class UniversalMCPClient:
139
  return False, f"❌ {error_msg}"
140
 
141
  async def _test_mcp_connection(self, config: MCPServerConfig) -> Tuple[bool, str]:
142
- """Test MCP server connection with detailed debugging"""
143
  try:
144
  logger.info(f"🔍 Testing MCP connection to {config.url}")
145
 
146
- async with sse_client(config.url, timeout=AppConfig.MCP_TIMEOUT_SECONDS) as (read_stream, write_stream):
147
  async with ClientSession(read_stream, write_stream) as session:
148
  # Initialize MCP session
149
  logger.info("🔧 Initializing MCP session...")
@@ -153,14 +144,24 @@ class UniversalMCPClient:
153
  logger.info("📋 Listing available tools...")
154
  tools = await session.list_tools()
155
 
 
 
156
  tool_info = []
 
157
  for tool in tools.tools:
 
 
 
 
158
  tool_info.append(f" - {tool.name}: {tool.description}")
159
  logger.info(f" 📍 Tool: {tool.name}")
160
  logger.info(f" Description: {tool.description}")
161
  if hasattr(tool, 'inputSchema') and tool.inputSchema:
162
  logger.info(f" Input Schema: {tool.inputSchema}")
163
 
 
 
 
164
  if len(tools.tools) == 0:
165
  return False, "No tools found on MCP server"
166
 
@@ -174,453 +175,450 @@ class UniversalMCPClient:
174
  logger.error(traceback.format_exc())
175
  return False, f"Connection failed: {str(e)}"
176
 
177
- async def call_llm_with_mcp(self, messages: List[Dict[str, Any]], user_files: List[str]) -> List[Any]:
178
- """Call LLM with MCP servers - handles both Anthropic and HF providers"""
179
- if self.current_provider and self.hf_client:
180
- # Use HuggingFace Inference Provider with custom MCP implementation
181
- return await self._call_hf_with_custom_mcp(messages, user_files)
182
- elif self.anthropic_client:
183
- # Use Anthropic's native MCP support
184
- return self._call_anthropic_with_native_mcp(messages, user_files)
185
- else:
186
- raise ValueError("No LLM backend configured")
187
-
188
- def _call_anthropic_with_native_mcp(self, messages: List[Dict[str, Any]], user_files: List[str]) -> List[Any]:
189
- """Call Anthropic API with native MCP support (existing implementation)"""
190
- if not self.servers:
191
- return self._call_claude_without_mcp(messages)
192
-
193
- mcp_servers = []
194
- for server_name, config in self.servers.items():
195
- mcp_servers.append({
196
- "type": "url",
197
- "url": config.url,
198
- "name": server_name.replace(" ", "_").lower()
199
- })
200
-
201
- # Enhanced system prompt with multimodal and MCP instructions
202
- system_prompt = self._get_anthropic_mcp_system_prompt(user_files)
203
-
204
- # Debug logging
205
- logger.info(f"📤 Sending {len(messages)} messages to Claude API")
206
- logger.info(f"🔧 Using {len(mcp_servers)} MCP servers")
207
-
208
- start_time = time.time()
209
-
210
- # Call Claude with MCP connector using the correct beta API
211
- response = self.anthropic_client.beta.messages.create(
212
- model=AppConfig.CLAUDE_MODEL,
213
- max_tokens=AppConfig.MAX_TOKENS,
214
- system=system_prompt,
215
- messages=messages,
216
- mcp_servers=mcp_servers,
217
- betas=[AppConfig.MCP_BETA_VERSION]
218
- )
219
-
220
- return self._process_mcp_response(response, start_time)
221
-
222
- async def _call_hf_with_custom_mcp(self, messages: List[Dict[str, Any]], user_files: List[str]) -> List[Any]:
223
- """Call HuggingFace Inference Provider with custom MCP implementation"""
224
- from gradio import ChatMessage
225
- import time
226
 
227
- # Get available tools from MCP servers
228
- available_tools = await self._get_mcp_tools()
 
 
229
 
230
- if not available_tools:
231
- # No MCP tools available, use regular chat completion
232
- return await self._call_hf_without_mcp(messages)
233
 
234
- # Enhanced system prompt for HF providers with MCP
235
- system_prompt = self._get_hf_mcp_system_prompt(user_files, available_tools)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
- # Add system message if not present
238
- if not messages or messages[0].get("role") != "system":
239
- messages.insert(0, {"role": "system", "content": system_prompt})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  else:
241
- messages[0]["content"] = system_prompt
 
 
 
 
 
242
 
243
- chat_messages = []
244
- max_iterations = 5 # Prevent infinite loops
245
- iteration = 0
246
 
247
- while iteration < max_iterations:
248
- iteration += 1
249
- logger.info(f"🔄 HF+MCP Iteration {iteration}")
250
 
251
- # Call HuggingFace model
252
- try:
253
- completion = self.hf_client.chat.completions.create(
254
- model=self.current_model,
255
- messages=messages,
256
- max_tokens=AppConfig.MAX_TOKENS,
257
- temperature=0.7
258
- )
259
-
260
- response_content = completion.choices[0].message.content
261
- logger.info(f"📝 HF Response: {response_content[:200]}...")
262
-
263
- # Check if model wants to use tools
264
- tool_calls = self._extract_tool_calls_from_response(response_content)
 
 
 
 
 
 
 
 
 
265
 
266
- if not tool_calls:
267
- # No tool calls, return final response
268
- chat_messages.append(ChatMessage(
269
- role="assistant",
270
- content=response_content
271
- ))
272
- break
273
 
274
- # Execute tool calls
275
- for tool_call in tool_calls:
276
- tool_name = tool_call.get("name")
277
- tool_args = tool_call.get("arguments", {})
 
 
 
 
 
 
278
 
279
- # Add thinking message for tool usage
280
- chat_messages.append(ChatMessage(
281
- role="assistant",
282
- content="",
283
- metadata={
284
- "title": f"🔧 Using {tool_name}",
285
- "id": f"tool_{iteration}",
286
- "status": "pending",
287
- "log": f"Calling MCP tool: {tool_name}"
 
 
288
  }
289
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
- # Execute tool via MCP
292
- tool_result = await self._execute_mcp_tool(tool_name, tool_args)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
 
294
- # Update tool status
295
- for msg in chat_messages:
296
- if (msg.metadata and
297
- msg.metadata.get("id") == f"tool_{iteration}" and
298
- msg.metadata.get("status") == "pending"):
299
- msg.metadata["status"] = "done"
300
- break
301
 
302
- # Add tool result
303
- chat_messages.append(ChatMessage(
304
- role="assistant",
305
- content=tool_result,
306
- metadata={
307
- "title": "📋 Tool Result",
308
- "parent_id": f"tool_{iteration}",
309
- "status": "done"
310
- }
311
- ))
312
 
313
- # Add tool result to conversation context
314
- messages.append({
315
- "role": "assistant",
316
- "content": f"I used the tool {tool_name} and got this result: {tool_result}"
317
- })
318
 
319
- # Check for media in tool result
320
- media_url = self._extract_media_from_tool_result(tool_result)
321
- if media_url:
322
- chat_messages.append(ChatMessage(
323
- role="assistant",
324
- content={"path": media_url}
325
- ))
326
-
327
- # Continue conversation with tool results
328
- messages.append({
329
- "role": "user",
330
- "content": "Please provide a summary of the results and help with the user's original request."
331
- })
332
-
333
- except Exception as e:
334
- logger.error(f"Error in HF+MCP iteration {iteration}: {e}")
335
- chat_messages.append(ChatMessage(
336
- role="assistant",
337
- content=f"❌ Error during tool execution: {str(e)}"
338
- ))
339
- break
340
-
341
- if not chat_messages:
342
- chat_messages.append(ChatMessage(
343
- role="assistant",
344
- content="I understand your request and I'm here to help."
345
- ))
346
-
347
- return chat_messages
348
-
349
- async def _get_mcp_tools(self) -> List[Dict[str, Any]]:
350
- """Get available tools from all MCP servers"""
351
- tools = []
352
- for server_name, config in self.servers.items():
353
- try:
354
- async with sse_client(config.url, timeout=AppConfig.MCP_TIMEOUT_SECONDS) as (read_stream, write_stream):
355
- async with ClientSession(read_stream, write_stream) as session:
356
- await session.initialize()
357
- server_tools = await session.list_tools()
358
-
359
- for tool in server_tools.tools:
360
- tools.append({
361
- "name": tool.name,
362
- "description": tool.description,
363
- "server": server_name,
364
- "schema": tool.inputSchema if hasattr(tool, 'inputSchema') else {}
365
- })
366
-
367
- except Exception as e:
368
- logger.error(f"Failed to get tools from {server_name}: {e}")
369
 
370
- return tools
 
371
 
372
- def _extract_tool_calls_from_response(self, response: str) -> List[Dict[str, Any]]:
373
- """Extract tool calls from LLM response text"""
374
- # Look for tool call patterns in the response
375
- # This is a simple implementation - you might want to make this more robust
376
  import re
377
 
378
- tool_calls = []
379
-
380
- # Pattern to match tool calls like: CALL_TOOL: tool_name(arg1="value1", arg2="value2")
381
- pattern = r'CALL_TOOL:\s*(\w+)\((.*?)\)'
382
- matches = re.findall(pattern, response)
383
 
384
- for match in matches:
385
- tool_name = match[0]
386
- args_str = match[1]
387
-
388
- # Simple argument parsing (you might want to improve this)
389
- args = {}
390
- if args_str:
391
- arg_pairs = args_str.split(',')
392
- for pair in arg_pairs:
393
- if '=' in pair:
394
- key, value = pair.split('=', 1)
395
- key = key.strip().strip('"').strip("'")
396
- value = value.strip().strip('"').strip("'")
397
- args[key] = value
398
-
399
- tool_calls.append({
400
- "name": tool_name,
401
- "arguments": args
402
- })
403
 
404
- return tool_calls
405
-
406
- async def _execute_mcp_tool(self, tool_name: str, arguments: Dict[str, Any]) -> str:
407
- """Execute a tool via MCP servers"""
408
- for server_name, config in self.servers.items():
409
  try:
410
- async with sse_client(config.url, timeout=AppConfig.MCP_TIMEOUT_SECONDS) as (read_stream, write_stream):
411
- async with ClientSession(read_stream, write_stream) as session:
412
- await session.initialize()
413
-
414
- # Check if this server has the tool
415
- tools = await session.list_tools()
416
- tool_found = False
417
- for tool in tools.tools:
418
- if tool.name == tool_name:
419
- tool_found = True
420
- break
421
-
422
- if not tool_found:
423
- continue
424
-
425
- # Call the tool
426
- result = await session.call_tool(tool_name, arguments)
 
 
 
 
 
 
427
 
428
- if result.content:
429
- return result.content[0].text if hasattr(result.content[0], 'text') else str(result.content[0])
430
- else:
431
- return "Tool executed successfully but returned no content"
432
-
433
  except Exception as e:
434
- logger.error(f"Failed to execute tool {tool_name} on {server_name}: {e}")
 
435
 
436
- return f"❌ Failed to execute tool: {tool_name}"
 
437
 
438
- def _extract_media_from_tool_result(self, result: str) -> Optional[str]:
439
- """Extract media URL from tool result"""
440
- # Use existing media extraction logic
441
- if not self.servers:
442
- return None
443
 
444
- # Use the first server's config for media extraction
445
- config = next(iter(self.servers.values()))
446
- return self._extract_media_from_mcp_response(result, config)
447
-
448
- async def _call_hf_without_mcp(self, messages: List[Dict[str, Any]]) -> List[Any]:
449
- """Call HuggingFace provider without MCP"""
450
- from gradio import ChatMessage
451
 
452
  try:
453
- completion = self.hf_client.chat.completions.create(
454
- model=self.current_model,
455
- messages=messages,
456
- max_tokens=AppConfig.MAX_TOKENS,
457
- temperature=0.7
458
- )
459
 
460
- response_content = completion.choices[0].message.content
 
 
461
 
462
- return [ChatMessage(role="assistant", content=response_content)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
 
464
  except Exception as e:
465
- logger.error(f"HF inference error: {e}")
466
- return [ChatMessage(role="assistant", content=f"❌ Error: {str(e)}")]
467
 
468
- def _call_claude_without_mcp(self, messages: List[Dict[str, Any]]) -> List[Any]:
469
- """Call Claude API without MCP servers (existing implementation)"""
470
- from gradio import ChatMessage
471
- import time
472
-
473
- logger.info("💬 No MCP servers available, using regular Claude chat")
474
 
475
- system_prompt = self._get_native_system_prompt()
 
 
 
476
 
477
- # Use regular messages API
478
- response = self.anthropic_client.messages.create(
479
- model=AppConfig.CLAUDE_MODEL,
480
- max_tokens=AppConfig.MAX_TOKENS,
481
- system=system_prompt,
482
- messages=messages
483
- )
484
 
485
- response_text = ""
486
- for content in response.content:
487
- if content.type == "text":
488
- response_text += content.text
489
 
490
- if not response_text:
491
- response_text = "I understand your request and I'm here to help."
 
 
 
492
 
493
- return [ChatMessage(role="assistant", content=response_text)]
494
-
495
- def _get_native_system_prompt(self) -> str:
496
- """Get system prompt for Claude without MCP servers"""
497
- from datetime import datetime
498
-
499
- return f"""You are Claude Sonnet 4, a helpful AI assistant with native multimodal capabilities. You can have conversations, answer questions, help with various tasks, and provide information on a wide range of topics.
500
-
501
- YOUR NATIVE CAPABILITIES (Available right now):
502
- - **Image Understanding**: You can directly see and describe images, analyze their content, read text in images, identify objects, people, scenes, etc.
503
- - **Text Processing**: You can analyze, summarize, translate, and process text directly
504
- - **General Knowledge**: You can answer questions, explain concepts, and have conversations
505
- - **Code Analysis**: You can read, analyze, and explain code
506
-
507
- Current time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
508
-
509
- IMPORTANT: You DO NOT need MCP servers for:
510
- - Describing or analyzing uploaded images
511
- - Reading text in images
512
- - Identifying objects, people, or scenes in images
513
- - General conversation and knowledge questions
514
-
515
- You DO need MCP servers for:
516
- - Creating new images, audio, or video
517
- - Editing or transforming existing media files
518
- - Transcribing audio files
519
- - Processing non-image files (audio, video, documents)
520
-
521
- If users upload images and ask you to describe or analyze them, use your native vision capabilities immediately. Only mention MCP servers if they ask for creation or editing tasks."""
522
-
523
- def _get_anthropic_mcp_system_prompt(self, user_files: List[str]) -> str:
524
- """Get system prompt for Claude with MCP servers (existing implementation)"""
525
- from datetime import datetime
526
-
527
- uploaded_files_context = ""
528
- if user_files:
529
- uploaded_files_context = f"\n\nFILES UPLOADED BY USER:\n"
530
- for i, file_path in enumerate(user_files, 1):
531
- file_name = file_path.split('/')[-1] if '/' in file_path else file_path
532
- if AppConfig.is_image_file(file_path):
533
- file_type = "Image"
534
- elif AppConfig.is_audio_file(file_path):
535
- file_type = "Audio"
536
- elif AppConfig.is_video_file(file_path):
537
- file_type = "Video"
538
- else:
539
- file_type = "File"
540
- uploaded_files_context += f"{i}. {file_type}: {file_name} (path: {file_path})\n"
541
 
542
- return f"""You are Claude Sonnet 4, a helpful AI assistant with both native multimodal capabilities and access to various MCP tools.
543
-
544
- YOUR NATIVE CAPABILITIES (No MCP tools needed):
545
- - **Image Understanding**: You can directly see and describe images, analyze their content, read text in images, etc.
546
- - **Text Processing**: You can analyze, summarize, translate, and process text directly
547
- - **General Knowledge**: You can answer questions, explain concepts, and have conversations
548
- - **Code Analysis**: You can read, analyze, and explain code
549
-
550
- WHEN TO USE MCP TOOLS:
551
- - **Image Generation**: Creating new images from text prompts
552
- - **Image Editing**: Modifying, enhancing, or transforming existing images
553
- - **Audio Processing**: Transcribing audio, generating speech, audio enhancement
554
- - **Video Processing**: Creating or editing videos
555
- - **Specialized Analysis**: Tasks requiring specific models or APIs
556
-
557
- UPLOADED FILES HANDLING:
558
- {uploaded_files_context}
559
-
560
- IMPORTANT - For uploaded images:
561
- - **Image Description/Analysis**: Use your NATIVE vision capabilities - you can see and describe images directly
562
- - **Image Editing/Enhancement**: Use MCP image processing tools
563
- - **Image Generation**: Use MCP image generation tools
564
-
565
- IMPORTANT - GRADIO MEDIA DISPLAY:
566
- When MCP tools return media, end your response with "MEDIA_GENERATED: [URL]" where [URL] is the actual media URL.
567
-
568
- Current time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
569
- Available MCP servers: {list(self.servers.keys())}"""
570
-
571
- def _get_hf_mcp_system_prompt(self, user_files: List[str], available_tools: List[Dict[str, Any]]) -> str:
572
- """Get system prompt for HuggingFace providers with MCP"""
573
- from datetime import datetime
574
-
575
- uploaded_files_context = ""
576
- if user_files:
577
- uploaded_files_context = f"\n\nFILES UPLOADED BY USER:\n"
578
- for i, file_path in enumerate(user_files, 1):
579
- file_name = file_path.split('/')[-1] if '/' in file_path else file_path
580
- if AppConfig.is_image_file(file_path):
581
- file_type = "Image"
582
- elif AppConfig.is_audio_file(file_path):
583
- file_type = "Audio"
584
- elif AppConfig.is_video_file(file_path):
585
- file_type = "Video"
586
- else:
587
- file_type = "File"
588
- uploaded_files_context += f"{i}. {file_type}: {file_name} (path: {file_path})\n"
589
-
590
- tools_context = ""
591
- if available_tools:
592
- tools_context = f"\n\nAVAILABLE MCP TOOLS:\n"
593
- for tool in available_tools:
594
- tools_context += f"- {tool['name']}: {tool['description']} (server: {tool['server']})\n"
595
-
596
- tools_context += f"\nTo use a tool, respond with: CALL_TOOL: tool_name(arg1=\"value1\", arg2=\"value2\")\n"
597
 
598
- return f"""You are an AI assistant using {self.current_provider} inference with {self.current_model}. You have access to external tools via MCP (Model Context Protocol).
599
-
600
- YOUR CAPABILITIES:
601
- - **Text Processing**: You can analyze, summarize, translate, and process text directly
602
- - **General Knowledge**: You can answer questions, explain concepts, and have conversations
603
- - **Code Analysis**: You can read, analyze, and explain code
604
- - **Tool Usage**: You can call external tools to extend your capabilities
605
-
606
- UPLOADED FILES HANDLING:
607
- {uploaded_files_context}
608
-
609
- {tools_context}
610
-
611
- IMPORTANT INSTRUCTIONS:
612
- - For complex tasks requiring specialized capabilities, use the available MCP tools
613
- - When you need to use a tool, clearly indicate it with the CALL_TOOL format
614
- - After using tools, provide a clear summary of the results to the user
615
- - If a tool returns media (images, audio, video), describe what was generated/processed
616
-
617
- Current time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
618
- Current LLM: {self.current_provider}/{self.current_model}
619
- Available MCP servers: {list(self.servers.keys())}"""
620
 
621
- # Include existing helper methods from original implementation
622
  def _extract_media_from_mcp_response(self, result_text: str, config: MCPServerConfig) -> Optional[str]:
623
- """Enhanced media extraction from MCP responses (existing implementation)"""
624
  if not isinstance(result_text, str):
625
  logger.info(f"🔍 Non-string result: {type(result_text)}")
626
  return None
@@ -647,13 +645,13 @@ Available MCP servers: {list(self.servers.keys())}"""
647
  if media_type in item and isinstance(item[media_type], dict):
648
  media_data = item[media_type]
649
  if 'url' in media_data:
650
- url = media_data['url']
651
  logger.info(f"🎯 Found {media_type} URL: {url}")
652
  return self._resolve_media_url(url, base_url)
653
 
654
  # Check for direct URL
655
  if 'url' in item:
656
- url = item['url']
657
  logger.info(f"🎯 Found direct URL: {url}")
658
  return self._resolve_media_url(url, base_url)
659
 
@@ -666,13 +664,13 @@ Available MCP servers: {list(self.servers.keys())}"""
666
  if media_type in data and isinstance(data[media_type], dict):
667
  media_data = data[media_type]
668
  if 'url' in media_data:
669
- url = media_data['url']
670
  logger.info(f"🎯 Found {media_type} URL: {url}")
671
  return self._resolve_media_url(url, base_url)
672
 
673
  # Check for direct URL
674
  if 'url' in data:
675
- url = data['url']
676
  logger.info(f"🎯 Found direct URL: {url}")
677
  return self._resolve_media_url(url, base_url)
678
 
@@ -681,17 +679,36 @@ Available MCP servers: {list(self.servers.keys())}"""
681
  except Exception as e:
682
  logger.warning(f"🔍 JSON parsing error: {e}")
683
 
684
- # 2. Check for data URLs (base64 encoded media)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
685
  if result_text.startswith('data:'):
686
  logger.info("🎯 Found data URL")
687
  return result_text
688
 
689
- # 3. Check for base64 image patterns
690
- if any(result_text.startswith(pattern) for pattern in ['iVBORw0KGgoAAAANSUhEU', '/9j/', 'UklGR']):
691
- logger.info("🎯 Found base64 image data")
692
- return f"data:image/png;base64,{result_text}"
693
-
694
- # 4. Check for file paths and convert to URLs
695
  if AppConfig.is_media_file(result_text):
696
  # Extract just the filename if it's a path
697
  if '/' in result_text:
@@ -699,246 +716,29 @@ Available MCP servers: {list(self.servers.keys())}"""
699
  else:
700
  filename = result_text.strip()
701
 
702
- # Create Gradio file URL
703
- if filename.startswith('http'):
704
- media_url = filename
705
- else:
706
- media_url = f"{base_url}/file={filename}"
707
-
708
- logger.info(f"🎯 Found media file: {media_url}")
709
  return media_url
710
 
711
- # 5. Check for HTTP URLs that look like media
712
- if result_text.startswith('http') and AppConfig.is_media_file(result_text):
713
- logger.info(f"🎯 Found HTTP media URL: {result_text}")
714
- return result_text
715
-
716
  logger.info("❌ No media detected in result")
717
  return None
718
 
719
  def _resolve_media_url(self, url: str, base_url: str) -> str:
720
- """Resolve relative URLs to absolute URLs"""
721
  if url.startswith('http') or url.startswith('data:'):
722
  return url
 
 
 
 
 
 
723
  elif url.startswith('/'):
724
  return f"{base_url}/file={url}"
725
  else:
726
  return f"{base_url}/file={url}"
727
 
728
- def _process_mcp_response(self, response, start_time: float) -> List[Any]:
729
- """Process Claude's response with MCP tool calls into structured ChatMessage objects (existing implementation)"""
730
- from gradio import ChatMessage
731
- import time
732
-
733
- chat_messages = []
734
- current_tool_id = None
735
- current_server_name = None
736
- tool_start_time = None
737
- text_segments = [] # Collect text segments separately
738
-
739
- # Process Claude's response
740
- for content in response.content:
741
- if content.type == "text":
742
- # Collect text segments but don't combine them yet
743
- text_content = content.text
744
- # Check if Claude indicated media was generated
745
- if "MEDIA_GENERATED:" in text_content:
746
- media_match = re.search(r"MEDIA_GENERATED:\s*([^\s]+)", text_content)
747
- if media_match:
748
- media_url = media_match.group(1)
749
- # Clean up the response text
750
- text_content = re.sub(r"MEDIA_GENERATED:\s*[^\s]+", "", text_content).strip()
751
- logger.info(f"🎯 Claude indicated media generated: {media_url}")
752
- # Add media as separate message
753
- chat_messages.append(ChatMessage(
754
- role="assistant",
755
- content={"path": media_url}
756
- ))
757
-
758
- if text_content.strip():
759
- text_segments.append(text_content.strip())
760
-
761
- elif hasattr(content, 'type') and content.type == "mcp_tool_use":
762
- # Add any accumulated text before tool use
763
- if text_segments:
764
- combined_text = " ".join(text_segments)
765
- if combined_text.strip():
766
- chat_messages.append(ChatMessage(
767
- role="assistant",
768
- content=combined_text.strip()
769
- ))
770
- text_segments = [] # Reset
771
-
772
- tool_name = content.name
773
- server_name = content.server_name
774
- current_tool_id = getattr(content, 'id', 'unknown')
775
- current_server_name = server_name
776
- tool_start_time = time.time()
777
-
778
- logger.info(f"🔧 Claude used MCP tool: {tool_name} on server: {server_name}")
779
-
780
- # Create a "thinking" message for tool usage
781
- chat_messages.append(ChatMessage(
782
- role="assistant",
783
- content="",
784
- metadata={
785
- "title": f"🔧 Using {tool_name}",
786
- "id": current_tool_id,
787
- "status": "pending",
788
- "log": f"Server: {server_name}"
789
- }
790
- ))
791
-
792
- elif hasattr(content, 'type') and content.type == "mcp_tool_result":
793
- tool_use_id = getattr(content, 'tool_use_id', 'unknown')
794
- duration = time.time() - tool_start_time if tool_start_time else None
795
-
796
- logger.info(f"📝 Processing MCP tool result (tool_use_id: {tool_use_id})")
797
-
798
- # Update the pending tool message to completed
799
- for msg in chat_messages:
800
- if (msg.metadata and
801
- msg.metadata.get("id") == current_tool_id and
802
- msg.metadata.get("status") == "pending"):
803
- msg.metadata["status"] = "done"
804
- if duration:
805
- msg.metadata["duration"] = round(duration, 2)
806
- break
807
-
808
- media_url = None
809
- if content.content:
810
- result_content = content.content[0]
811
- result_text = result_content.text if hasattr(result_content, 'text') else str(result_content)
812
-
813
- logger.info(f"📝 MCP tool result: {result_text[:200]}...")
814
-
815
- # Try to extract media URL from the result
816
- if current_server_name and current_server_name in self.servers:
817
- config = self.servers[current_server_name]
818
- extracted_media = self._extract_media_from_mcp_response(result_text, config)
819
- if extracted_media:
820
- media_url = extracted_media
821
- logger.info(f"🎯 Extracted media from MCP result: {media_url}")
822
- else:
823
- # Fallback: try all servers to find media
824
- for server_name, config in self.servers.items():
825
- extracted_media = self._extract_media_from_mcp_response(result_text, config)
826
- if extracted_media:
827
- media_url = extracted_media
828
- logger.info(f"🎯 Extracted media from MCP result (fallback): {media_url}")
829
- break
830
-
831
- # Always show the full tool result
832
- chat_messages.append(ChatMessage(
833
- role="assistant",
834
- content=result_text,
835
- metadata={
836
- "title": "📋 Tool Result",
837
- "parent_id": current_tool_id,
838
- "status": "done"
839
- }
840
- ))
841
-
842
- # Only add separate media display if the tool result does NOT contain
843
- # any Gradio file data structures that would be auto-rendered
844
- if media_url and not self._contains_gradio_file_structure(result_text):
845
- logger.info(f"🎯 Adding separate media display for: {media_url}")
846
- chat_messages.append(ChatMessage(
847
- role="assistant",
848
- content={"path": media_url}
849
- ))
850
- else:
851
- if media_url:
852
- logger.info(f"🚫 Skipping separate media - tool result contains Gradio file structure")
853
- else:
854
- logger.info(f"🚫 No media URL extracted")
855
-
856
- else:
857
- # Add error message for failed tool call
858
- chat_messages.append(ChatMessage(
859
- role="assistant",
860
- content="Tool call failed: No content returned",
861
- metadata={
862
- "title": "❌ Tool Error",
863
- "parent_id": current_tool_id,
864
- "status": "done"
865
- }
866
- ))
867
-
868
- # Add any remaining text segments after all processing
869
- if text_segments:
870
- combined_text = " ".join(text_segments)
871
- if combined_text.strip():
872
- chat_messages.append(ChatMessage(
873
- role="assistant",
874
- content=combined_text.strip()
875
- ))
876
-
877
- # Fallback if no content was processed
878
- if not chat_messages:
879
- chat_messages.append(ChatMessage(
880
- role="assistant",
881
- content="I understand your request and I'm here to help."
882
- ))
883
-
884
- return chat_messages
885
-
886
- def _contains_gradio_file_structure(self, text: str) -> bool:
887
- """Check if the text contains ANY Gradio file data structures that would be auto-rendered (existing implementation)"""
888
-
889
- # Check for key indicators of Gradio file structures
890
- gradio_indicators = [
891
- # Gradio FileData type indicators
892
- "'_type': 'gradio.FileData'",
893
- '"_type": "gradio.FileData"',
894
- 'gradio.FileData',
895
-
896
- # File structure patterns
897
- "'path':",
898
- '"path":',
899
- "'url':",
900
- '"url":',
901
- "'orig_name':",
902
- '"orig_name":',
903
- "'mime_type':",
904
- '"mime_type":',
905
- 'is_stream',
906
- 'meta_type',
907
-
908
- # Common file result patterns
909
- "{'image':",
910
- '{"image":',
911
- "{'audio':",
912
- '{"audio":',
913
- "{'video':",
914
- '{"video":',
915
- "{'file':",
916
- '{"file":',
917
-
918
- # List patterns that typically contain file objects
919
- "[{'image'",
920
- '[{"image"',
921
- "[{'audio'",
922
- '[{"audio"',
923
- "[{'video'",
924
- '[{"video"',
925
- "[{'file'",
926
- '[{"file"'
927
- ]
928
-
929
- # If we find multiple indicators, it's likely a Gradio file structure
930
- indicator_count = sum(1 for indicator in gradio_indicators if indicator in text)
931
-
932
- # Also check for simple URL patterns (for audio case)
933
- is_simple_url = (text.strip().startswith('http') and
934
- len(text.strip().split()) == 1 and
935
- any(ext in text.lower() for ext in ['.wav', '.mp3', '.mp4', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.webm', '.ogg']))
936
-
937
- result = indicator_count >= 2 or is_simple_url
938
- logger.debug(f"📋 File structure check: {indicator_count} indicators, simple_url: {is_simple_url}, result: {result}")
939
-
940
- return result
941
-
942
  def get_server_status(self) -> Dict[str, str]:
943
  """Get status of all configured servers"""
944
  status = {}
@@ -956,4 +756,4 @@ Available MCP servers: {list(self.servers.keys())}"""
956
  elif "localhost" in config.url or "127.0.0.1" in config.url:
957
  return "🟢 Local server (file access available)"
958
  else:
959
- return "🔴 Remote server (may need public URLs)"
 
1
  """
2
+ MCP Client implementation for Universal MCP Client - Fixed Version
 
3
  """
4
  import asyncio
5
  import json
6
  import re
 
 
 
7
  import logging
8
  import traceback
9
+ from typing import Dict, Optional, Tuple, List, Any
10
+ from openai import OpenAI
11
 
12
  # Import the proper MCP client components
13
  from mcp import ClientSession
14
  from mcp.client.sse import sse_client
15
 
16
+ from config import MCPServerConfig, AppConfig, HTTPX_AVAILABLE
17
 
18
  logger = logging.getLogger(__name__)
19
 
 
 
 
20
  class UniversalMCPClient:
21
+ """Universal MCP Client using HuggingFace Inference Providers instead of Anthropic"""
22
 
23
  def __init__(self):
24
  self.servers: Dict[str, MCPServerConfig] = {}
25
+ self.enabled_servers: Dict[str, bool] = {} # Track enabled/disabled servers
26
  self.hf_client = None
27
  self.current_provider = None
28
  self.current_model = None
29
+ self.server_tools = {} # Cache for server tools
30
 
31
+ # Initialize HF Inference Client if token is available
32
+ if AppConfig.HF_TOKEN:
33
+ self.hf_client = OpenAI(
34
+ base_url="https://router.huggingface.co/v1",
35
+ api_key=AppConfig.HF_TOKEN
36
  )
37
+ logger.info("✅ HuggingFace Inference client initialized")
 
 
 
 
 
 
38
  else:
39
+ logger.warning("⚠️ HF_TOKEN not found")
40
 
41
+ def enable_server(self, server_name: str, enabled: bool = True):
42
+ """Enable or disable a server"""
43
+ if server_name in self.servers:
44
+ self.enabled_servers[server_name] = enabled
45
+ logger.info(f"🔧 Server {server_name} {'enabled' if enabled else 'disabled'}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ def get_enabled_servers(self) -> Dict[str, MCPServerConfig]:
48
+ """Get only enabled servers"""
49
+ return {name: config for name, config in self.servers.items()
50
+ if self.enabled_servers.get(name, True)}
51
+
52
+ def remove_all_servers(self):
53
+ """Remove all servers"""
54
+ count = len(self.servers)
55
+ self.servers.clear()
56
+ self.enabled_servers.clear()
57
+ self.server_tools.clear()
58
+ logger.info(f"🗑️ Removed all {count} servers")
59
+ return count
60
+
61
+ def set_model_and_provider(self, provider_id: str, model_id: str):
62
+ """Set the current provider and model"""
63
+ self.current_provider = provider_id
64
+ self.current_model = model_id
65
+ logger.info(f"🔧 Set provider: {provider_id}, model: {model_id}")
66
+
67
+ def get_model_endpoint(self) -> str:
68
+ """Get the current model endpoint for API calls"""
69
+ if not self.current_provider or not self.current_model:
70
+ raise ValueError("Provider and model must be set before making API calls")
71
+
72
+ return AppConfig.get_model_endpoint(self.current_model, self.current_provider)
73
 
74
  async def add_server_async(self, config: MCPServerConfig) -> Tuple[bool, str]:
75
  """Add an MCP server using pure MCP protocol"""
 
111
  # Update config with proper MCP URL
112
  config.url = mcp_url
113
 
114
+ # Test MCP connection and cache tools
115
  success, message = await self._test_mcp_connection(config)
116
 
117
  if success:
118
  self.servers[config.name] = config
119
+ self.enabled_servers[config.name] = True # Enable by default
120
  logger.info(f"✅ MCP Server {config.name} added successfully")
121
  return True, f"✅ Successfully added MCP server: {config.name}\n{message}"
122
  else:
 
130
  return False, f"❌ {error_msg}"
131
 
132
  async def _test_mcp_connection(self, config: MCPServerConfig) -> Tuple[bool, str]:
133
+ """Test MCP server connection with detailed debugging and tool caching"""
134
  try:
135
  logger.info(f"🔍 Testing MCP connection to {config.url}")
136
 
137
+ async with sse_client(config.url, timeout=20.0) as (read_stream, write_stream):
138
  async with ClientSession(read_stream, write_stream) as session:
139
  # Initialize MCP session
140
  logger.info("🔧 Initializing MCP session...")
 
144
  logger.info("📋 Listing available tools...")
145
  tools = await session.list_tools()
146
 
147
+ # Cache tools for this server
148
+ server_tools = {}
149
  tool_info = []
150
+
151
  for tool in tools.tools:
152
+ server_tools[tool.name] = {
153
+ 'description': tool.description,
154
+ 'schema': tool.inputSchema if hasattr(tool, 'inputSchema') else None
155
+ }
156
  tool_info.append(f" - {tool.name}: {tool.description}")
157
  logger.info(f" 📍 Tool: {tool.name}")
158
  logger.info(f" Description: {tool.description}")
159
  if hasattr(tool, 'inputSchema') and tool.inputSchema:
160
  logger.info(f" Input Schema: {tool.inputSchema}")
161
 
162
+ # Cache tools for this server
163
+ self.server_tools[config.name] = server_tools
164
+
165
  if len(tools.tools) == 0:
166
  return False, "No tools found on MCP server"
167
 
 
175
  logger.error(traceback.format_exc())
176
  return False, f"Connection failed: {str(e)}"
177
 
178
+ async def call_mcp_tool_async(self, server_name: str, tool_name: str, arguments: dict) -> Tuple[bool, str]:
179
+ """Call a tool on a specific MCP server"""
180
+ logger.info(f"🔧 MCP Tool Call - Server: {server_name}, Tool: {tool_name}")
181
+ logger.info(f"🔧 Arguments: {arguments}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
+ if server_name not in self.servers:
184
+ error_msg = f"Server {server_name} not found. Available servers: {list(self.servers.keys())}"
185
+ logger.error(f"❌ {error_msg}")
186
+ return False, error_msg
187
 
188
+ config = self.servers[server_name]
189
+ logger.info(f"🔧 Using server config: {config.url}")
 
190
 
191
+ try:
192
+ logger.info(f"🔗 Connecting to MCP server at {config.url}")
193
+ async with sse_client(config.url, timeout=30.0) as (read_stream, write_stream):
194
+ async with ClientSession(read_stream, write_stream) as session:
195
+ # Initialize MCP session
196
+ logger.info("🔧 Initializing MCP session...")
197
+ await session.initialize()
198
+
199
+ # Call the tool
200
+ logger.info(f"🔧 Calling tool {tool_name} with arguments: {arguments}")
201
+ result = await session.call_tool(tool_name, arguments)
202
+
203
+ # Extract result content
204
+ if result.content:
205
+ result_text = result.content[0].text if hasattr(result.content[0], 'text') else str(result.content[0])
206
+ logger.info(f"✅ Tool call successful, result length: {len(result_text)}")
207
+ logger.info(f"📋 Result preview: {result_text[:200]}...")
208
+ return True, result_text
209
+ else:
210
+ error_msg = "No content returned from tool"
211
+ logger.error(f"❌ {error_msg}")
212
+ return False, error_msg
213
+
214
+ except asyncio.TimeoutError:
215
+ error_msg = f"Tool call timeout for {tool_name} on {server_name}"
216
+ logger.error(f"❌ {error_msg}")
217
+ return False, error_msg
218
+ except Exception as e:
219
+ error_msg = f"Tool call failed: {str(e)}"
220
+ logger.error(f"❌ MCP tool call failed: {e}")
221
+ logger.error(traceback.format_exc())
222
+ return False, error_msg
223
+
224
+ def generate_chat_completion(self, messages: List[Dict[str, Any]], **kwargs) -> Dict[str, Any]:
225
+ """Generate chat completion using HuggingFace Inference Providers"""
226
+ if not self.hf_client:
227
+ raise ValueError("HuggingFace client not initialized. Please set HF_TOKEN.")
228
+
229
+ if not self.current_provider or not self.current_model:
230
+ raise ValueError("Provider and model must be set before making API calls")
231
+
232
+ # Get the model endpoint
233
+ model_endpoint = self.get_model_endpoint()
234
+
235
+ # Set up default parameters for GPT OSS models with higher limits
236
+ params = {
237
+ "model": model_endpoint,
238
+ "messages": messages,
239
+ "max_tokens": kwargs.pop("max_tokens", 8192), # Use pop to avoid conflicts
240
+ "temperature": kwargs.get("temperature", 0.3),
241
+ "stream": kwargs.get("stream", False)
242
+ }
243
+
244
+ # Add any remaining kwargs
245
+ params.update(kwargs)
246
+
247
+ # Add reasoning effort if specified (GPT OSS feature)
248
+ reasoning_effort = kwargs.pop("reasoning_effort", AppConfig.DEFAULT_REASONING_EFFORT)
249
+ if reasoning_effort:
250
+ # For GPT OSS models, we can set reasoning in system prompt
251
+ system_message = None
252
+ for msg in messages:
253
+ if msg.get("role") == "system":
254
+ system_message = msg
255
+ break
256
+
257
+ if system_message:
258
+ system_message["content"] += f"\n\nReasoning: {reasoning_effort}"
259
+ else:
260
+ messages.insert(0, {
261
+ "role": "system",
262
+ "content": f"You are a helpful AI assistant. Reasoning: {reasoning_effort}"
263
+ })
264
 
265
+ try:
266
+ logger.info(f"🤖 Calling {model_endpoint} via {self.current_provider}")
267
+ response = self.hf_client.chat.completions.create(**params)
268
+ return response
269
+ except Exception as e:
270
+ logger.error(f"HF Inference API call failed: {e}")
271
+ raise
272
+
273
+ def generate_chat_completion_with_mcp_tools(self, messages: List[Dict[str, Any]], **kwargs) -> Dict[str, Any]:
274
+ """Generate chat completion with MCP tool support"""
275
+ enabled_servers = self.get_enabled_servers()
276
+ if not enabled_servers:
277
+ # No enabled MCP servers available, use regular completion
278
+ logger.info("🤖 No enabled MCP servers available, using regular chat completion")
279
+ return self.generate_chat_completion(messages, **kwargs)
280
+
281
+ logger.info(f"🔧 Processing chat with {len(enabled_servers)} enabled MCP servers available")
282
+
283
+ # Add system message about available tools with exact tool names
284
+ tool_descriptions = []
285
+ server_names = []
286
+ exact_tool_mappings = []
287
+
288
+ for server_name, config in enabled_servers.items():
289
+ tool_descriptions.append(f"- **{server_name}**: {config.description}")
290
+ server_names.append(server_name)
291
+
292
+ # Add exact tool names if we have them cached
293
+ if server_name in self.server_tools:
294
+ for tool_name, tool_info in self.server_tools[server_name].items():
295
+ exact_tool_mappings.append(f" * Server '{server_name}' has tool '{tool_name}': {tool_info['description']}")
296
+
297
+ # Get the actual server name (not the space ID)
298
+ server_list = ", ".join([f'"{name}"' for name in server_names])
299
+
300
+ tools_system_msg = f"""
301
+ You have access to the following MCP tools:
302
+ {chr(10).join(tool_descriptions)}
303
+ EXACT TOOL MAPPINGS:
304
+ {chr(10).join(exact_tool_mappings) if exact_tool_mappings else "Loading tool mappings..."}
305
+ IMPORTANT SERVER NAMES: {server_list}
306
+ When you need to use a tool, respond with ONLY a JSON object in this EXACT format:
307
+ {{"use_tool": true, "server": "exact_server_name", "tool": "exact_tool_name", "arguments": {{"param": "value"}}}}
308
+ CRITICAL INSTRUCTIONS:
309
+ - Use ONLY the exact server names from this list: {server_list}
310
+ - Use the exact tool names as shown in the mappings above
311
+ - Always include all required parameters in the arguments
312
+ - Do not include any other text before or after the JSON
313
+ - Make sure the JSON is complete and properly formatted
314
+ If you don't need to use a tool, respond normally without any JSON.
315
+ """
316
+
317
+ # Add tools system message with increased context
318
+ enhanced_messages = messages.copy()
319
+ if enhanced_messages and enhanced_messages[0].get("role") == "system":
320
+ enhanced_messages[0]["content"] += "\n\n" + tools_system_msg
321
  else:
322
+ enhanced_messages.insert(0, {"role": "system", "content": tools_system_msg})
323
+
324
+ # Get initial response with higher token limit
325
+ logger.info("🤖 Getting initial response from LLM...")
326
+ response = self.generate_chat_completion(enhanced_messages, **{"max_tokens": 8192})
327
+ response_text = response.choices[0].message.content
328
 
329
+ logger.info(f"🤖 LLM Response (length: {len(response_text)}): {response_text}")
 
 
330
 
331
+ # Check if the response indicates tool usage
332
+ if '"use_tool": true' in response_text:
333
+ logger.info("🔧 Tool usage detected, parsing JSON...")
334
 
335
+ # Extract and parse JSON more robustly
336
+ tool_request = self._extract_tool_json(response_text)
337
+
338
+ if not tool_request:
339
+ # Fallback: try to extract tool info manually
340
+ logger.info("🔧 JSON parsing failed, trying manual extraction...")
341
+ tool_request = self._manual_tool_extraction(response_text)
342
+
343
+ if tool_request:
344
+ server_name = tool_request.get("server")
345
+ tool_name = tool_request.get("tool")
346
+ arguments = tool_request.get("arguments", {})
347
+
348
+ # Replace any local file paths in arguments with uploaded URLs
349
+ if hasattr(self, 'chat_handler_file_mapping'):
350
+ for arg_key, arg_value in arguments.items():
351
+ if isinstance(arg_value, str) and arg_value.startswith('/tmp/gradio/'):
352
+ # Check if we have an uploaded URL for this local path
353
+ for local_path, uploaded_url in self.chat_handler_file_mapping.items():
354
+ if local_path in arg_value or arg_value in local_path:
355
+ logger.info(f"🔄 Replacing local path {arg_value} with uploaded URL {uploaded_url}")
356
+ arguments[arg_key] = uploaded_url
357
+ break
358
 
359
+ logger.info(f"🔧 Tool request - Server: {server_name}, Tool: {tool_name}, Args: {arguments}")
 
 
 
 
 
 
360
 
361
+ if server_name not in self.servers:
362
+ available_servers = list(self.servers.keys())
363
+ logger.error(f"❌ Server '{server_name}' not found. Available servers: {available_servers}")
364
+ # Try to find a matching server by space_id or similar name
365
+ matching_server = None
366
+ for srv_name, srv_config in self.servers.items():
367
+ if (srv_config.space_id and server_name in srv_config.space_id) or server_name in srv_name:
368
+ matching_server = srv_name
369
+ logger.info(f"🔧 Found matching server: {matching_server}")
370
+ break
371
 
372
+ if matching_server and self.enabled_servers.get(matching_server, True):
373
+ server_name = matching_server
374
+ logger.info(f"🔧 Using corrected server name: {server_name}")
375
+ else:
376
+ # Return error response with server name correction
377
+ error_msg = f"Server '{server_name}' not found or disabled. Available enabled servers: {[name for name, enabled in self.enabled_servers.items() if enabled]}"
378
+ response._tool_execution = {
379
+ "server": server_name,
380
+ "tool": tool_name,
381
+ "result": error_msg,
382
+ "success": False
383
  }
384
+ return response
385
+ elif not self.enabled_servers.get(server_name, True):
386
+ logger.error(f"❌ Server '{server_name}' is disabled")
387
+ response._tool_execution = {
388
+ "server": server_name,
389
+ "tool": tool_name,
390
+ "result": f"Server '{server_name}' is currently disabled",
391
+ "success": False
392
+ }
393
+ return response
394
+ # Validate tool name exists for this server
395
+ if server_name in self.server_tools and tool_name not in self.server_tools[server_name]:
396
+ available_tools = list(self.server_tools[server_name].keys())
397
+ logger.warning(f"⚠️ Tool '{tool_name}' not found for server '{server_name}'. Available tools: {available_tools}")
398
 
399
+ # Try to find the correct tool name
400
+ if available_tools:
401
+ # Use the first available tool if there's only one
402
+ if len(available_tools) == 1:
403
+ tool_name = available_tools[0]
404
+ logger.info(f"🔧 Using only available tool: {tool_name}")
405
+ # Or try to find a similar tool name
406
+ else:
407
+ for available_tool in available_tools:
408
+ if tool_name.lower() in available_tool.lower() or available_tool.lower() in tool_name.lower():
409
+ tool_name = available_tool
410
+ logger.info(f"🔧 Found similar tool name: {tool_name}")
411
+ break
412
+
413
+ # Call the MCP tool
414
+ def run_mcp_tool():
415
+ loop = asyncio.new_event_loop()
416
+ asyncio.set_event_loop(loop)
417
+ try:
418
+ return loop.run_until_complete(
419
+ self.call_mcp_tool_async(server_name, tool_name, arguments)
420
+ )
421
+ finally:
422
+ loop.close()
423
+
424
+ success, result = run_mcp_tool()
425
+
426
+ if success:
427
+ logger.info(f"✅ Tool call successful, result length: {len(str(result))}")
428
 
429
+ # Add tool result to conversation and get final response with better prompting
430
+ enhanced_messages.append({"role": "assistant", "content": response_text})
431
+ enhanced_messages.append({"role": "user", "content": f"Tool '{tool_name}' from server '{server_name}' completed successfully. Result: {result}\n\nPlease provide a helpful response based on this tool result. If the result contains media URLs, present them appropriately."})
 
 
 
 
432
 
433
+ # Remove the tool instruction from the system message for the final response
434
+ final_messages = enhanced_messages.copy()
435
+ if final_messages[0].get("role") == "system":
436
+ final_messages[0]["content"] = final_messages[0]["content"].split("You have access to the following MCP tools:")[0].strip()
 
 
 
 
 
 
437
 
438
+ logger.info("🤖 Getting final response with tool result...")
439
+ final_response = self.generate_chat_completion(final_messages, **{"max_tokens": 4096})
 
 
 
440
 
441
+ # Store tool execution info for the chat handler
442
+ final_response._tool_execution = {
443
+ "server": server_name,
444
+ "tool": tool_name,
445
+ "result": result,
446
+ "success": True
447
+ }
448
+
449
+ return final_response
450
+ else:
451
+ logger.error(f"❌ Tool call failed: {result}")
452
+ # Return original response with error info
453
+ response._tool_execution = {
454
+ "server": server_name,
455
+ "tool": tool_name,
456
+ "result": result,
457
+ "success": False
458
+ }
459
+ return response
460
+ else:
461
+ logger.warning("⚠️ Failed to parse tool request JSON")
462
+ else:
463
+ logger.info("💬 No tool usage detected, returning normal response")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
 
465
+ # Return original response if no tool usage or tool call failed
466
+ return response
467
 
468
+ def _extract_tool_json(self, text: str) -> Optional[Dict[str, Any]]:
469
+ """Extract JSON from LLM response more robustly"""
470
+ import json
 
471
  import re
472
 
473
+ logger.info(f"🔍 Full LLM response text: {text}")
 
 
 
 
474
 
475
+ # Try multiple strategies to extract JSON
476
+ strategies = [
477
+ # Strategy 1: Find complete JSON between outer braces
478
+ lambda t: re.search(r'\{[^{}]*"use_tool"[^{}]*"arguments"[^{}]*\{[^{}]*\}[^{}]*\}', t),
479
+ # Strategy 2: Find JSON that starts with {"use_tool" and reconstruct if needed
480
+ lambda t: self._reconstruct_json_from_start(t),
481
+ # Strategy 3: Find any complete JSON object
482
+ lambda t: re.search(r'\{(?:[^{}]|\{[^{}]*\})*\}', t),
483
+ ]
 
 
 
 
 
 
 
 
 
 
484
 
485
+ for i, strategy in enumerate(strategies, 1):
 
 
 
 
486
  try:
487
+ if i == 2:
488
+ # Strategy 2 returns a string directly
489
+ json_str = strategy(text)
490
+ if not json_str:
491
+ continue
492
+ else:
493
+ match = strategy(text)
494
+ if not match:
495
+ continue
496
+ json_str = match.group(0)
497
+
498
+ logger.info(f"🔍 JSON extraction strategy {i} found: {json_str}")
499
+
500
+ # Clean up the JSON string
501
+ json_str = json_str.strip()
502
+
503
+ # Try to parse
504
+ parsed = json.loads(json_str)
505
+
506
+ # Validate it's a tool request
507
+ if parsed.get("use_tool") is True:
508
+ logger.info(f"✅ Valid tool request parsed: {parsed}")
509
+ return parsed
510
 
511
+ except json.JSONDecodeError as e:
512
+ logger.warning(f"⚠️ JSON parse error with strategy {i}: {e}")
513
+ logger.warning(f"⚠️ Problematic JSON: {json_str if 'json_str' in locals() else 'N/A'}")
514
+ continue
 
515
  except Exception as e:
516
+ logger.warning(f"⚠️ Strategy {i} failed: {e}")
517
+ continue
518
 
519
+ logger.error("❌ Failed to extract valid JSON from response")
520
+ return None
521
 
522
+ def _manual_tool_extraction(self, text: str) -> Optional[Dict[str, Any]]:
523
+ """Manually extract tool information as fallback"""
524
+ import re
 
 
525
 
526
+ logger.info("🔧 Attempting manual tool extraction...")
 
 
 
 
 
 
527
 
528
  try:
529
+ # Extract server name
530
+ server_match = re.search(r'"server":\s*"([^"]+)"', text)
531
+ tool_match = re.search(r'"tool":\s*"([^"]+)"', text)
 
 
 
532
 
533
+ if not server_match or not tool_match:
534
+ logger.warning("⚠️ Could not find server or tool in manual extraction")
535
+ return None
536
 
537
+ server_name = server_match.group(1)
538
+ tool_name = tool_match.group(1)
539
+
540
+ # Try to extract arguments
541
+ args_match = re.search(r'"arguments":\s*\{([^}]+)\}', text)
542
+ arguments = {}
543
+
544
+ if args_match:
545
+ args_content = args_match.group(1)
546
+ # Simple extraction of key-value pairs
547
+ pairs = re.findall(r'"([^"]+)":\s*"([^"]+)"', args_content)
548
+ arguments = dict(pairs)
549
+
550
+ manual_request = {
551
+ "use_tool": True,
552
+ "server": server_name,
553
+ "tool": tool_name,
554
+ "arguments": arguments
555
+ }
556
+
557
+ logger.info(f"🔧 Manual extraction successful: {manual_request}")
558
+ return manual_request
559
 
560
  except Exception as e:
561
+ logger.error(f" Manual extraction failed: {e}")
562
+ return None
563
 
564
+ def _reconstruct_json_from_start(self, text: str) -> Optional[str]:
565
+ """Try to reconstruct JSON if it's truncated"""
566
+ import re
 
 
 
567
 
568
+ # Find start of JSON
569
+ match = re.search(r'\{"use_tool":\s*true[^}]*', text)
570
+ if not match:
571
+ return None
572
 
573
+ json_start = match.start()
574
+ json_part = text[json_start:]
 
 
 
 
 
575
 
576
+ logger.info(f"🔧 Reconstructing JSON from: {json_part[:200]}...")
 
 
 
577
 
578
+ # Try to find the end or reconstruct
579
+ brace_count = 0
580
+ end_pos = 0
581
+ in_string = False
582
+ escape_next = False
583
 
584
+ for i, char in enumerate(json_part):
585
+ if escape_next:
586
+ escape_next = False
587
+ continue
588
+
589
+ if char == '\\':
590
+ escape_next = True
591
+ continue
592
+
593
+ if char == '"' and not escape_next:
594
+ in_string = not in_string
595
+ continue
596
+
597
+ if not in_string:
598
+ if char == '{':
599
+ brace_count += 1
600
+ elif char == '}':
601
+ brace_count -= 1
602
+ if brace_count == 0:
603
+ end_pos = i + 1
604
+ break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605
 
606
+ if end_pos > 0:
607
+ reconstructed = json_part[:end_pos]
608
+ logger.info(f"🔧 Reconstructed JSON: {reconstructed}")
609
+ return reconstructed
610
+ else:
611
+ # Try to add missing closing braces
612
+ missing_braces = json_part.count('{') - json_part.count('}')
613
+ if missing_braces > 0:
614
+ reconstructed = json_part + '}' * missing_braces
615
+ logger.info(f"🔧 Added {missing_braces} closing braces: {reconstructed}")
616
+ return reconstructed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
617
 
618
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
619
 
 
620
  def _extract_media_from_mcp_response(self, result_text: str, config: MCPServerConfig) -> Optional[str]:
621
+ """Enhanced media extraction from MCP responses with better URL resolution"""
622
  if not isinstance(result_text, str):
623
  logger.info(f"🔍 Non-string result: {type(result_text)}")
624
  return None
 
645
  if media_type in item and isinstance(item[media_type], dict):
646
  media_data = item[media_type]
647
  if 'url' in media_data:
648
+ url = media_data['url'].strip('\'"') # Clean quotes
649
  logger.info(f"🎯 Found {media_type} URL: {url}")
650
  return self._resolve_media_url(url, base_url)
651
 
652
  # Check for direct URL
653
  if 'url' in item:
654
+ url = item['url'].strip('\'"') # Clean quotes
655
  logger.info(f"🎯 Found direct URL: {url}")
656
  return self._resolve_media_url(url, base_url)
657
 
 
664
  if media_type in data and isinstance(data[media_type], dict):
665
  media_data = data[media_type]
666
  if 'url' in media_data:
667
+ url = media_data['url'].strip('\'"') # Clean quotes
668
  logger.info(f"🎯 Found {media_type} URL: {url}")
669
  return self._resolve_media_url(url, base_url)
670
 
671
  # Check for direct URL
672
  if 'url' in data:
673
+ url = data['url'].strip('\'"') # Clean quotes
674
  logger.info(f"🎯 Found direct URL: {url}")
675
  return self._resolve_media_url(url, base_url)
676
 
 
679
  except Exception as e:
680
  logger.warning(f"🔍 JSON parsing error: {e}")
681
 
682
+ # 2. Check for Gradio file URLs (common pattern) with better cleaning
683
+ gradio_file_patterns = [
684
+ r'https://[^/]+\.hf\.space/gradio_api/file=/[^/]+/[^/]+/[^"\s\',]+',
685
+ r'https://[^/]+\.hf\.space/file=[^"\s\',]+',
686
+ r'/gradio_api/file=/[^"\s\',]+'
687
+ ]
688
+
689
+ for pattern in gradio_file_patterns:
690
+ match = re.search(pattern, result_text)
691
+ if match:
692
+ url = match.group(0).rstrip('\'",:;') # Remove trailing punctuation
693
+ logger.info(f"🎯 Found Gradio file URL: {url}")
694
+ if url.startswith('/'):
695
+ url = f"{base_url}{url}"
696
+ return url
697
+
698
+ # 3. Check for simple HTTP URLs in the text
699
+ http_url_pattern = r'https?://[^\s"<>]+'
700
+ matches = re.findall(http_url_pattern, result_text)
701
+ for url in matches:
702
+ if AppConfig.is_media_file(url):
703
+ logger.info(f"🎯 Found HTTP media URL: {url}")
704
+ return url
705
+
706
+ # 4. Check for data URLs (base64 encoded media)
707
  if result_text.startswith('data:'):
708
  logger.info("🎯 Found data URL")
709
  return result_text
710
 
711
+ # 5. For simple file paths, create proper Gradio URLs
 
 
 
 
 
712
  if AppConfig.is_media_file(result_text):
713
  # Extract just the filename if it's a path
714
  if '/' in result_text:
 
716
  else:
717
  filename = result_text.strip()
718
 
719
+ # Create proper Gradio file URL
720
+ media_url = f"{base_url}/file={filename}"
721
+ logger.info(f"🎯 Created media URL from filename: {media_url}")
 
 
 
 
722
  return media_url
723
 
 
 
 
 
 
724
  logger.info("❌ No media detected in result")
725
  return None
726
 
727
  def _resolve_media_url(self, url: str, base_url: str) -> str:
728
+ """Resolve relative URLs to absolute URLs with better handling"""
729
  if url.startswith('http') or url.startswith('data:'):
730
  return url
731
+ elif url.startswith('/gradio_api/file='):
732
+ return f"{base_url}{url}"
733
+ elif url.startswith('/file='):
734
+ return f"{base_url}/gradio_api{url}"
735
+ elif url.startswith('file='):
736
+ return f"{base_url}/gradio_api/{url}"
737
  elif url.startswith('/'):
738
  return f"{base_url}/file={url}"
739
  else:
740
  return f"{base_url}/file={url}"
741
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
742
  def get_server_status(self) -> Dict[str, str]:
743
  """Get status of all configured servers"""
744
  status = {}
 
756
  elif "localhost" in config.url or "127.0.0.1" in config.url:
757
  return "🟢 Local server (file access available)"
758
  else:
759
+ return "🔴 Remote server (may need public URLs)"