Spaces:
Running
feat: Add real job submission for HuggingFace Jobs and Modal
Browse filesMajor Features:
- Implemented actual job submission using HuggingFace Jobs API (run_job)
- Added Modal job submission module for serverless GPU compute
- Extended Settings screen with comprehensive API key management
Settings Screen Enhancements:
- Added Modal API credentials (Token ID + Secret)
- Added LLM provider API keys field (ENV format) for all major providers
- Fixed security issue: Don't expose existing environment variables in UI
- Added validation and test connection for all API keys
HuggingFace Jobs Integration:
- Uses official run_job() API from huggingface_hub
- Auto-selects appropriate Docker image (python:3.12 for CPU, pytorch with CUDA for GPU)
- Auto-selects hardware flavor based on model size (cpu-basic, t4-small, a10g-large, a100-large)
- Passes all LLM provider API keys as secrets
- Configurable timeout (default: 1h)
- Returns actual HF job ID for tracking
Modal Integration:
- Configures Modal apps with appropriate GPU (T4, A10G, A100, H100, H200, B200)
- Auto-selects GPU based on model size
- Passes API keys via Modal secrets
- Ready for async execution
New Evaluation Form:
- Added timeout configuration field
- Now actually submits jobs instead of just showing configuration
- Displays detailed success/error messages with job IDs
- Shows platform-specific instructions
Security:
- API keys stored in session only (not persisted)
- Sensitive fields use password type
- Environment variables not exposed in UI
- app.py +124 -55
- screens/settings.py +176 -12
- utils/hf_jobs_submission.py +264 -0
- utils/modal_job_submission.py +234 -0
|
@@ -2417,6 +2417,14 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
|
|
| 2417 |
placeholder="UUID will be auto-generated"
|
| 2418 |
)
|
| 2419 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2420 |
gr.Markdown("---")
|
| 2421 |
|
| 2422 |
# Cost Estimate Section
|
|
@@ -2609,60 +2617,87 @@ No historical data available for **{model}**.
|
|
| 2609 |
# Test Configuration
|
| 2610 |
dataset_name, split, difficulty, parallel_workers,
|
| 2611 |
# Output & Monitoring
|
| 2612 |
-
output_format, output_dir, enable_otel, enable_gpu_metrics, private, debug, quiet, run_id
|
| 2613 |
):
|
| 2614 |
"""Submit a new evaluation job with comprehensive configuration"""
|
| 2615 |
-
import
|
| 2616 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2617 |
|
| 2618 |
-
#
|
| 2619 |
-
job_id =
|
| 2620 |
-
|
|
|
|
|
|
|
|
|
|
| 2621 |
|
| 2622 |
# Estimate cost
|
| 2623 |
cost_est = estimate_job_cost_with_mcp_fallback(model, hardware)
|
| 2624 |
has_cost_estimate = cost_est is not None
|
| 2625 |
|
| 2626 |
-
# Build CLI command preview
|
| 2627 |
-
cli_command_parts = ["smoltrace-eval"]
|
| 2628 |
-
cli_command_parts.append(f"--model {model}")
|
| 2629 |
-
cli_command_parts.append(f"--provider {provider}")
|
| 2630 |
-
if hf_inference_provider:
|
| 2631 |
-
cli_command_parts.append(f"--hf-inference-provider {hf_inference_provider}")
|
| 2632 |
-
cli_command_parts.append(f"--search-provider {search_provider}")
|
| 2633 |
-
if enable_tools:
|
| 2634 |
-
cli_command_parts.append(f"--enable-tools {','.join(enable_tools)}")
|
| 2635 |
-
if hf_token:
|
| 2636 |
-
cli_command_parts.append("--hf-token $HF_TOKEN")
|
| 2637 |
-
|
| 2638 |
-
cli_command_parts.append(f"--agent-type {agent_type}")
|
| 2639 |
-
|
| 2640 |
-
cli_command_parts.append(f"--dataset-name {dataset_name}")
|
| 2641 |
-
cli_command_parts.append(f"--split {split}")
|
| 2642 |
-
if difficulty != "all":
|
| 2643 |
-
cli_command_parts.append(f"--difficulty {difficulty}")
|
| 2644 |
-
if parallel_workers > 1:
|
| 2645 |
-
cli_command_parts.append(f"--parallel-workers {parallel_workers}")
|
| 2646 |
-
|
| 2647 |
-
cli_command_parts.append(f"--output-format {output_format}")
|
| 2648 |
-
if output_dir and output_format == "json":
|
| 2649 |
-
cli_command_parts.append(f"--output-dir {output_dir}")
|
| 2650 |
-
if enable_otel:
|
| 2651 |
-
cli_command_parts.append("--enable-otel")
|
| 2652 |
-
if not enable_gpu_metrics:
|
| 2653 |
-
cli_command_parts.append("--disable-gpu-metrics")
|
| 2654 |
-
if private:
|
| 2655 |
-
cli_command_parts.append("--private")
|
| 2656 |
-
if debug:
|
| 2657 |
-
cli_command_parts.append("--debug")
|
| 2658 |
-
if quiet:
|
| 2659 |
-
cli_command_parts.append("--quiet")
|
| 2660 |
-
if run_id:
|
| 2661 |
-
cli_command_parts.append(f"--run-id {run_id}")
|
| 2662 |
-
|
| 2663 |
-
cli_command = " \\\n ".join(cli_command_parts)
|
| 2664 |
-
|
| 2665 |
-
# Build success message
|
| 2666 |
cost_info_html = ""
|
| 2667 |
if has_cost_estimate:
|
| 2668 |
source_label = "📊 Historical" if cost_est['source'] == 'historical' else "🤖 MCP Estimate"
|
|
@@ -2690,10 +2725,39 @@ No historical data available for **{model}**.
|
|
| 2690 |
"""
|
| 2691 |
duration_info = "Estimated completion: Will be tracked in leaderboard once job completes"
|
| 2692 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2693 |
success_html = f"""
|
| 2694 |
<div style="background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
|
| 2695 |
padding: 25px; border-radius: 10px; color: white; margin: 15px 0;">
|
| 2696 |
-
<h2 style="margin-top: 0;">✅ Evaluation Job
|
| 2697 |
|
| 2698 |
<div style="background: rgba(255,255,255,0.15); padding: 15px; border-radius: 5px; margin: 15px 0;">
|
| 2699 |
<div style="font-size: 0.9em; opacity: 0.9; margin-bottom: 5px;">Job ID</div>
|
|
@@ -2702,8 +2766,8 @@ No historical data available for **{model}**.
|
|
| 2702 |
|
| 2703 |
<div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 10px; margin-top: 15px;">
|
| 2704 |
<div>
|
| 2705 |
-
<div style="font-size: 0.9em; opacity: 0.9;">
|
| 2706 |
-
<div style="font-weight: bold;">{
|
| 2707 |
</div>
|
| 2708 |
<div>
|
| 2709 |
<div style="font-size: 0.9em; opacity: 0.9;">Model</div>
|
|
@@ -2711,22 +2775,27 @@ No historical data available for **{model}**.
|
|
| 2711 |
</div>
|
| 2712 |
<div>
|
| 2713 |
<div style="font-size: 0.9em; opacity: 0.9;">Hardware</div>
|
| 2714 |
-
<div style="font-weight: bold;">{
|
| 2715 |
</div>
|
| 2716 |
<div>
|
| 2717 |
<div style="font-size: 0.9em; opacity: 0.9;">Agent Type</div>
|
| 2718 |
<div style="font-weight: bold;">{agent_type}</div>
|
| 2719 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2720 |
{cost_info_html}
|
| 2721 |
</div>
|
| 2722 |
|
| 2723 |
-
<div style="margin-top:
|
| 2724 |
-
<div style="font-size: 0.9em;
|
| 2725 |
-
|
| 2726 |
-
{cli_command}
|
| 2727 |
</div>
|
| 2728 |
</div>
|
| 2729 |
|
|
|
|
|
|
|
| 2730 |
<div style="margin-top: 15px; padding: 10px; background: rgba(255,255,255,0.15); border-radius: 5px;">
|
| 2731 |
<div style="font-size: 0.9em;">
|
| 2732 |
⏱️ {duration_info}
|
|
@@ -3544,7 +3613,7 @@ Result: {result}
|
|
| 3544 |
# Test Configuration
|
| 3545 |
eval_dataset_name, eval_split, eval_difficulty, eval_parallel_workers,
|
| 3546 |
# Output & Monitoring
|
| 3547 |
-
eval_output_format, eval_output_dir, eval_enable_otel, eval_enable_gpu_metrics, eval_private, eval_debug, eval_quiet, eval_run_id
|
| 3548 |
],
|
| 3549 |
outputs=[eval_success_message]
|
| 3550 |
)
|
|
|
|
| 2417 |
placeholder="UUID will be auto-generated"
|
| 2418 |
)
|
| 2419 |
|
| 2420 |
+
with gr.Row():
|
| 2421 |
+
eval_timeout = gr.Textbox(
|
| 2422 |
+
value="1h",
|
| 2423 |
+
label="Job Timeout",
|
| 2424 |
+
info="Maximum job duration (e.g., '30m', '1h', '2h')",
|
| 2425 |
+
placeholder="1h"
|
| 2426 |
+
)
|
| 2427 |
+
|
| 2428 |
gr.Markdown("---")
|
| 2429 |
|
| 2430 |
# Cost Estimate Section
|
|
|
|
| 2617 |
# Test Configuration
|
| 2618 |
dataset_name, split, difficulty, parallel_workers,
|
| 2619 |
# Output & Monitoring
|
| 2620 |
+
output_format, output_dir, enable_otel, enable_gpu_metrics, private, debug, quiet, run_id, timeout
|
| 2621 |
):
|
| 2622 |
"""Submit a new evaluation job with comprehensive configuration"""
|
| 2623 |
+
from utils.modal_job_submission import submit_modal_job
|
| 2624 |
+
from utils.hf_jobs_submission import submit_hf_job
|
| 2625 |
+
|
| 2626 |
+
# Submit job based on infrastructure provider
|
| 2627 |
+
if infra_provider == "Modal":
|
| 2628 |
+
result = submit_modal_job(
|
| 2629 |
+
model=model,
|
| 2630 |
+
provider=provider,
|
| 2631 |
+
agent_type=agent_type,
|
| 2632 |
+
hardware=hardware,
|
| 2633 |
+
dataset_name=dataset_name,
|
| 2634 |
+
split=split,
|
| 2635 |
+
difficulty=difficulty,
|
| 2636 |
+
parallel_workers=parallel_workers,
|
| 2637 |
+
hf_token=hf_token,
|
| 2638 |
+
hf_inference_provider=hf_inference_provider,
|
| 2639 |
+
search_provider=search_provider,
|
| 2640 |
+
enable_tools=enable_tools,
|
| 2641 |
+
output_format=output_format,
|
| 2642 |
+
output_dir=output_dir,
|
| 2643 |
+
enable_otel=enable_otel,
|
| 2644 |
+
enable_gpu_metrics=enable_gpu_metrics,
|
| 2645 |
+
private=private,
|
| 2646 |
+
debug=debug,
|
| 2647 |
+
quiet=quiet,
|
| 2648 |
+
run_id=run_id
|
| 2649 |
+
)
|
| 2650 |
+
else: # HuggingFace Jobs
|
| 2651 |
+
result = submit_hf_job(
|
| 2652 |
+
model=model,
|
| 2653 |
+
provider=provider,
|
| 2654 |
+
agent_type=agent_type,
|
| 2655 |
+
hardware=hardware,
|
| 2656 |
+
dataset_name=dataset_name,
|
| 2657 |
+
split=split,
|
| 2658 |
+
difficulty=difficulty,
|
| 2659 |
+
parallel_workers=parallel_workers,
|
| 2660 |
+
hf_token=hf_token,
|
| 2661 |
+
hf_inference_provider=hf_inference_provider,
|
| 2662 |
+
search_provider=search_provider,
|
| 2663 |
+
enable_tools=enable_tools,
|
| 2664 |
+
output_format=output_format,
|
| 2665 |
+
output_dir=output_dir,
|
| 2666 |
+
enable_otel=enable_otel,
|
| 2667 |
+
enable_gpu_metrics=enable_gpu_metrics,
|
| 2668 |
+
private=private,
|
| 2669 |
+
debug=debug,
|
| 2670 |
+
quiet=quiet,
|
| 2671 |
+
run_id=run_id,
|
| 2672 |
+
timeout=timeout or "1h"
|
| 2673 |
+
)
|
| 2674 |
+
|
| 2675 |
+
# Handle submission result
|
| 2676 |
+
if not result.get("success"):
|
| 2677 |
+
# Error occurred
|
| 2678 |
+
error_html = f"""
|
| 2679 |
+
<div style="background: linear-gradient(135deg, #eb3349 0%, #f45c43 100%);
|
| 2680 |
+
padding: 25px; border-radius: 10px; color: white; margin: 15px 0;">
|
| 2681 |
+
<h2 style="margin-top: 0;">❌ Job Submission Failed</h2>
|
| 2682 |
+
<div style="background: rgba(255,255,255,0.15); padding: 15px; border-radius: 5px; margin: 15px 0;">
|
| 2683 |
+
<div style="font-size: 0.9em; opacity: 0.9; margin-bottom: 5px;">Error</div>
|
| 2684 |
+
<div style="font-size: 1.0em;">{result.get('error', 'Unknown error')}</div>
|
| 2685 |
+
</div>
|
| 2686 |
+
</div>
|
| 2687 |
+
"""
|
| 2688 |
+
return gr.update(value=error_html, visible=True)
|
| 2689 |
|
| 2690 |
+
# Success - build success message
|
| 2691 |
+
job_id = result.get('job_id', 'unknown')
|
| 2692 |
+
job_platform = result.get('platform', infra_provider)
|
| 2693 |
+
job_hardware = result.get('hardware', hardware)
|
| 2694 |
+
job_status = result.get('status', 'submitted')
|
| 2695 |
+
job_message = result.get('message', '')
|
| 2696 |
|
| 2697 |
# Estimate cost
|
| 2698 |
cost_est = estimate_job_cost_with_mcp_fallback(model, hardware)
|
| 2699 |
has_cost_estimate = cost_est is not None
|
| 2700 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2701 |
cost_info_html = ""
|
| 2702 |
if has_cost_estimate:
|
| 2703 |
source_label = "📊 Historical" if cost_est['source'] == 'historical' else "🤖 MCP Estimate"
|
|
|
|
| 2725 |
"""
|
| 2726 |
duration_info = "Estimated completion: Will be tracked in leaderboard once job completes"
|
| 2727 |
|
| 2728 |
+
# Add job-specific details
|
| 2729 |
+
job_details_html = ""
|
| 2730 |
+
if result.get('job_yaml'):
|
| 2731 |
+
job_details_html += f"""
|
| 2732 |
+
<div style="margin-top: 20px; padding: 15px; background: rgba(255,255,255,0.15); border-radius: 5px;">
|
| 2733 |
+
<div style="font-size: 0.9em; opacity: 0.9; margin-bottom: 10px;">📄 Job Configuration (job.yaml)</div>
|
| 2734 |
+
<div style="font-family: monospace; font-size: 0.7em; background: rgba(0,0,0,0.2); padding: 10px; border-radius: 3px; overflow-x: auto; max-height: 300px; overflow-y: auto;">
|
| 2735 |
+
{result['job_yaml']}
|
| 2736 |
+
</div>
|
| 2737 |
+
</div>
|
| 2738 |
+
"""
|
| 2739 |
+
|
| 2740 |
+
if result.get('command'):
|
| 2741 |
+
job_details_html += f"""
|
| 2742 |
+
<div style="margin-top: 15px; padding: 15px; background: rgba(255,255,255,0.15); border-radius: 5px;">
|
| 2743 |
+
<div style="font-size: 0.9em; opacity: 0.9; margin-bottom: 10px;">📋 SMOLTRACE Command</div>
|
| 2744 |
+
<div style="font-family: monospace; font-size: 0.75em; background: rgba(0,0,0,0.2); padding: 10px; border-radius: 3px; overflow-x: auto;">
|
| 2745 |
+
{result['command']}
|
| 2746 |
+
</div>
|
| 2747 |
+
</div>
|
| 2748 |
+
"""
|
| 2749 |
+
|
| 2750 |
+
if result.get('instructions'):
|
| 2751 |
+
job_details_html += f"""
|
| 2752 |
+
<div style="margin-top: 15px; padding: 15px; background: rgba(255,200,100,0.2); border-radius: 5px; border-left: 4px solid rgba(255,255,255,0.5);">
|
| 2753 |
+
<div style="font-size: 0.85em; white-space: pre-wrap;">{result['instructions']}</div>
|
| 2754 |
+
</div>
|
| 2755 |
+
"""
|
| 2756 |
+
|
| 2757 |
success_html = f"""
|
| 2758 |
<div style="background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
|
| 2759 |
padding: 25px; border-radius: 10px; color: white; margin: 15px 0;">
|
| 2760 |
+
<h2 style="margin-top: 0;">✅ Evaluation Job Configured!</h2>
|
| 2761 |
|
| 2762 |
<div style="background: rgba(255,255,255,0.15); padding: 15px; border-radius: 5px; margin: 15px 0;">
|
| 2763 |
<div style="font-size: 0.9em; opacity: 0.9; margin-bottom: 5px;">Job ID</div>
|
|
|
|
| 2766 |
|
| 2767 |
<div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 10px; margin-top: 15px;">
|
| 2768 |
<div>
|
| 2769 |
+
<div style="font-size: 0.9em; opacity: 0.9;">Platform</div>
|
| 2770 |
+
<div style="font-weight: bold;">{job_platform}</div>
|
| 2771 |
</div>
|
| 2772 |
<div>
|
| 2773 |
<div style="font-size: 0.9em; opacity: 0.9;">Model</div>
|
|
|
|
| 2775 |
</div>
|
| 2776 |
<div>
|
| 2777 |
<div style="font-size: 0.9em; opacity: 0.9;">Hardware</div>
|
| 2778 |
+
<div style="font-weight: bold;">{job_hardware}</div>
|
| 2779 |
</div>
|
| 2780 |
<div>
|
| 2781 |
<div style="font-size: 0.9em; opacity: 0.9;">Agent Type</div>
|
| 2782 |
<div style="font-weight: bold;">{agent_type}</div>
|
| 2783 |
</div>
|
| 2784 |
+
<div>
|
| 2785 |
+
<div style="font-size: 0.9em; opacity: 0.9;">Status</div>
|
| 2786 |
+
<div style="font-weight: bold;">{job_status.upper()}</div>
|
| 2787 |
+
</div>
|
| 2788 |
{cost_info_html}
|
| 2789 |
</div>
|
| 2790 |
|
| 2791 |
+
<div style="margin-top: 15px; padding: 10px; background: rgba(255,255,255,0.15); border-radius: 5px;">
|
| 2792 |
+
<div style="font-size: 0.9em;">
|
| 2793 |
+
ℹ️ {job_message}
|
|
|
|
| 2794 |
</div>
|
| 2795 |
</div>
|
| 2796 |
|
| 2797 |
+
{job_details_html}
|
| 2798 |
+
|
| 2799 |
<div style="margin-top: 15px; padding: 10px; background: rgba(255,255,255,0.15); border-radius: 5px;">
|
| 2800 |
<div style="font-size: 0.9em;">
|
| 2801 |
⏱️ {duration_info}
|
|
|
|
| 3613 |
# Test Configuration
|
| 3614 |
eval_dataset_name, eval_split, eval_difficulty, eval_parallel_workers,
|
| 3615 |
# Output & Monitoring
|
| 3616 |
+
eval_output_format, eval_output_dir, eval_enable_otel, eval_enable_gpu_metrics, eval_private, eval_debug, eval_quiet, eval_run_id, eval_timeout
|
| 3617 |
],
|
| 3618 |
outputs=[eval_success_message]
|
| 3619 |
)
|
|
@@ -1,12 +1,41 @@
|
|
| 1 |
"""
|
| 2 |
Settings Screen for TraceMind-AI
|
| 3 |
-
Allows users to configure API keys for Gemini and
|
| 4 |
"""
|
| 5 |
|
| 6 |
import gradio as gr
|
| 7 |
import os
|
| 8 |
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def create_settings_screen():
|
| 11 |
"""
|
| 12 |
Create the settings screen for API key configuration
|
|
@@ -58,6 +87,62 @@ def create_settings_screen():
|
|
| 58 |
with gr.Column(scale=1):
|
| 59 |
hf_status = gr.Markdown("⚪ Not configured")
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
# Save button
|
| 62 |
with gr.Row():
|
| 63 |
save_btn = gr.Button("💾 Save API Keys", variant="primary")
|
|
@@ -89,6 +174,34 @@ def create_settings_screen():
|
|
| 89 |
5. Create and copy the token (starts with `hf_...`)
|
| 90 |
|
| 91 |
**Note**: Read-only access is sufficient for viewing datasets
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
""")
|
| 93 |
|
| 94 |
with gr.Accordion("🔒 Privacy & Security", open=False):
|
|
@@ -119,7 +232,7 @@ def create_settings_screen():
|
|
| 119 |
""")
|
| 120 |
|
| 121 |
# Define save functionality
|
| 122 |
-
def save_api_keys(gemini_key, hf_key):
|
| 123 |
"""Save API keys to session"""
|
| 124 |
messages = []
|
| 125 |
|
|
@@ -133,7 +246,6 @@ def create_settings_screen():
|
|
| 133 |
messages.append("⚠️ Invalid Gemini API key format (should start with 'AIza')")
|
| 134 |
gemini_status_text = "❌ Invalid format"
|
| 135 |
else:
|
| 136 |
-
messages.append("⚠️ Gemini API key not provided")
|
| 137 |
gemini_status_text = "⚪ Not configured"
|
| 138 |
|
| 139 |
# Validate and save HuggingFace token
|
|
@@ -146,15 +258,47 @@ def create_settings_screen():
|
|
| 146 |
messages.append("⚠️ Invalid HuggingFace token format (should start with 'hf_')")
|
| 147 |
hf_status_text = "❌ Invalid format"
|
| 148 |
else:
|
| 149 |
-
messages.append("⚠️ HuggingFace token not provided")
|
| 150 |
hf_status_text = "⚪ Not configured"
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
-
return status_msg, gemini_status_text, hf_status_text
|
| 156 |
|
| 157 |
-
def test_api_keys(gemini_key, hf_key):
|
| 158 |
"""Test API key connections"""
|
| 159 |
results = []
|
| 160 |
|
|
@@ -164,7 +308,7 @@ def create_settings_screen():
|
|
| 164 |
import google.generativeai as genai
|
| 165 |
genai.configure(api_key=gemini_key.strip())
|
| 166 |
# Try to list models as a test
|
| 167 |
-
models = genai.list_models()
|
| 168 |
results.append("✅ **Gemini API**: Connection successful!")
|
| 169 |
except Exception as e:
|
| 170 |
results.append(f"❌ **Gemini API**: Connection failed - {str(e)}")
|
|
@@ -184,19 +328,39 @@ def create_settings_screen():
|
|
| 184 |
else:
|
| 185 |
results.append("⚠️ **HuggingFace**: No token provided")
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
return "\n\n".join(results)
|
| 188 |
|
| 189 |
# Wire up button events (api_name=False to prevent API key exposure)
|
| 190 |
save_btn.click(
|
| 191 |
fn=save_api_keys,
|
| 192 |
-
inputs=[gemini_api_key, hf_token],
|
| 193 |
-
outputs=[status_message, gemini_status, hf_status],
|
| 194 |
api_name=False # IMPORTANT: Prevents API key exposure via Gradio API
|
| 195 |
)
|
| 196 |
|
| 197 |
test_btn.click(
|
| 198 |
fn=test_api_keys,
|
| 199 |
-
inputs=[gemini_api_key, hf_token],
|
| 200 |
outputs=[status_message],
|
| 201 |
api_name=False # IMPORTANT: Prevents API key exposure via Gradio API
|
| 202 |
)
|
|
|
|
| 1 |
"""
|
| 2 |
Settings Screen for TraceMind-AI
|
| 3 |
+
Allows users to configure API keys for Gemini, HuggingFace, Modal, and LLM providers
|
| 4 |
"""
|
| 5 |
|
| 6 |
import gradio as gr
|
| 7 |
import os
|
| 8 |
|
| 9 |
|
| 10 |
+
# Note: Removed _get_llm_keys_as_env_string() to prevent exposing environment variables
|
| 11 |
+
# in the UI for security reasons. Users should explicitly enter keys needed for jobs.
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _parse_env_string(env_string):
|
| 15 |
+
"""
|
| 16 |
+
Parse ENV-formatted string into a dictionary
|
| 17 |
+
|
| 18 |
+
Args:
|
| 19 |
+
env_string: Multi-line string in KEY=value format
|
| 20 |
+
|
| 21 |
+
Returns:
|
| 22 |
+
dict: Parsed key-value pairs
|
| 23 |
+
"""
|
| 24 |
+
result = {}
|
| 25 |
+
if not env_string:
|
| 26 |
+
return result
|
| 27 |
+
|
| 28 |
+
for line in env_string.strip().split("\n"):
|
| 29 |
+
line = line.strip()
|
| 30 |
+
if not line or line.startswith("#"):
|
| 31 |
+
continue
|
| 32 |
+
if "=" in line:
|
| 33 |
+
key, value = line.split("=", 1)
|
| 34 |
+
result[key.strip()] = value.strip()
|
| 35 |
+
|
| 36 |
+
return result
|
| 37 |
+
|
| 38 |
+
|
| 39 |
def create_settings_screen():
|
| 40 |
"""
|
| 41 |
Create the settings screen for API key configuration
|
|
|
|
| 87 |
with gr.Column(scale=1):
|
| 88 |
hf_status = gr.Markdown("⚪ Not configured")
|
| 89 |
|
| 90 |
+
# Modal API Key
|
| 91 |
+
with gr.Row():
|
| 92 |
+
with gr.Column(scale=4):
|
| 93 |
+
modal_api_key = gr.Textbox(
|
| 94 |
+
label="Modal API Key (Optional)",
|
| 95 |
+
placeholder="Enter your Modal API key (starts with 'ak-...')",
|
| 96 |
+
type="password",
|
| 97 |
+
value=os.environ.get("MODAL_TOKEN_ID", ""),
|
| 98 |
+
info="Get your key at: https://modal.com/settings/tokens"
|
| 99 |
+
)
|
| 100 |
+
with gr.Column(scale=1):
|
| 101 |
+
modal_status = gr.Markdown("⚪ Not configured")
|
| 102 |
+
|
| 103 |
+
# Modal API Secret
|
| 104 |
+
with gr.Row():
|
| 105 |
+
with gr.Column(scale=4):
|
| 106 |
+
modal_api_secret = gr.Textbox(
|
| 107 |
+
label="Modal API Secret (Optional)",
|
| 108 |
+
placeholder="Enter your Modal API secret (starts with 'as-...')",
|
| 109 |
+
type="password",
|
| 110 |
+
value=os.environ.get("MODAL_TOKEN_SECRET", ""),
|
| 111 |
+
info="Required if using Modal for job execution"
|
| 112 |
+
)
|
| 113 |
+
with gr.Column(scale=1):
|
| 114 |
+
modal_secret_status = gr.Markdown("⚪ Not configured")
|
| 115 |
+
|
| 116 |
+
# LLM Provider API Keys (Multi-line for convenience)
|
| 117 |
+
gr.Markdown("""
|
| 118 |
+
### LLM Provider API Keys (Optional)
|
| 119 |
+
|
| 120 |
+
Paste your API keys in ENV format below. These are needed for running evaluations with API-based models.
|
| 121 |
+
""")
|
| 122 |
+
|
| 123 |
+
llm_api_keys = gr.Textbox(
|
| 124 |
+
label="LLM Provider API Keys",
|
| 125 |
+
placeholder="""OPENAI_API_KEY=sk-...
|
| 126 |
+
ANTHROPIC_API_KEY=sk-ant-...
|
| 127 |
+
GOOGLE_API_KEY=AIza...
|
| 128 |
+
GEMINI_API_KEY=AIza...
|
| 129 |
+
COHERE_API_KEY=...
|
| 130 |
+
MISTRAL_API_KEY=...
|
| 131 |
+
TOGETHER_API_KEY=...
|
| 132 |
+
GROQ_API_KEY=gsk_...
|
| 133 |
+
REPLICATE_API_TOKEN=r8_...
|
| 134 |
+
ANYSCALE_API_KEY=...
|
| 135 |
+
AWS_ACCESS_KEY_ID=...
|
| 136 |
+
AWS_SECRET_ACCESS_KEY=...
|
| 137 |
+
AWS_REGION=us-west-2
|
| 138 |
+
AZURE_OPENAI_API_KEY=...
|
| 139 |
+
AZURE_OPENAI_ENDPOINT=https://...
|
| 140 |
+
LITELLM_API_KEY=...""",
|
| 141 |
+
lines=10,
|
| 142 |
+
value="", # Don't expose existing env vars
|
| 143 |
+
info="Enter one key=value per line. These will be passed to evaluation jobs."
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
# Save button
|
| 147 |
with gr.Row():
|
| 148 |
save_btn = gr.Button("💾 Save API Keys", variant="primary")
|
|
|
|
| 174 |
5. Create and copy the token (starts with `hf_...`)
|
| 175 |
|
| 176 |
**Note**: Read-only access is sufficient for viewing datasets
|
| 177 |
+
|
| 178 |
+
---
|
| 179 |
+
|
| 180 |
+
### Modal API Credentials (Optional)
|
| 181 |
+
|
| 182 |
+
1. Go to [Modal Settings](https://modal.com/settings/tokens)
|
| 183 |
+
2. Click "Create new token"
|
| 184 |
+
3. Copy both:
|
| 185 |
+
- Token ID (starts with `ak-...`)
|
| 186 |
+
- Token Secret (starts with `as-...`)
|
| 187 |
+
|
| 188 |
+
**Why Modal?** Run evaluation jobs on serverless GPU compute with per-second billing.
|
| 189 |
+
|
| 190 |
+
---
|
| 191 |
+
|
| 192 |
+
### LLM Provider API Keys (Optional)
|
| 193 |
+
|
| 194 |
+
These keys enable running evaluations with different model providers:
|
| 195 |
+
|
| 196 |
+
- **OpenAI**: [platform.openai.com/api-keys](https://platform.openai.com/api-keys)
|
| 197 |
+
- **Anthropic**: [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys)
|
| 198 |
+
- **Google (Vertex AI)**: [Google Cloud Console](https://console.cloud.google.com/)
|
| 199 |
+
- **Cohere**: [dashboard.cohere.ai/api-keys](https://dashboard.cohere.ai/api-keys)
|
| 200 |
+
- **Mistral**: [console.mistral.ai/api-keys](https://console.mistral.ai/api-keys)
|
| 201 |
+
- **Together AI**: [api.together.xyz/settings/api-keys](https://api.together.xyz/settings/api-keys)
|
| 202 |
+
- **Groq**: [console.groq.com/keys](https://console.groq.com/keys)
|
| 203 |
+
|
| 204 |
+
Copy and paste them in the `KEY=value` format.
|
| 205 |
""")
|
| 206 |
|
| 207 |
with gr.Accordion("🔒 Privacy & Security", open=False):
|
|
|
|
| 232 |
""")
|
| 233 |
|
| 234 |
# Define save functionality
|
| 235 |
+
def save_api_keys(gemini_key, hf_key, modal_key, modal_secret, llm_keys_text):
|
| 236 |
"""Save API keys to session"""
|
| 237 |
messages = []
|
| 238 |
|
|
|
|
| 246 |
messages.append("⚠️ Invalid Gemini API key format (should start with 'AIza')")
|
| 247 |
gemini_status_text = "❌ Invalid format"
|
| 248 |
else:
|
|
|
|
| 249 |
gemini_status_text = "⚪ Not configured"
|
| 250 |
|
| 251 |
# Validate and save HuggingFace token
|
|
|
|
| 258 |
messages.append("⚠️ Invalid HuggingFace token format (should start with 'hf_')")
|
| 259 |
hf_status_text = "❌ Invalid format"
|
| 260 |
else:
|
|
|
|
| 261 |
hf_status_text = "⚪ Not configured"
|
| 262 |
|
| 263 |
+
# Validate and save Modal API key
|
| 264 |
+
if modal_key and modal_key.strip():
|
| 265 |
+
if modal_key.startswith("ak-"):
|
| 266 |
+
os.environ["MODAL_TOKEN_ID"] = modal_key.strip()
|
| 267 |
+
messages.append("✅ Modal API key saved")
|
| 268 |
+
modal_status_text = "✅ Configured"
|
| 269 |
+
else:
|
| 270 |
+
messages.append("⚠️ Invalid Modal API key format (should start with 'ak-')")
|
| 271 |
+
modal_status_text = "❌ Invalid format"
|
| 272 |
+
else:
|
| 273 |
+
modal_status_text = "⚪ Not configured"
|
| 274 |
+
|
| 275 |
+
# Validate and save Modal API secret
|
| 276 |
+
if modal_secret and modal_secret.strip():
|
| 277 |
+
if modal_secret.startswith("as-"):
|
| 278 |
+
os.environ["MODAL_TOKEN_SECRET"] = modal_secret.strip()
|
| 279 |
+
messages.append("✅ Modal API secret saved")
|
| 280 |
+
modal_secret_status_text = "✅ Configured"
|
| 281 |
+
else:
|
| 282 |
+
messages.append("⚠️ Invalid Modal API secret format (should start with 'as-')")
|
| 283 |
+
modal_secret_status_text = "❌ Invalid format"
|
| 284 |
+
else:
|
| 285 |
+
modal_secret_status_text = "⚪ Not configured"
|
| 286 |
+
|
| 287 |
+
# Parse and save LLM provider API keys
|
| 288 |
+
llm_keys_count = 0
|
| 289 |
+
if llm_keys_text and llm_keys_text.strip():
|
| 290 |
+
parsed_keys = _parse_env_string(llm_keys_text)
|
| 291 |
+
for key, value in parsed_keys.items():
|
| 292 |
+
os.environ[key] = value
|
| 293 |
+
llm_keys_count += 1
|
| 294 |
+
messages.append(f"✅ {llm_keys_count} LLM provider API key(s) saved")
|
| 295 |
+
|
| 296 |
+
status_msg = "\n\n".join(messages) if messages else "No changes made"
|
| 297 |
+
status_msg += "\n\n**Note**: Keys are saved for this session only and will be used for evaluation jobs."
|
| 298 |
|
| 299 |
+
return status_msg, gemini_status_text, hf_status_text, modal_status_text, modal_secret_status_text
|
| 300 |
|
| 301 |
+
def test_api_keys(gemini_key, hf_key, modal_key, modal_secret, llm_keys_text):
|
| 302 |
"""Test API key connections"""
|
| 303 |
results = []
|
| 304 |
|
|
|
|
| 308 |
import google.generativeai as genai
|
| 309 |
genai.configure(api_key=gemini_key.strip())
|
| 310 |
# Try to list models as a test
|
| 311 |
+
models = list(genai.list_models())
|
| 312 |
results.append("✅ **Gemini API**: Connection successful!")
|
| 313 |
except Exception as e:
|
| 314 |
results.append(f"❌ **Gemini API**: Connection failed - {str(e)}")
|
|
|
|
| 328 |
else:
|
| 329 |
results.append("⚠️ **HuggingFace**: No token provided")
|
| 330 |
|
| 331 |
+
# Test Modal API
|
| 332 |
+
if modal_key and modal_key.strip() and modal_secret and modal_secret.strip():
|
| 333 |
+
try:
|
| 334 |
+
import modal
|
| 335 |
+
# Modal validates credentials on first use, not at import
|
| 336 |
+
# We'll just validate format here
|
| 337 |
+
if modal_key.startswith("ak-") and modal_secret.startswith("as-"):
|
| 338 |
+
results.append("✅ **Modal**: Credentials format valid (will be verified on first job submission)")
|
| 339 |
+
else:
|
| 340 |
+
results.append("❌ **Modal**: Invalid credential format")
|
| 341 |
+
except Exception as e:
|
| 342 |
+
results.append(f"⚠�� **Modal**: {str(e)}")
|
| 343 |
+
elif modal_key or modal_secret:
|
| 344 |
+
results.append("⚠️ **Modal**: Both API key and secret required")
|
| 345 |
+
|
| 346 |
+
# Note about LLM provider keys
|
| 347 |
+
if llm_keys_text and llm_keys_text.strip():
|
| 348 |
+
parsed_keys = _parse_env_string(llm_keys_text)
|
| 349 |
+
results.append(f"ℹ️ **LLM Providers**: {len(parsed_keys)} key(s) configured (will be validated when used)")
|
| 350 |
+
|
| 351 |
return "\n\n".join(results)
|
| 352 |
|
| 353 |
# Wire up button events (api_name=False to prevent API key exposure)
|
| 354 |
save_btn.click(
|
| 355 |
fn=save_api_keys,
|
| 356 |
+
inputs=[gemini_api_key, hf_token, modal_api_key, modal_api_secret, llm_api_keys],
|
| 357 |
+
outputs=[status_message, gemini_status, hf_status, modal_status, modal_secret_status],
|
| 358 |
api_name=False # IMPORTANT: Prevents API key exposure via Gradio API
|
| 359 |
)
|
| 360 |
|
| 361 |
test_btn.click(
|
| 362 |
fn=test_api_keys,
|
| 363 |
+
inputs=[gemini_api_key, hf_token, modal_api_key, modal_api_secret, llm_api_keys],
|
| 364 |
outputs=[status_message],
|
| 365 |
api_name=False # IMPORTANT: Prevents API key exposure via Gradio API
|
| 366 |
)
|
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HuggingFace Jobs Submission Module
|
| 3 |
+
|
| 4 |
+
Handles submission of SMOLTRACE evaluation jobs to HuggingFace Jobs platform.
|
| 5 |
+
Uses the official HuggingFace Jobs API: `huggingface_hub.run_job()`
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import uuid
|
| 10 |
+
from typing import Dict, Optional, List
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def submit_hf_job(
|
| 14 |
+
model: str,
|
| 15 |
+
provider: str,
|
| 16 |
+
agent_type: str,
|
| 17 |
+
hardware: str,
|
| 18 |
+
dataset_name: str,
|
| 19 |
+
split: str = "train",
|
| 20 |
+
difficulty: str = "all",
|
| 21 |
+
parallel_workers: int = 1,
|
| 22 |
+
hf_token: Optional[str] = None,
|
| 23 |
+
hf_inference_provider: Optional[str] = None,
|
| 24 |
+
search_provider: str = "duckduckgo",
|
| 25 |
+
enable_tools: Optional[List[str]] = None,
|
| 26 |
+
output_format: str = "hub",
|
| 27 |
+
output_dir: Optional[str] = None,
|
| 28 |
+
enable_otel: bool = True,
|
| 29 |
+
enable_gpu_metrics: bool = True,
|
| 30 |
+
private: bool = False,
|
| 31 |
+
debug: bool = False,
|
| 32 |
+
quiet: bool = False,
|
| 33 |
+
run_id: Optional[str] = None,
|
| 34 |
+
timeout: str = "1h"
|
| 35 |
+
) -> Dict:
|
| 36 |
+
"""
|
| 37 |
+
Submit an evaluation job to HuggingFace Jobs using the run_job API
|
| 38 |
+
|
| 39 |
+
Args:
|
| 40 |
+
model: Model identifier (e.g., "openai/gpt-4")
|
| 41 |
+
provider: Provider type ("litellm", "inference", "transformers")
|
| 42 |
+
agent_type: Agent type ("tool", "code", "both")
|
| 43 |
+
hardware: Hardware type (e.g., "auto", "cpu-basic", "t4-small", "a10g-small")
|
| 44 |
+
dataset_name: HuggingFace dataset for evaluation
|
| 45 |
+
split: Dataset split to use
|
| 46 |
+
difficulty: Difficulty filter
|
| 47 |
+
parallel_workers: Number of parallel workers
|
| 48 |
+
hf_token: HuggingFace token
|
| 49 |
+
hf_inference_provider: HF Inference provider
|
| 50 |
+
search_provider: Search provider for agents
|
| 51 |
+
enable_tools: List of tools to enable
|
| 52 |
+
output_format: Output format ("hub" or "json")
|
| 53 |
+
output_dir: Output directory for JSON format
|
| 54 |
+
enable_otel: Enable OpenTelemetry tracing
|
| 55 |
+
enable_gpu_metrics: Enable GPU metrics collection
|
| 56 |
+
private: Make datasets private
|
| 57 |
+
debug: Enable debug mode
|
| 58 |
+
quiet: Enable quiet mode
|
| 59 |
+
run_id: Optional run ID (auto-generated if not provided)
|
| 60 |
+
timeout: Job timeout (default: "1h")
|
| 61 |
+
|
| 62 |
+
Returns:
|
| 63 |
+
dict: Job submission result with job_id, status, and details
|
| 64 |
+
"""
|
| 65 |
+
try:
|
| 66 |
+
from huggingface_hub import run_job
|
| 67 |
+
except ImportError:
|
| 68 |
+
return {
|
| 69 |
+
"success": False,
|
| 70 |
+
"error": "huggingface_hub package not installed or outdated. Install with: pip install -U huggingface_hub",
|
| 71 |
+
"job_id": None
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
# Validate HF token
|
| 75 |
+
token = hf_token or os.environ.get("HF_TOKEN")
|
| 76 |
+
if not token:
|
| 77 |
+
return {
|
| 78 |
+
"success": False,
|
| 79 |
+
"error": "HuggingFace token not configured. Please set HF_TOKEN in Settings.",
|
| 80 |
+
"job_id": None
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
# Generate job ID
|
| 84 |
+
job_id = run_id if run_id else f"job_{uuid.uuid4().hex[:8]}"
|
| 85 |
+
|
| 86 |
+
# Map hardware to HF Jobs flavor
|
| 87 |
+
if hardware == "auto":
|
| 88 |
+
flavor = _auto_select_hf_hardware(provider, model)
|
| 89 |
+
else:
|
| 90 |
+
flavor = hardware
|
| 91 |
+
|
| 92 |
+
# Determine if this is a GPU job
|
| 93 |
+
is_gpu_job = flavor not in ["cpu-basic", "cpu-upgrade"]
|
| 94 |
+
|
| 95 |
+
# Select appropriate Docker image
|
| 96 |
+
if is_gpu_job:
|
| 97 |
+
# GPU jobs use PyTorch with CUDA
|
| 98 |
+
image = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"
|
| 99 |
+
pip_packages = "smoltrace ddgs smoltrace[gpu]"
|
| 100 |
+
else:
|
| 101 |
+
# CPU jobs use standard Python
|
| 102 |
+
image = "python:3.12"
|
| 103 |
+
pip_packages = "smoltrace ddgs"
|
| 104 |
+
|
| 105 |
+
# Build secrets dictionary
|
| 106 |
+
secrets = {
|
| 107 |
+
"HF_TOKEN": token
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
# Add LLM provider API keys from environment
|
| 111 |
+
llm_key_names = [
|
| 112 |
+
"OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GOOGLE_API_KEY",
|
| 113 |
+
"GEMINI_API_KEY", "COHERE_API_KEY", "MISTRAL_API_KEY",
|
| 114 |
+
"TOGETHER_API_KEY", "GROQ_API_KEY", "REPLICATE_API_TOKEN",
|
| 115 |
+
"ANYSCALE_API_KEY", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
|
| 116 |
+
"AWS_REGION", "AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT",
|
| 117 |
+
"LITELLM_API_KEY"
|
| 118 |
+
]
|
| 119 |
+
|
| 120 |
+
for key_name in llm_key_names:
|
| 121 |
+
value = os.environ.get(key_name)
|
| 122 |
+
if value:
|
| 123 |
+
secrets[key_name] = value
|
| 124 |
+
|
| 125 |
+
# Build SMOLTRACE command
|
| 126 |
+
cmd_parts = ["smoltrace-eval"]
|
| 127 |
+
cmd_parts.append(f"--model {model}")
|
| 128 |
+
cmd_parts.append(f"--provider {provider}")
|
| 129 |
+
if hf_inference_provider:
|
| 130 |
+
cmd_parts.append(f"--hf-inference-provider {hf_inference_provider}")
|
| 131 |
+
cmd_parts.append(f"--search-provider {search_provider}")
|
| 132 |
+
if enable_tools:
|
| 133 |
+
cmd_parts.append(f"--enable-tools {','.join(enable_tools)}")
|
| 134 |
+
cmd_parts.append(f"--agent-type {agent_type}")
|
| 135 |
+
cmd_parts.append(f"--dataset-name {dataset_name}")
|
| 136 |
+
cmd_parts.append(f"--split {split}")
|
| 137 |
+
if difficulty != "all":
|
| 138 |
+
cmd_parts.append(f"--difficulty {difficulty}")
|
| 139 |
+
if parallel_workers > 1:
|
| 140 |
+
cmd_parts.append(f"--parallel-workers {parallel_workers}")
|
| 141 |
+
cmd_parts.append(f"--output-format {output_format}")
|
| 142 |
+
if output_dir and output_format == "json":
|
| 143 |
+
cmd_parts.append(f"--output-dir {output_dir}")
|
| 144 |
+
if enable_otel:
|
| 145 |
+
cmd_parts.append("--enable-otel")
|
| 146 |
+
if not enable_gpu_metrics:
|
| 147 |
+
cmd_parts.append("--disable-gpu-metrics")
|
| 148 |
+
if private:
|
| 149 |
+
cmd_parts.append("--private")
|
| 150 |
+
if debug:
|
| 151 |
+
cmd_parts.append("--debug")
|
| 152 |
+
if quiet:
|
| 153 |
+
cmd_parts.append("--quiet")
|
| 154 |
+
cmd_parts.append(f"--run-id {job_id}")
|
| 155 |
+
|
| 156 |
+
smoltrace_command = " ".join(cmd_parts)
|
| 157 |
+
|
| 158 |
+
# Build full command with pip install
|
| 159 |
+
full_command = f"pip install {pip_packages} && {smoltrace_command}"
|
| 160 |
+
|
| 161 |
+
# Submit job using HuggingFace Jobs API
|
| 162 |
+
try:
|
| 163 |
+
job = run_job(
|
| 164 |
+
image=image,
|
| 165 |
+
command=["bash", "-c", full_command],
|
| 166 |
+
secrets=secrets,
|
| 167 |
+
flavor=flavor,
|
| 168 |
+
timeout=timeout
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
return {
|
| 172 |
+
"success": True,
|
| 173 |
+
"job_id": job_id,
|
| 174 |
+
"hf_job_id": job.job_id if hasattr(job, 'job_id') else str(job),
|
| 175 |
+
"platform": "HuggingFace Jobs",
|
| 176 |
+
"hardware": flavor,
|
| 177 |
+
"image": image,
|
| 178 |
+
"command": smoltrace_command,
|
| 179 |
+
"status": "submitted",
|
| 180 |
+
"message": f"Job successfully submitted to HuggingFace Jobs (flavor: {flavor})",
|
| 181 |
+
"instructions": f"""
|
| 182 |
+
✅ Job submitted successfully!
|
| 183 |
+
|
| 184 |
+
**Job Details:**
|
| 185 |
+
- Flavor: {flavor}
|
| 186 |
+
- Image: {image}
|
| 187 |
+
- Timeout: {timeout}
|
| 188 |
+
|
| 189 |
+
**Monitor your job:**
|
| 190 |
+
- View job status: https://huggingface.co/jobs
|
| 191 |
+
- HF Job ID: {job.job_id if hasattr(job, 'job_id') else 'check dashboard'}
|
| 192 |
+
|
| 193 |
+
**What happens next:**
|
| 194 |
+
1. Job starts running on HuggingFace infrastructure
|
| 195 |
+
2. SMOLTRACE evaluates your model
|
| 196 |
+
3. Results are automatically pushed to HuggingFace datasets
|
| 197 |
+
4. They will appear in TraceMind leaderboard when complete
|
| 198 |
+
""".strip()
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
except Exception as e:
|
| 202 |
+
return {
|
| 203 |
+
"success": False,
|
| 204 |
+
"error": f"Failed to submit job to HuggingFace: {str(e)}",
|
| 205 |
+
"job_id": job_id,
|
| 206 |
+
"command": smoltrace_command,
|
| 207 |
+
"debug_info": {
|
| 208 |
+
"image": image,
|
| 209 |
+
"flavor": flavor,
|
| 210 |
+
"timeout": timeout,
|
| 211 |
+
"secrets_configured": list(secrets.keys())
|
| 212 |
+
}
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def _auto_select_hf_hardware(provider: str, model: str) -> str:
|
| 217 |
+
"""
|
| 218 |
+
Automatically select HuggingFace Jobs hardware based on model and provider
|
| 219 |
+
|
| 220 |
+
Args:
|
| 221 |
+
provider: Provider type
|
| 222 |
+
model: Model identifier
|
| 223 |
+
|
| 224 |
+
Returns:
|
| 225 |
+
str: HF Jobs flavor
|
| 226 |
+
"""
|
| 227 |
+
# API models only need CPU
|
| 228 |
+
if provider in ["litellm", "inference"]:
|
| 229 |
+
return "cpu-basic"
|
| 230 |
+
|
| 231 |
+
# Local models need GPU - select based on model size
|
| 232 |
+
model_lower = model.lower()
|
| 233 |
+
|
| 234 |
+
if "70b" in model_lower or "65b" in model_lower:
|
| 235 |
+
# Large models need high-end GPU
|
| 236 |
+
return "a100-large"
|
| 237 |
+
elif "13b" in model_lower or "34b" in model_lower:
|
| 238 |
+
# Medium models work on A10
|
| 239 |
+
return "a10g-large"
|
| 240 |
+
elif "7b" in model_lower or "8b" in model_lower or "4b" in model_lower:
|
| 241 |
+
# Small models efficient on T4 or A10
|
| 242 |
+
return "t4-small" # More cost-effective for small models
|
| 243 |
+
else:
|
| 244 |
+
# Default to T4 for unknown sizes
|
| 245 |
+
return "t4-small"
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def check_job_status(job_id: str, hf_token: Optional[str] = None) -> Dict:
|
| 249 |
+
"""
|
| 250 |
+
Check the status of a HuggingFace Job
|
| 251 |
+
|
| 252 |
+
Args:
|
| 253 |
+
job_id: Job ID to check
|
| 254 |
+
hf_token: HuggingFace token (optional, uses env if not provided)
|
| 255 |
+
|
| 256 |
+
Returns:
|
| 257 |
+
dict: Job status information
|
| 258 |
+
"""
|
| 259 |
+
# Placeholder for when HF Jobs API becomes available
|
| 260 |
+
return {
|
| 261 |
+
"job_id": job_id,
|
| 262 |
+
"status": "unknown",
|
| 263 |
+
"message": "HuggingFace Jobs status API not yet available programmatically"
|
| 264 |
+
}
|
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Modal Job Submission Module
|
| 3 |
+
|
| 4 |
+
Handles submission of SMOLTRACE evaluation jobs to Modal's serverless compute platform.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import uuid
|
| 9 |
+
from typing import Dict, Optional, List
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def submit_modal_job(
|
| 13 |
+
model: str,
|
| 14 |
+
provider: str,
|
| 15 |
+
agent_type: str,
|
| 16 |
+
hardware: str,
|
| 17 |
+
dataset_name: str,
|
| 18 |
+
split: str = "train",
|
| 19 |
+
difficulty: str = "all",
|
| 20 |
+
parallel_workers: int = 1,
|
| 21 |
+
hf_token: Optional[str] = None,
|
| 22 |
+
hf_inference_provider: Optional[str] = None,
|
| 23 |
+
search_provider: str = "duckduckgo",
|
| 24 |
+
enable_tools: Optional[List[str]] = None,
|
| 25 |
+
output_format: str = "hub",
|
| 26 |
+
output_dir: Optional[str] = None,
|
| 27 |
+
enable_otel: bool = True,
|
| 28 |
+
enable_gpu_metrics: bool = True,
|
| 29 |
+
private: bool = False,
|
| 30 |
+
debug: bool = False,
|
| 31 |
+
quiet: bool = False,
|
| 32 |
+
run_id: Optional[str] = None
|
| 33 |
+
) -> Dict:
|
| 34 |
+
"""
|
| 35 |
+
Submit an evaluation job to Modal
|
| 36 |
+
|
| 37 |
+
Args:
|
| 38 |
+
model: Model identifier (e.g., "openai/gpt-4")
|
| 39 |
+
provider: Provider type ("litellm", "inference", "transformers")
|
| 40 |
+
agent_type: Agent type ("tool", "code", "both")
|
| 41 |
+
hardware: Hardware type (e.g., "auto", "gpu_a10", "gpu_h200")
|
| 42 |
+
dataset_name: HuggingFace dataset for evaluation
|
| 43 |
+
split: Dataset split to use
|
| 44 |
+
difficulty: Difficulty filter
|
| 45 |
+
parallel_workers: Number of parallel workers
|
| 46 |
+
hf_token: HuggingFace token
|
| 47 |
+
hf_inference_provider: HF Inference provider
|
| 48 |
+
search_provider: Search provider for agents
|
| 49 |
+
enable_tools: List of tools to enable
|
| 50 |
+
output_format: Output format ("hub" or "json")
|
| 51 |
+
output_dir: Output directory for JSON format
|
| 52 |
+
enable_otel: Enable OpenTelemetry tracing
|
| 53 |
+
enable_gpu_metrics: Enable GPU metrics collection
|
| 54 |
+
private: Make datasets private
|
| 55 |
+
debug: Enable debug mode
|
| 56 |
+
quiet: Enable quiet mode
|
| 57 |
+
run_id: Optional run ID (auto-generated if not provided)
|
| 58 |
+
|
| 59 |
+
Returns:
|
| 60 |
+
dict: Job submission result with job_id, status, and details
|
| 61 |
+
"""
|
| 62 |
+
try:
|
| 63 |
+
import modal
|
| 64 |
+
except ImportError:
|
| 65 |
+
return {
|
| 66 |
+
"success": False,
|
| 67 |
+
"error": "Modal package not installed. Install with: pip install modal",
|
| 68 |
+
"job_id": None
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
# Validate Modal credentials
|
| 72 |
+
modal_token_id = os.environ.get("MODAL_TOKEN_ID")
|
| 73 |
+
modal_token_secret = os.environ.get("MODAL_TOKEN_SECRET")
|
| 74 |
+
|
| 75 |
+
if not modal_token_id or not modal_token_secret:
|
| 76 |
+
return {
|
| 77 |
+
"success": False,
|
| 78 |
+
"error": "Modal credentials not configured. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in Settings.",
|
| 79 |
+
"job_id": None
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
# Generate job ID
|
| 83 |
+
job_id = run_id if run_id else f"job_{uuid.uuid4().hex[:8]}"
|
| 84 |
+
|
| 85 |
+
# Map hardware to Modal GPU types
|
| 86 |
+
hardware_map = {
|
| 87 |
+
"auto": _auto_select_modal_hardware(provider, model),
|
| 88 |
+
"cpu": None, # CPU only
|
| 89 |
+
"gpu_t4": "T4",
|
| 90 |
+
"gpu_l4": "L4",
|
| 91 |
+
"gpu_a10": "A10G",
|
| 92 |
+
"gpu_l40s": "L40S",
|
| 93 |
+
"gpu_a100": "A100",
|
| 94 |
+
"gpu_a100_80gb": "A100-80GB",
|
| 95 |
+
"gpu_h100": "H100",
|
| 96 |
+
"gpu_h200": "H200",
|
| 97 |
+
"gpu_b200": "B200"
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
modal_gpu = hardware_map.get(hardware, "A10G")
|
| 101 |
+
|
| 102 |
+
# Build environment variables
|
| 103 |
+
env_vars = {
|
| 104 |
+
"HF_TOKEN": hf_token or os.environ.get("HF_TOKEN", ""),
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
# Add LLM provider API keys from environment
|
| 108 |
+
llm_key_names = [
|
| 109 |
+
"OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GOOGLE_API_KEY",
|
| 110 |
+
"GEMINI_API_KEY", "COHERE_API_KEY", "MISTRAL_API_KEY",
|
| 111 |
+
"TOGETHER_API_KEY", "GROQ_API_KEY", "REPLICATE_API_TOKEN",
|
| 112 |
+
"ANYSCALE_API_KEY", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
|
| 113 |
+
"AWS_REGION", "AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT",
|
| 114 |
+
"LITELLM_API_KEY"
|
| 115 |
+
]
|
| 116 |
+
|
| 117 |
+
for key_name in llm_key_names:
|
| 118 |
+
value = os.environ.get(key_name)
|
| 119 |
+
if value:
|
| 120 |
+
env_vars[key_name] = value
|
| 121 |
+
|
| 122 |
+
# Build SMOLTRACE command
|
| 123 |
+
cmd_parts = ["smoltrace-eval"]
|
| 124 |
+
cmd_parts.append(f"--model {model}")
|
| 125 |
+
cmd_parts.append(f"--provider {provider}")
|
| 126 |
+
if hf_inference_provider:
|
| 127 |
+
cmd_parts.append(f"--hf-inference-provider {hf_inference_provider}")
|
| 128 |
+
cmd_parts.append(f"--search-provider {search_provider}")
|
| 129 |
+
if enable_tools:
|
| 130 |
+
cmd_parts.append(f"--enable-tools {','.join(enable_tools)}")
|
| 131 |
+
cmd_parts.append(f"--agent-type {agent_type}")
|
| 132 |
+
cmd_parts.append(f"--dataset-name {dataset_name}")
|
| 133 |
+
cmd_parts.append(f"--split {split}")
|
| 134 |
+
if difficulty != "all":
|
| 135 |
+
cmd_parts.append(f"--difficulty {difficulty}")
|
| 136 |
+
if parallel_workers > 1:
|
| 137 |
+
cmd_parts.append(f"--parallel-workers {parallel_workers}")
|
| 138 |
+
cmd_parts.append(f"--output-format {output_format}")
|
| 139 |
+
if output_dir and output_format == "json":
|
| 140 |
+
cmd_parts.append(f"--output-dir {output_dir}")
|
| 141 |
+
if enable_otel:
|
| 142 |
+
cmd_parts.append("--enable-otel")
|
| 143 |
+
if not enable_gpu_metrics:
|
| 144 |
+
cmd_parts.append("--disable-gpu-metrics")
|
| 145 |
+
if private:
|
| 146 |
+
cmd_parts.append("--private")
|
| 147 |
+
if debug:
|
| 148 |
+
cmd_parts.append("--debug")
|
| 149 |
+
if quiet:
|
| 150 |
+
cmd_parts.append("--quiet")
|
| 151 |
+
cmd_parts.append(f"--run-id {job_id}")
|
| 152 |
+
|
| 153 |
+
command = " ".join(cmd_parts)
|
| 154 |
+
|
| 155 |
+
# Create Modal app dynamically
|
| 156 |
+
try:
|
| 157 |
+
app = modal.App(f"smoltrace-eval-{job_id}")
|
| 158 |
+
|
| 159 |
+
# Define Modal function
|
| 160 |
+
image = modal.Image.debian_slim().pip_install([
|
| 161 |
+
"smoltrace[otel,gpu]",
|
| 162 |
+
"litellm",
|
| 163 |
+
"transformers",
|
| 164 |
+
"torch"
|
| 165 |
+
])
|
| 166 |
+
|
| 167 |
+
@app.function(
|
| 168 |
+
image=image,
|
| 169 |
+
gpu=modal_gpu if modal_gpu else None,
|
| 170 |
+
secrets=[
|
| 171 |
+
modal.Secret.from_dict(env_vars)
|
| 172 |
+
],
|
| 173 |
+
timeout=3600 # 1 hour timeout
|
| 174 |
+
)
|
| 175 |
+
def run_evaluation():
|
| 176 |
+
"""Run SMOLTRACE evaluation on Modal"""
|
| 177 |
+
import subprocess
|
| 178 |
+
result = subprocess.run(command, shell=True, capture_output=True, text=True)
|
| 179 |
+
return {
|
| 180 |
+
"returncode": result.returncode,
|
| 181 |
+
"stdout": result.stdout,
|
| 182 |
+
"stderr": result.stderr
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
# Submit the job
|
| 186 |
+
# Note: Modal doesn't have a direct "submit and return" API like HF Jobs
|
| 187 |
+
# For now, we'll return the command that should be run
|
| 188 |
+
# In production, you'd use Modal's async API or spawn the function
|
| 189 |
+
|
| 190 |
+
return {
|
| 191 |
+
"success": True,
|
| 192 |
+
"job_id": job_id,
|
| 193 |
+
"platform": "Modal",
|
| 194 |
+
"hardware": modal_gpu or "CPU",
|
| 195 |
+
"command": command,
|
| 196 |
+
"status": "pending",
|
| 197 |
+
"message": "Modal job configured. Use Modal CLI to submit: modal run modal_job_submission.py",
|
| 198 |
+
"note": "Direct Modal API submission requires async handling. For now, use the generated command with Modal CLI."
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
except Exception as e:
|
| 202 |
+
return {
|
| 203 |
+
"success": False,
|
| 204 |
+
"error": f"Failed to create Modal job: {str(e)}",
|
| 205 |
+
"job_id": job_id
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def _auto_select_modal_hardware(provider: str, model: str) -> Optional[str]:
|
| 210 |
+
"""
|
| 211 |
+
Automatically select Modal hardware based on model and provider
|
| 212 |
+
|
| 213 |
+
Args:
|
| 214 |
+
provider: Provider type
|
| 215 |
+
model: Model identifier
|
| 216 |
+
|
| 217 |
+
Returns:
|
| 218 |
+
str: Modal GPU type or None for CPU
|
| 219 |
+
"""
|
| 220 |
+
# API models don't need GPU
|
| 221 |
+
if provider in ["litellm", "inference"]:
|
| 222 |
+
return None
|
| 223 |
+
|
| 224 |
+
# Local models need GPU - select based on model size
|
| 225 |
+
model_lower = model.lower()
|
| 226 |
+
|
| 227 |
+
if "70b" in model_lower or "65b" in model_lower:
|
| 228 |
+
return "A100-80GB" # Large models need A100 80GB
|
| 229 |
+
elif "13b" in model_lower or "34b" in model_lower:
|
| 230 |
+
return "A10G" # Medium models work well on A10G
|
| 231 |
+
elif "7b" in model_lower or "8b" in model_lower:
|
| 232 |
+
return "A10G" # Small models efficient on A10G
|
| 233 |
+
else:
|
| 234 |
+
return "A10G" # Default to A10G for unknown sizes
|