Spaces:
Running
feat: Add comprehensive HF Jobs monitoring system
Browse files
Implemented full job monitoring capabilities for HuggingFace Jobs:
**New Features:**
- Job Monitoring screen with 3 tabs (Inspect Job, Recent Jobs, Guide)
- Real-time job status inspection with color-coded indicators
- Job logs viewing directly in the UI
- Recent jobs list with pagination
- HF token validation and helpful error messages
**Technical Implementation:**
- Added screens/job_monitoring.py - Complete job monitoring UI
- Enhanced utils/hf_jobs_submission.py with 3 new API functions:
  - check_job_status() - Inspect job details via HF API
  - get_job_logs() - Fetch job logs
  - list_user_jobs() - List recent user jobs
- Updated app.py - Integrated job monitoring into navigation system
**API Integration:**
- Uses official HuggingFace Hub API (inspect_job, fetch_job_logs, list_jobs)
- Proper handling of JobInfo and JobStatus objects
- Supports both username/job_hash and job_hash formats
**UI Improvements:**
- Fixed job ID display (now shows actual HF Job ID: username/hash)
- Status emojis for all job states (QUEUED, RUNNING, SUCCEEDED, CANCELED, etc.)
- Clickable job URLs to HF dashboard
- Hardware flavor display (cpu-basic, a10g-small, etc.)
- Comprehensive troubleshooting guides
**Bug Fixes:**
- Fixed JobStatus enum handling (read status.stage instead of calling status.upper())
- Removed non-existent timing fields (started_at, finished_at, runtime)
- Added all status variants (CANCELED/CANCELLED, COMPLETED/SUCCEEDED, ERROR/FAILED)
- Proper token checking before API calls
- app.py +87 -24
- screens/job_monitoring.py +442 -0
- utils/hf_jobs_submission.py +180 -9
|
@@ -61,6 +61,7 @@ from screens.chat import (
|
|
| 61 |
)
|
| 62 |
from screens.documentation import create_documentation_screen
|
| 63 |
from screens.settings import create_settings_screen
|
|
|
|
| 64 |
from screens.mcp_helpers import (
|
| 65 |
call_analyze_leaderboard_sync,
|
| 66 |
call_debug_trace_sync,
|
|
@@ -1593,6 +1594,7 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
|
|
| 1593 |
new_eval_nav_btn = gr.Button("▶️ New Evaluation", variant="secondary", size="lg")
|
| 1594 |
compare_nav_btn = gr.Button("⚖️ Compare", variant="secondary", size="lg")
|
| 1595 |
chat_nav_btn = gr.Button("🤖 Agent Chat", variant="secondary", size="lg")
|
|
|
|
| 1596 |
synthetic_data_nav_btn = gr.Button("🔬 Synthetic Data", variant="secondary", size="lg")
|
| 1597 |
docs_nav_btn = gr.Button("📚 Documentation", variant="secondary", size="lg")
|
| 1598 |
settings_nav_btn = gr.Button("⚙️ Settings", variant="secondary", size="lg")
|
|
@@ -2451,6 +2453,11 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
|
|
| 2451 |
# ============================================================================
|
| 2452 |
settings_screen = create_settings_screen()
|
| 2453 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2454 |
# ============================================================================
|
| 2455 |
# Evaluation Helper Functions
|
| 2456 |
# ============================================================================
|
|
@@ -2689,6 +2696,7 @@ No historical data available for **{model}**.
|
|
| 2689 |
|
| 2690 |
# Success - build success message
|
| 2691 |
job_id = result.get('job_id', 'unknown')
|
|
|
|
| 2692 |
job_platform = result.get('platform', infra_provider)
|
| 2693 |
job_hardware = result.get('hardware', hardware)
|
| 2694 |
job_status = result.get('status', 'submitted')
|
|
@@ -2760,8 +2768,11 @@ No historical data available for **{model}**.
|
|
| 2760 |
<h2 style="margin-top: 0;">✅ Evaluation Job Configured!</h2>
|
| 2761 |
|
| 2762 |
<div style="background: rgba(255,255,255,0.15); padding: 15px; border-radius: 5px; margin: 15px 0;">
|
| 2763 |
-
<div style="font-size: 0.9em; opacity: 0.9; margin-bottom: 5px;">
|
| 2764 |
-
<div style="font-family: monospace; font-size:
|
|
|
|
|
|
|
|
|
|
| 2765 |
</div>
|
| 2766 |
|
| 2767 |
<div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 10px; margin-top: 15px;">
|
|
@@ -2884,11 +2895,13 @@ No historical data available for **{model}**.
|
|
| 2884 |
new_evaluation_screen: gr.update(visible=False),
|
| 2885 |
documentation_screen: gr.update(visible=False),
|
| 2886 |
settings_screen: gr.update(visible=False),
|
|
|
|
| 2887 |
dashboard_nav_btn: gr.update(variant="primary"),
|
| 2888 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 2889 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 2890 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 2891 |
chat_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 2892 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 2893 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 2894 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
@@ -2909,11 +2922,13 @@ No historical data available for **{model}**.
|
|
| 2909 |
new_evaluation_screen: gr.update(visible=False),
|
| 2910 |
documentation_screen: gr.update(visible=False),
|
| 2911 |
settings_screen: gr.update(visible=False),
|
|
|
|
| 2912 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 2913 |
leaderboard_nav_btn: gr.update(variant="primary"),
|
| 2914 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 2915 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 2916 |
chat_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 2917 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 2918 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 2919 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
@@ -2932,11 +2947,13 @@ No historical data available for **{model}**.
|
|
| 2932 |
new_evaluation_screen: gr.update(visible=True),
|
| 2933 |
documentation_screen: gr.update(visible=False),
|
| 2934 |
settings_screen: gr.update(visible=False),
|
|
|
|
| 2935 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 2936 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 2937 |
new_eval_nav_btn: gr.update(variant="primary"),
|
| 2938 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 2939 |
chat_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 2940 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 2941 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 2942 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
@@ -2967,11 +2984,13 @@ No historical data available for **{model}**.
|
|
| 2967 |
new_evaluation_screen: gr.update(visible=False),
|
| 2968 |
documentation_screen: gr.update(visible=False),
|
| 2969 |
settings_screen: gr.update(visible=False),
|
|
|
|
| 2970 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 2971 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 2972 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 2973 |
compare_nav_btn: gr.update(variant="primary"),
|
| 2974 |
chat_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 2975 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 2976 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 2977 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
@@ -2991,11 +3010,13 @@ No historical data available for **{model}**.
|
|
| 2991 |
new_evaluation_screen: gr.update(visible=False),
|
| 2992 |
documentation_screen: gr.update(visible=False),
|
| 2993 |
settings_screen: gr.update(visible=False),
|
|
|
|
| 2994 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 2995 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 2996 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 2997 |
compare_nav_btn: gr.update(variant="primary"),
|
| 2998 |
chat_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 2999 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 3000 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 3001 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
@@ -3014,11 +3035,13 @@ No historical data available for **{model}**.
|
|
| 3014 |
new_evaluation_screen: gr.update(visible=False),
|
| 3015 |
documentation_screen: gr.update(visible=False),
|
| 3016 |
settings_screen: gr.update(visible=False),
|
|
|
|
| 3017 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 3018 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 3019 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 3020 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 3021 |
chat_nav_btn: gr.update(variant="primary"),
|
|
|
|
| 3022 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 3023 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 3024 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
@@ -3037,11 +3060,13 @@ No historical data available for **{model}**.
|
|
| 3037 |
new_evaluation_screen: gr.update(visible=False),
|
| 3038 |
documentation_screen: gr.update(visible=False),
|
| 3039 |
settings_screen: gr.update(visible=False),
|
|
|
|
| 3040 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 3041 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 3042 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 3043 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 3044 |
chat_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 3045 |
synthetic_data_nav_btn: gr.update(variant="primary"),
|
| 3046 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 3047 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
@@ -3060,11 +3085,13 @@ No historical data available for **{model}**.
|
|
| 3060 |
new_evaluation_screen: gr.update(visible=False),
|
| 3061 |
documentation_screen: gr.update(visible=True),
|
| 3062 |
settings_screen: gr.update(visible=False),
|
|
|
|
| 3063 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 3064 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 3065 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 3066 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 3067 |
chat_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 3068 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 3069 |
docs_nav_btn: gr.update(variant="primary"),
|
| 3070 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
@@ -3083,16 +3110,43 @@ No historical data available for **{model}**.
|
|
| 3083 |
new_evaluation_screen: gr.update(visible=False),
|
| 3084 |
documentation_screen: gr.update(visible=False),
|
| 3085 |
settings_screen: gr.update(visible=True),
|
|
|
|
| 3086 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 3087 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 3088 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 3089 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 3090 |
chat_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 3091 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 3092 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 3093 |
settings_nav_btn: gr.update(variant="primary"),
|
| 3094 |
}
|
| 3095 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3096 |
# Synthetic Data Generator Callbacks
|
| 3097 |
def on_generate_synthetic_data(domain, tools, num_tasks, difficulty, agent_type):
|
| 3098 |
"""Generate synthetic dataset AND prompt template using MCP server"""
|
|
@@ -3381,8 +3435,8 @@ Result: {result}
|
|
| 3381 |
fn=navigate_to_dashboard,
|
| 3382 |
outputs=[
|
| 3383 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3384 |
-
new_evaluation_screen, documentation_screen, settings_screen,
|
| 3385 |
-
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3386 |
] + list(dashboard_components.values())
|
| 3387 |
)
|
| 3388 |
|
|
@@ -3493,24 +3547,24 @@ Result: {result}
|
|
| 3493 |
fn=navigate_to_dashboard,
|
| 3494 |
outputs=[
|
| 3495 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3496 |
-
new_evaluation_screen, documentation_screen, settings_screen,
|
| 3497 |
-
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3498 |
] + list(dashboard_components.values())
|
| 3499 |
)
|
| 3500 |
|
| 3501 |
leaderboard_nav_btn.click(
|
| 3502 |
fn=navigate_to_leaderboard,
|
| 3503 |
outputs=[
|
| 3504 |
-
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen, new_evaluation_screen, documentation_screen, settings_screen,
|
| 3505 |
-
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3506 |
]
|
| 3507 |
)
|
| 3508 |
|
| 3509 |
new_eval_nav_btn.click(
|
| 3510 |
fn=navigate_to_new_evaluation,
|
| 3511 |
outputs=[
|
| 3512 |
-
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen, new_evaluation_screen, documentation_screen, settings_screen,
|
| 3513 |
-
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3514 |
]
|
| 3515 |
)
|
| 3516 |
|
|
@@ -3518,8 +3572,8 @@ Result: {result}
|
|
| 3518 |
fn=navigate_to_compare,
|
| 3519 |
outputs=[
|
| 3520 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3521 |
-
new_evaluation_screen, documentation_screen, settings_screen,
|
| 3522 |
-
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, docs_nav_btn, settings_nav_btn,
|
| 3523 |
compare_components['compare_run_a_dropdown'], compare_components['compare_run_b_dropdown']
|
| 3524 |
]
|
| 3525 |
)
|
|
@@ -3528,16 +3582,25 @@ Result: {result}
|
|
| 3528 |
fn=navigate_to_chat,
|
| 3529 |
outputs=[
|
| 3530 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3531 |
-
new_evaluation_screen, documentation_screen, settings_screen,
|
| 3532 |
-
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3533 |
]
|
| 3534 |
)
|
| 3535 |
synthetic_data_nav_btn.click(
|
| 3536 |
fn=navigate_to_synthetic_data,
|
| 3537 |
outputs=[
|
| 3538 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3539 |
-
new_evaluation_screen, documentation_screen, settings_screen,
|
| 3540 |
-
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, docs_nav_btn, settings_nav_btn
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3541 |
]
|
| 3542 |
)
|
| 3543 |
|
|
@@ -3545,8 +3608,8 @@ Result: {result}
|
|
| 3545 |
fn=navigate_to_documentation,
|
| 3546 |
outputs=[
|
| 3547 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3548 |
-
new_evaluation_screen, documentation_screen, settings_screen,
|
| 3549 |
-
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3550 |
]
|
| 3551 |
)
|
| 3552 |
|
|
@@ -3554,8 +3617,8 @@ Result: {result}
|
|
| 3554 |
fn=navigate_to_settings,
|
| 3555 |
outputs=[
|
| 3556 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3557 |
-
new_evaluation_screen, documentation_screen, settings_screen,
|
| 3558 |
-
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3559 |
]
|
| 3560 |
)
|
| 3561 |
|
|
@@ -3576,8 +3639,8 @@ Result: {result}
|
|
| 3576 |
back_to_leaderboard_from_eval_btn.click(
|
| 3577 |
fn=navigate_to_leaderboard,
|
| 3578 |
outputs=[
|
| 3579 |
-
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen, new_evaluation_screen, documentation_screen, settings_screen,
|
| 3580 |
-
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3581 |
]
|
| 3582 |
)
|
| 3583 |
|
|
@@ -3691,8 +3754,8 @@ Result: {result}
|
|
| 3691 |
compare_components['back_to_leaderboard_btn'].click(
|
| 3692 |
fn=navigate_to_leaderboard,
|
| 3693 |
outputs=[
|
| 3694 |
-
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen, new_evaluation_screen, documentation_screen, settings_screen,
|
| 3695 |
-
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3696 |
]
|
| 3697 |
)
|
| 3698 |
|
|
|
|
| 61 |
)
|
| 62 |
from screens.documentation import create_documentation_screen
|
| 63 |
from screens.settings import create_settings_screen
|
| 64 |
+
from screens.job_monitoring import create_job_monitoring_screen
|
| 65 |
from screens.mcp_helpers import (
|
| 66 |
call_analyze_leaderboard_sync,
|
| 67 |
call_debug_trace_sync,
|
|
|
|
| 1594 |
new_eval_nav_btn = gr.Button("▶️ New Evaluation", variant="secondary", size="lg")
|
| 1595 |
compare_nav_btn = gr.Button("⚖️ Compare", variant="secondary", size="lg")
|
| 1596 |
chat_nav_btn = gr.Button("🤖 Agent Chat", variant="secondary", size="lg")
|
| 1597 |
+
job_monitoring_nav_btn = gr.Button("🔍 Job Monitoring", variant="secondary", size="lg")
|
| 1598 |
synthetic_data_nav_btn = gr.Button("🔬 Synthetic Data", variant="secondary", size="lg")
|
| 1599 |
docs_nav_btn = gr.Button("📚 Documentation", variant="secondary", size="lg")
|
| 1600 |
settings_nav_btn = gr.Button("⚙️ Settings", variant="secondary", size="lg")
|
|
|
|
| 2453 |
# ============================================================================
|
| 2454 |
settings_screen = create_settings_screen()
|
| 2455 |
|
| 2456 |
+
# ============================================================================
|
| 2457 |
+
# Screen 11: Job Monitoring
|
| 2458 |
+
# ============================================================================
|
| 2459 |
+
job_monitoring_screen = create_job_monitoring_screen()
|
| 2460 |
+
|
| 2461 |
# ============================================================================
|
| 2462 |
# Evaluation Helper Functions
|
| 2463 |
# ============================================================================
|
|
|
|
| 2696 |
|
| 2697 |
# Success - build success message
|
| 2698 |
job_id = result.get('job_id', 'unknown')
|
| 2699 |
+
hf_job_id = result.get('hf_job_id', job_id) # Get actual HF job ID
|
| 2700 |
job_platform = result.get('platform', infra_provider)
|
| 2701 |
job_hardware = result.get('hardware', hardware)
|
| 2702 |
job_status = result.get('status', 'submitted')
|
|
|
|
| 2768 |
<h2 style="margin-top: 0;">✅ Evaluation Job Configured!</h2>
|
| 2769 |
|
| 2770 |
<div style="background: rgba(255,255,255,0.15); padding: 15px; border-radius: 5px; margin: 15px 0;">
|
| 2771 |
+
<div style="font-size: 0.9em; opacity: 0.9; margin-bottom: 5px;">Run ID (SMOLTRACE)</div>
|
| 2772 |
+
<div style="font-family: monospace; font-size: 0.95em; font-weight: bold;">{job_id}</div>
|
| 2773 |
+
<div style="font-size: 0.9em; opacity: 0.9; margin-top: 10px; margin-bottom: 5px;">HF Job ID</div>
|
| 2774 |
+
<div style="font-family: monospace; font-size: 0.95em; font-weight: bold;">{hf_job_id}</div>
|
| 2775 |
+
<div style="font-size: 0.8em; opacity: 0.8; margin-top: 8px;">Use this ID to monitor: <code style="background: rgba(0,0,0,0.2); padding: 2px 6px; border-radius: 3px;">hf jobs inspect {hf_job_id}</code></div>
|
| 2776 |
</div>
|
| 2777 |
|
| 2778 |
<div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 10px; margin-top: 15px;">
|
|
|
|
| 2895 |
new_evaluation_screen: gr.update(visible=False),
|
| 2896 |
documentation_screen: gr.update(visible=False),
|
| 2897 |
settings_screen: gr.update(visible=False),
|
| 2898 |
+
job_monitoring_screen: gr.update(visible=False),
|
| 2899 |
dashboard_nav_btn: gr.update(variant="primary"),
|
| 2900 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 2901 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 2902 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 2903 |
chat_nav_btn: gr.update(variant="secondary"),
|
| 2904 |
+
job_monitoring_nav_btn: gr.update(variant="secondary"),
|
| 2905 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 2906 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 2907 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 2922 |
new_evaluation_screen: gr.update(visible=False),
|
| 2923 |
documentation_screen: gr.update(visible=False),
|
| 2924 |
settings_screen: gr.update(visible=False),
|
| 2925 |
+
job_monitoring_screen: gr.update(visible=False),
|
| 2926 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 2927 |
leaderboard_nav_btn: gr.update(variant="primary"),
|
| 2928 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 2929 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 2930 |
chat_nav_btn: gr.update(variant="secondary"),
|
| 2931 |
+
job_monitoring_nav_btn: gr.update(variant="secondary"),
|
| 2932 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 2933 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 2934 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 2947 |
new_evaluation_screen: gr.update(visible=True),
|
| 2948 |
documentation_screen: gr.update(visible=False),
|
| 2949 |
settings_screen: gr.update(visible=False),
|
| 2950 |
+
job_monitoring_screen: gr.update(visible=False),
|
| 2951 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 2952 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 2953 |
new_eval_nav_btn: gr.update(variant="primary"),
|
| 2954 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 2955 |
chat_nav_btn: gr.update(variant="secondary"),
|
| 2956 |
+
job_monitoring_nav_btn: gr.update(variant="secondary"),
|
| 2957 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 2958 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 2959 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 2984 |
new_evaluation_screen: gr.update(visible=False),
|
| 2985 |
documentation_screen: gr.update(visible=False),
|
| 2986 |
settings_screen: gr.update(visible=False),
|
| 2987 |
+
job_monitoring_screen: gr.update(visible=False),
|
| 2988 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 2989 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 2990 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 2991 |
compare_nav_btn: gr.update(variant="primary"),
|
| 2992 |
chat_nav_btn: gr.update(variant="secondary"),
|
| 2993 |
+
job_monitoring_nav_btn: gr.update(variant="secondary"),
|
| 2994 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 2995 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 2996 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 3010 |
new_evaluation_screen: gr.update(visible=False),
|
| 3011 |
documentation_screen: gr.update(visible=False),
|
| 3012 |
settings_screen: gr.update(visible=False),
|
| 3013 |
+
job_monitoring_screen: gr.update(visible=False),
|
| 3014 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 3015 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 3016 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 3017 |
compare_nav_btn: gr.update(variant="primary"),
|
| 3018 |
chat_nav_btn: gr.update(variant="secondary"),
|
| 3019 |
+
job_monitoring_nav_btn: gr.update(variant="secondary"),
|
| 3020 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 3021 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 3022 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 3035 |
new_evaluation_screen: gr.update(visible=False),
|
| 3036 |
documentation_screen: gr.update(visible=False),
|
| 3037 |
settings_screen: gr.update(visible=False),
|
| 3038 |
+
job_monitoring_screen: gr.update(visible=False),
|
| 3039 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 3040 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 3041 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 3042 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 3043 |
chat_nav_btn: gr.update(variant="primary"),
|
| 3044 |
+
job_monitoring_nav_btn: gr.update(variant="secondary"),
|
| 3045 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 3046 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 3047 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 3060 |
new_evaluation_screen: gr.update(visible=False),
|
| 3061 |
documentation_screen: gr.update(visible=False),
|
| 3062 |
settings_screen: gr.update(visible=False),
|
| 3063 |
+
job_monitoring_screen: gr.update(visible=False),
|
| 3064 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 3065 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 3066 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 3067 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 3068 |
chat_nav_btn: gr.update(variant="secondary"),
|
| 3069 |
+
job_monitoring_nav_btn: gr.update(variant="secondary"),
|
| 3070 |
synthetic_data_nav_btn: gr.update(variant="primary"),
|
| 3071 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 3072 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 3085 |
new_evaluation_screen: gr.update(visible=False),
|
| 3086 |
documentation_screen: gr.update(visible=True),
|
| 3087 |
settings_screen: gr.update(visible=False),
|
| 3088 |
+
job_monitoring_screen: gr.update(visible=False),
|
| 3089 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 3090 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 3091 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 3092 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 3093 |
chat_nav_btn: gr.update(variant="secondary"),
|
| 3094 |
+
job_monitoring_nav_btn: gr.update(variant="secondary"),
|
| 3095 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 3096 |
docs_nav_btn: gr.update(variant="primary"),
|
| 3097 |
settings_nav_btn: gr.update(variant="secondary"),
|
|
|
|
| 3110 |
new_evaluation_screen: gr.update(visible=False),
|
| 3111 |
documentation_screen: gr.update(visible=False),
|
| 3112 |
settings_screen: gr.update(visible=True),
|
| 3113 |
+
job_monitoring_screen: gr.update(visible=False),
|
| 3114 |
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 3115 |
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 3116 |
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 3117 |
compare_nav_btn: gr.update(variant="secondary"),
|
| 3118 |
chat_nav_btn: gr.update(variant="secondary"),
|
| 3119 |
+
job_monitoring_nav_btn: gr.update(variant="secondary"),
|
| 3120 |
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 3121 |
docs_nav_btn: gr.update(variant="secondary"),
|
| 3122 |
settings_nav_btn: gr.update(variant="primary"),
|
| 3123 |
}
|
| 3124 |
|
| 3125 |
+
def navigate_to_job_monitoring():
|
| 3126 |
+
"""Navigate to job monitoring screen"""
|
| 3127 |
+
return {
|
| 3128 |
+
dashboard_screen: gr.update(visible=False),
|
| 3129 |
+
leaderboard_screen: gr.update(visible=False),
|
| 3130 |
+
run_detail_screen: gr.update(visible=False),
|
| 3131 |
+
trace_detail_screen: gr.update(visible=False),
|
| 3132 |
+
compare_screen: gr.update(visible=False),
|
| 3133 |
+
chat_screen: gr.update(visible=False),
|
| 3134 |
+
synthetic_data_screen: gr.update(visible=False),
|
| 3135 |
+
new_evaluation_screen: gr.update(visible=False),
|
| 3136 |
+
documentation_screen: gr.update(visible=False),
|
| 3137 |
+
settings_screen: gr.update(visible=False),
|
| 3138 |
+
job_monitoring_screen: gr.update(visible=True),
|
| 3139 |
+
dashboard_nav_btn: gr.update(variant="secondary"),
|
| 3140 |
+
leaderboard_nav_btn: gr.update(variant="secondary"),
|
| 3141 |
+
new_eval_nav_btn: gr.update(variant="secondary"),
|
| 3142 |
+
compare_nav_btn: gr.update(variant="secondary"),
|
| 3143 |
+
chat_nav_btn: gr.update(variant="secondary"),
|
| 3144 |
+
job_monitoring_nav_btn: gr.update(variant="primary"),
|
| 3145 |
+
synthetic_data_nav_btn: gr.update(variant="secondary"),
|
| 3146 |
+
docs_nav_btn: gr.update(variant="secondary"),
|
| 3147 |
+
settings_nav_btn: gr.update(variant="secondary"),
|
| 3148 |
+
}
|
| 3149 |
+
|
| 3150 |
# Synthetic Data Generator Callbacks
|
| 3151 |
def on_generate_synthetic_data(domain, tools, num_tasks, difficulty, agent_type):
|
| 3152 |
"""Generate synthetic dataset AND prompt template using MCP server"""
|
|
|
|
| 3435 |
fn=navigate_to_dashboard,
|
| 3436 |
outputs=[
|
| 3437 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3438 |
+
new_evaluation_screen, documentation_screen, settings_screen, job_monitoring_screen,
|
| 3439 |
+
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3440 |
] + list(dashboard_components.values())
|
| 3441 |
)
|
| 3442 |
|
|
|
|
| 3547 |
fn=navigate_to_dashboard,
|
| 3548 |
outputs=[
|
| 3549 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3550 |
+
new_evaluation_screen, documentation_screen, settings_screen, job_monitoring_screen,
|
| 3551 |
+
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3552 |
] + list(dashboard_components.values())
|
| 3553 |
)
|
| 3554 |
|
| 3555 |
# Nav button -> leaderboard. The outputs list names every screen and nav
# button the handler is allowed to update.
leaderboard_nav_btn.click(
    fn=navigate_to_leaderboard,
    outputs=[
        # Screens
        dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen,
        compare_screen, chat_screen, synthetic_data_screen, new_evaluation_screen,
        documentation_screen, settings_screen, job_monitoring_screen,
        # Navigation buttons
        dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn,
        chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn,
        settings_nav_btn,
    ],
)

# Nav button -> new evaluation form (same output set as above).
new_eval_nav_btn.click(
    fn=navigate_to_new_evaluation,
    outputs=[
        # Screens
        dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen,
        compare_screen, chat_screen, synthetic_data_screen, new_evaluation_screen,
        documentation_screen, settings_screen, job_monitoring_screen,
        # Navigation buttons
        dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn,
        chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn,
        settings_nav_btn,
    ],
)
|
| 3570 |
|
|
|
|
| 3572 |
fn=navigate_to_compare,
|
| 3573 |
outputs=[
|
| 3574 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3575 |
+
new_evaluation_screen, documentation_screen, settings_screen, job_monitoring_screen,
|
| 3576 |
+
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn, settings_nav_btn,
|
| 3577 |
compare_components['compare_run_a_dropdown'], compare_components['compare_run_b_dropdown']
|
| 3578 |
]
|
| 3579 |
)
|
|
|
|
| 3582 |
fn=navigate_to_chat,
|
| 3583 |
outputs=[
|
| 3584 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3585 |
+
new_evaluation_screen, documentation_screen, settings_screen, job_monitoring_screen,
|
| 3586 |
+
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3587 |
]
|
| 3588 |
)
|
| 3589 |
# Nav button -> synthetic data generator. The outputs list names every screen
# and nav button the handler is allowed to update.
synthetic_data_nav_btn.click(
    fn=navigate_to_synthetic_data,
    outputs=[
        # Screens
        dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen,
        compare_screen, chat_screen, synthetic_data_screen, new_evaluation_screen,
        documentation_screen, settings_screen, job_monitoring_screen,
        # Navigation buttons
        dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn,
        chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn,
        settings_nav_btn,
    ],
)

# Nav button -> job monitoring (same output set as above).
job_monitoring_nav_btn.click(
    fn=navigate_to_job_monitoring,
    outputs=[
        # Screens
        dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen,
        compare_screen, chat_screen, synthetic_data_screen, new_evaluation_screen,
        documentation_screen, settings_screen, job_monitoring_screen,
        # Navigation buttons
        dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn,
        chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn,
        settings_nav_btn,
    ],
)
|
| 3606 |
|
|
|
|
| 3608 |
fn=navigate_to_documentation,
|
| 3609 |
outputs=[
|
| 3610 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3611 |
+
new_evaluation_screen, documentation_screen, settings_screen, job_monitoring_screen,
|
| 3612 |
+
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3613 |
]
|
| 3614 |
)
|
| 3615 |
|
|
|
|
| 3617 |
fn=navigate_to_settings,
|
| 3618 |
outputs=[
|
| 3619 |
dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen, compare_screen, chat_screen, synthetic_data_screen,
|
| 3620 |
+
new_evaluation_screen, documentation_screen, settings_screen, job_monitoring_screen,
|
| 3621 |
+
dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn, chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn, settings_nav_btn
|
| 3622 |
]
|
| 3623 |
)
|
| 3624 |
|
|
|
|
| 3639 |
# "Back" button on the new-evaluation screen reuses the leaderboard navigation
# handler, so it must list the same screens and nav buttons as outputs.
back_to_leaderboard_from_eval_btn.click(
    fn=navigate_to_leaderboard,
    outputs=[
        # Screens
        dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen,
        compare_screen, chat_screen, synthetic_data_screen, new_evaluation_screen,
        documentation_screen, settings_screen, job_monitoring_screen,
        # Navigation buttons
        dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn,
        chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn,
        settings_nav_btn,
    ],
)
|
| 3646 |
|
|
|
|
| 3754 |
# "Back" button on the compare screen reuses the leaderboard navigation
# handler, so it must list the same screens and nav buttons as outputs.
compare_components['back_to_leaderboard_btn'].click(
    fn=navigate_to_leaderboard,
    outputs=[
        # Screens
        dashboard_screen, leaderboard_screen, run_detail_screen, trace_detail_screen,
        compare_screen, chat_screen, synthetic_data_screen, new_evaluation_screen,
        documentation_screen, settings_screen, job_monitoring_screen,
        # Navigation buttons
        dashboard_nav_btn, leaderboard_nav_btn, new_eval_nav_btn, compare_nav_btn,
        chat_nav_btn, synthetic_data_nav_btn, job_monitoring_nav_btn, docs_nav_btn,
        settings_nav_btn,
    ],
)
|
| 3761 |
|
|
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Job Monitoring Screen for TraceMind-AI
|
| 3 |
+
Allows users to monitor HuggingFace Jobs status and view logs
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
import os
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def create_job_monitoring_screen():
|
| 12 |
+
"""
|
| 13 |
+
Create the job monitoring screen for HF Jobs
|
| 14 |
+
|
| 15 |
+
Returns:
|
| 16 |
+
gr.Column: Gradio Column component for job monitoring
|
| 17 |
+
"""
|
| 18 |
+
with gr.Column(visible=False) as job_monitoring_interface:
|
| 19 |
+
gr.Markdown("""
|
| 20 |
+
# 🔍 Job Monitoring
|
| 21 |
+
|
| 22 |
+
Monitor your HuggingFace Jobs in real-time. Check job status, view logs, and track evaluation progress.
|
| 23 |
+
""")
|
| 24 |
+
|
| 25 |
+
with gr.Tabs():
|
| 26 |
+
# Tab 1: Single Job Inspection
|
| 27 |
+
with gr.Tab("📋 Inspect Job"):
|
| 28 |
+
gr.Markdown("""
|
| 29 |
+
### Inspect a Specific Job
|
| 30 |
+
|
| 31 |
+
Enter a HuggingFace Job ID to view its status and logs.
|
| 32 |
+
""")
|
| 33 |
+
|
| 34 |
+
with gr.Row():
|
| 35 |
+
job_id_input = gr.Textbox(
|
| 36 |
+
label="HF Job ID",
|
| 37 |
+
placeholder="e.g., kshitijthakkar/691eb073748f86bfa7144fcc",
|
| 38 |
+
info="Format: username/job_hash"
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
with gr.Row():
|
| 42 |
+
inspect_btn = gr.Button("🔍 Inspect Job", variant="primary")
|
| 43 |
+
refresh_btn = gr.Button("🔄 Refresh", variant="secondary")
|
| 44 |
+
|
| 45 |
+
# Job Status Section
|
| 46 |
+
with gr.Accordion("📊 Job Status", open=True):
|
| 47 |
+
job_status_display = gr.Markdown("Enter a Job ID and click 'Inspect Job' to view status")
|
| 48 |
+
|
| 49 |
+
# Job Logs Section
|
| 50 |
+
with gr.Accordion("📜 Job Logs", open=True):
|
| 51 |
+
with gr.Row():
|
| 52 |
+
show_logs_btn = gr.Button("📥 Load Logs", variant="secondary")
|
| 53 |
+
auto_refresh_logs = gr.Checkbox(
|
| 54 |
+
label="Auto-refresh logs (every 5s)",
|
| 55 |
+
value=False
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
job_logs_display = gr.Code(
|
| 59 |
+
label="Job Logs",
|
| 60 |
+
language="shell",
|
| 61 |
+
value="Click 'Load Logs' to view job output",
|
| 62 |
+
lines=20
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
# Tab 2: Recent Jobs List
|
| 66 |
+
with gr.Tab("📑 Recent Jobs"):
|
| 67 |
+
gr.Markdown("""
|
| 68 |
+
### Your Recent Jobs
|
| 69 |
+
|
| 70 |
+
View a list of your recent HuggingFace Jobs.
|
| 71 |
+
""")
|
| 72 |
+
|
| 73 |
+
with gr.Row():
|
| 74 |
+
list_jobs_btn = gr.Button("📋 Load Recent Jobs", variant="primary")
|
| 75 |
+
jobs_limit = gr.Slider(
|
| 76 |
+
minimum=5,
|
| 77 |
+
maximum=50,
|
| 78 |
+
value=10,
|
| 79 |
+
step=5,
|
| 80 |
+
label="Number of jobs to fetch"
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
recent_jobs_display = gr.Markdown("Click 'Load Recent Jobs' to view your jobs")
|
| 84 |
+
|
| 85 |
+
# Tab 3: Job Monitoring Guide
|
| 86 |
+
with gr.Tab("📖 Guide"):
|
| 87 |
+
gr.Markdown("""
|
| 88 |
+
### Using Job Monitoring
|
| 89 |
+
|
| 90 |
+
#### How to Get Your Job ID
|
| 91 |
+
|
| 92 |
+
After submitting an evaluation from the "New Evaluation" tab, you'll receive:
|
| 93 |
+
- **Run ID (SMOLTRACE)**: Used for tracking results in datasets (e.g., `job_3a22ceca`)
|
| 94 |
+
- **HF Job ID**: Used for monitoring the actual job (e.g., `kshitijthakkar/691eb073748f86bfa7144fcc`)
|
| 95 |
+
|
| 96 |
+
Use the **HF Job ID** here to monitor your job.
|
| 97 |
+
|
| 98 |
+
#### Job Status Values
|
| 99 |
+
|
| 100 |
+
- **QUEUED**: Job is waiting to start
|
| 101 |
+
- **STARTING**: Job is being initialized
|
| 102 |
+
- **RUNNING**: Job is currently executing
|
| 103 |
+
- **SUCCEEDED**: Job completed successfully
|
| 104 |
+
- **FAILED**: Job encountered an error
|
| 105 |
+
- **CANCELLED**: Job was manually cancelled
|
| 106 |
+
- **STOPPED**: Job was stopped by the system
|
| 107 |
+
|
| 108 |
+
#### CLI Commands Reference
|
| 109 |
+
|
| 110 |
+
You can also use the HuggingFace CLI to monitor jobs:
|
| 111 |
+
|
| 112 |
+
```bash
|
| 113 |
+
# List your running jobs
|
| 114 |
+
hf jobs ps
|
| 115 |
+
|
| 116 |
+
# Inspect a specific job
|
| 117 |
+
hf jobs inspect <job_id>
|
| 118 |
+
|
| 119 |
+
# View logs from a job
|
| 120 |
+
hf jobs logs <job_id>
|
| 121 |
+
|
| 122 |
+
# Follow logs in real-time
|
| 123 |
+
hf jobs logs <job_id> --follow
|
| 124 |
+
|
| 125 |
+
# Cancel a job
|
| 126 |
+
hf jobs cancel <job_id>
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
#### Tips
|
| 130 |
+
|
| 131 |
+
- 💡 **Bookmark your Job ID** after submission for easy access
|
| 132 |
+
- 🔄 **Use auto-refresh** for logs when job is running
|
| 133 |
+
- 📊 **Check status regularly** to catch any issues early
|
| 134 |
+
- 📝 **Review logs** if your job fails to understand what went wrong
|
| 135 |
+
- 🎯 **Results appear in leaderboard** once job succeeds and uploads datasets
|
| 136 |
+
""")
|
| 137 |
+
|
| 138 |
+
# Functions for job monitoring
|
| 139 |
+
def inspect_job(job_id: str):
    """Fetch a job's status and render it as Markdown for the status panel.

    Args:
        job_id: HF Job ID typed by the user (format: ``username/job_hash``).
            Surrounding whitespace is ignored.

    Returns:
        A ``gr.update`` whose ``value`` is the Markdown to display: a
        validation message, an error report, or the formatted job status.
    """
    # Normalise once so the API call and every message show the same ID.
    job_id = job_id.strip() if job_id else ""
    if not job_id:
        return gr.update(value="❌ Please enter a Job ID")

    # Check if token is configured before making API call
    token = os.environ.get("HF_TOKEN")
    if not token or not token.strip():
        return gr.update(
            value="""
### ⚠️ HuggingFace Token Not Configured

**Action Required**:
1. Go to "⚙️ Settings" in the sidebar
2. Enter your HuggingFace token (must have "Run Jobs" permission)
3. Click "💾 Save API Keys"
4. Return to this tab and try again
"""
        )

    # Imported lazily, as in the other handlers of this screen.
    from utils.hf_jobs_submission import check_job_status

    result = check_job_status(job_id)

    if not result.get("success"):
        error_msg = result.get('error', 'Unknown error')

        return gr.update(
            value=f"""
### ❌ Failed to Fetch Job Status

**Error**: {error_msg}

**Job ID**: `{job_id}`

**Troubleshooting**:
- Verify the Job ID format is correct (format: `username/job_hash`)
- Check that the job exists in your account
- Ensure your HF token has the correct permissions
- Token must have **Run Jobs** permission enabled
"""
        )

    # str() of an Enum member (even a str-mixin one) is "Class.MEMBER", which
    # would never match the table below, so unwrap to the underlying value.
    raw_status = result.get("status", "unknown")
    raw_status = getattr(raw_status, "value", raw_status)
    status_str = str(raw_status).strip().upper() if raw_status else "UNKNOWN"

    # status -> (emoji, colour); both spellings / naming variants are listed.
    status_styles = {
        "QUEUED": ("⏳", "#FFA500"),
        "STARTING": ("🔄", "#1E90FF"),
        "RUNNING": ("▶️", "#00CED1"),
        "SUCCEEDED": ("✅", "#32CD32"),
        "COMPLETED": ("✅", "#32CD32"),  # Alternative success status
        "FAILED": ("❌", "#DC143C"),
        "ERROR": ("❌", "#DC143C"),  # Alternative failure status
        "CANCELLED": ("🚫", "#696969"),
        "CANCELED": ("🚫", "#696969"),  # US spelling variant
        "STOPPED": ("⏹️", "#A9A9A9"),
        "TIMEOUT": ("⏱️", "#FF8C00"),
    }
    status_emoji, status_color = status_styles.get(status_str, ("❓", "#888888"))

    # check_job_status always sets these keys (possibly to None), so a
    # .get() default would never apply; fall back explicitly instead.
    created_at = result.get("created_at") or "N/A"
    flavor = result.get("flavor") or "N/A"
    job_url = result.get("url")

    # Format job URL as clickable link
    job_url_display = f"[Open in HuggingFace]({job_url})" if job_url else "N/A"

    return gr.update(
        value=f"""
### {status_emoji} Job Status: <span style="color: {status_color};">{status_str}</span>

**Job ID**: `{job_id}`

#### Details

- **Created**: {created_at}
- **Hardware**: {flavor}
- **Job URL**: {job_url_display}

#### Next Steps

{_get_next_steps(status_str)}

---

💡 **Tip**: Use "📥 Load Logs" button below to view detailed execution logs and check progress.
"""
    )
|
| 245 |
+
|
| 246 |
+
def _get_next_steps(status: str) -> str:
    """Return a one-line, user-facing hint for a given job status.

    Args:
        status: Job status/stage. Matching is case-insensitive and ignores
            surrounding whitespace; ``Enum`` members are matched by value.

    Returns:
        The hint for the status, or a generic "unknown status" hint when the
        status is empty or not recognised.
    """
    # str() of an Enum member (even a str-mixin one) is "Class.MEMBER", which
    # would never match the table below, so unwrap to the underlying value.
    raw = getattr(status, "value", status)
    key = str(raw).strip().upper() if raw else "UNKNOWN"

    succeeded = "✅ Your job completed successfully! Check the Leaderboard tab for results."
    failed = "❌ Your job failed. Click 'Load Logs' below to see what went wrong."
    stopped = "🚫 Your job was stopped. You can submit a new job from the 'New Evaluation' tab."

    hints = {
        "QUEUED": "⏳ Your job is waiting in the queue. It will start soon.",
        "STARTING": "🔄 Your job is being initialized. This usually takes 1-2 minutes.",
        "RUNNING": "▶️ Your job is running! Click 'Load Logs' below to view progress.",
        "SUCCEEDED": succeeded,
        "COMPLETED": succeeded,
        "FAILED": failed,
        "ERROR": failed,
        "CANCELLED": stopped,
        "CANCELED": stopped,
        "STOPPED": stopped,
        "TIMEOUT": "⏱️ Your job exceeded the time limit. Consider optimizing your model or increasing the timeout.",
    }
    return hints.get(key, "❓ Unknown status. Try refreshing or check the HF Jobs dashboard.")
|
| 266 |
+
|
| 267 |
+
def load_job_logs(job_id: str):
    """Fetch the logs of a job and return them for the log viewer.

    Args:
        job_id: HF Job ID typed by the user; surrounding whitespace is ignored.

    Returns:
        A ``gr.update`` whose ``value`` is either the log text or a short
        message explaining why no logs can be shown.
    """
    import os

    def _show(text):
        return gr.update(value=text)

    cleaned_id = (job_id or "").strip()
    if not cleaned_id:
        return _show("❌ Please enter a Job ID first")

    # Bail out before touching the API when no token has been configured.
    hf_token = os.environ.get("HF_TOKEN")
    if not (hf_token and hf_token.strip()):
        return _show(
            "⚠️ HuggingFace Token Not Configured\n\nPlease configure your HF token in Settings first."
        )

    from utils.hf_jobs_submission import get_job_logs

    response = get_job_logs(cleaned_id)
    if not response.get("success"):
        reason = response.get('error', 'Unknown error')
        return _show(
            f"❌ Failed to fetch logs: {reason}\n\nEnsure your HF token has 'Run Jobs' permission."
        )

    log_text = response.get("logs", "")
    if log_text and log_text.strip():
        return _show(log_text)
    return _show("ℹ️ No logs available yet. Job may not have started.\n\nTry refreshing after a minute.")
|
| 295 |
+
|
| 296 |
+
def list_recent_jobs(limit: int):
    """List the user's recent jobs as a Markdown table.

    Args:
        limit: Maximum number of jobs to show (slider value; cast to ``int``).

    Returns:
        A ``gr.update`` whose ``value`` is the Markdown to display: setup
        instructions when no token is configured, an error report, a
        "no jobs" notice, or the table of jobs.
    """
    # Check if token is configured before making API call
    token = os.environ.get("HF_TOKEN")
    if not token or not token.strip():
        return gr.update(
            value="""
### ⚠️ HuggingFace Token Not Configured

**Action Required**:
1. Go to "⚙️ Settings" in the sidebar
2. Enter your HuggingFace token (must have "Run Jobs" permission)
3. Click "💾 Save API Keys"
4. Return to this tab and try again

**Note**: Your HF token must:
- Start with `hf_`
- Have **Read**, **Write**, AND **Run Jobs** permissions
- Be from a HuggingFace Pro account ($9/month)

Get your token at: https://huggingface.co/settings/tokens
"""
        )

    # Imported lazily, as in the other handlers of this screen.
    from utils.hf_jobs_submission import list_user_jobs

    result = list_user_jobs(limit=int(limit))

    if not result.get("success"):
        error_msg = result.get('error', 'Unknown error')
        error_text = str(error_msg).lower()

        # Check for common error patterns
        if "invalid" in error_text or "token" in error_text:
            troubleshooting = """
**Troubleshooting**:
- ⚠️ **Token may be invalid** - Regenerate your token at HuggingFace settings
- ✅ Ensure token has **Run Jobs** permission (not just Read/Write)
- ✅ Verify you have an active **HuggingFace Pro account**
- ✅ Token should start with `hf_`
"""
        else:
            troubleshooting = """
**Troubleshooting**:
- Refresh this page and try again
- Check your internet connection
- Verify HuggingFace services are operational
"""

        return gr.update(
            value=f"""
### ❌ Failed to Fetch Jobs

**Error**: {error_msg}

{troubleshooting}
"""
        )

    jobs = result.get("jobs", [])
    if not jobs:
        return gr.update(
            value="""
### ℹ️ No Jobs Found

You haven't submitted any jobs yet.

**Get Started**:
1. Go to the "New Evaluation" tab
2. Configure your model and settings
3. Submit an evaluation job
4. Come back here to monitor progress!
"""
        )

    # Built once; the original rebuilt this table for every row.
    status_emojis = {
        "QUEUED": "⏳",
        "STARTING": "🔄",
        "RUNNING": "▶️",
        "SUCCEEDED": "✅",
        "COMPLETED": "✅",  # Alternative success status
        "FAILED": "❌",
        "ERROR": "❌",  # Alternative failure status
        "CANCELLED": "🚫",
        "CANCELED": "🚫",  # US spelling variant
        "STOPPED": "⏹️",
        "TIMEOUT": "⏱️",
    }

    # Build jobs table
    parts = [
        "### 📋 Your Recent Jobs\n\n",
        "| Job ID | Status | Created At |\n",
        "|--------|--------|------------|\n",
    ]

    for job in jobs:
        job_id = job.get("job_id") or "N/A"
        created = job.get("created_at") or "N/A"

        # str() of an Enum member is "Class.MEMBER"; unwrap to its value so
        # both the emoji lookup and the displayed text use the plain name.
        raw_status = job.get("status", "unknown")
        raw_status = getattr(raw_status, "value", raw_status)
        status_str = str(raw_status).strip().upper() if raw_status else "UNKNOWN"

        status_emoji = status_emojis.get(status_str, "❓")
        # Show the normalised status, matching what the Inspect Job tab displays.
        parts.append(f"| `{job_id}` | {status_emoji} {status_str} | {created} |\n")

    parts.append(f"\n**Total Jobs**: {len(jobs)}\n\n")
    parts.append("💡 **Tip**: Copy a Job ID and paste it in the 'Inspect Job' tab to view details and logs.")

    return gr.update(value="".join(parts))
|
| 404 |
+
|
| 405 |
+
# Wire up button events
|
| 406 |
+
inspect_btn.click(
|
| 407 |
+
fn=inspect_job,
|
| 408 |
+
inputs=[job_id_input],
|
| 409 |
+
outputs=[job_status_display]
|
| 410 |
+
)
|
| 411 |
+
|
| 412 |
+
refresh_btn.click(
|
| 413 |
+
fn=inspect_job,
|
| 414 |
+
inputs=[job_id_input],
|
| 415 |
+
outputs=[job_status_display]
|
| 416 |
+
)
|
| 417 |
+
|
| 418 |
+
show_logs_btn.click(
|
| 419 |
+
fn=load_job_logs,
|
| 420 |
+
inputs=[job_id_input],
|
| 421 |
+
outputs=[job_logs_display]
|
| 422 |
+
)
|
| 423 |
+
|
| 424 |
+
list_jobs_btn.click(
|
| 425 |
+
fn=list_recent_jobs,
|
| 426 |
+
inputs=[jobs_limit],
|
| 427 |
+
outputs=[recent_jobs_display]
|
| 428 |
+
)
|
| 429 |
+
|
| 430 |
+
# Auto-refresh is not implemented yet: the `auto_refresh_logs` checkbox above is not connected to any event.
|
| 431 |
+
# Note: For production, consider using gr.Timer or similar for automatic refreshes
|
| 432 |
+
|
| 433 |
+
return job_monitoring_interface
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
if __name__ == "__main__":
    # For standalone testing: build only this screen inside a bare Blocks app.
    with gr.Blocks() as demo:
        job_monitoring = create_job_monitoring_screen()
        # Make it visible for standalone testing
        # (the screen's Column is created with visible=False).
        job_monitoring.visible = True
    demo.launch()
|
|
@@ -245,20 +245,191 @@ def _auto_select_hf_hardware(provider: str, model: str) -> str:
|
|
| 245 |
return "t4-small"
|
| 246 |
|
| 247 |
|
| 248 |
-
def check_job_status(
|
| 249 |
"""
|
| 250 |
-
Check the status of a HuggingFace Job
|
| 251 |
|
| 252 |
Args:
|
| 253 |
-
|
| 254 |
hf_token: HuggingFace token (optional, uses env if not provided)
|
| 255 |
|
| 256 |
Returns:
|
| 257 |
dict: Job status information
|
| 258 |
"""
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
return "t4-small"
|
| 246 |
|
| 247 |
|
| 248 |
+
def check_job_status(hf_job_id: str, hf_token: Optional[str] = None) -> Dict:
    """
    Check the status of a HuggingFace Job using the Jobs API

    Args:
        hf_job_id: HF Job ID (format: username/job_hash or just job_hash).
            Surrounding whitespace is ignored.
        hf_token: HuggingFace token (optional, uses env if not provided)

    Returns:
        dict: Always contains ``success`` and ``job_id``. On success it also
        contains ``status`` (plain string), ``created_at``, ``flavor``,
        ``url`` (each ``None`` when the API object lacks it) and ``info``
        (``str()`` of the API object). On failure it contains ``error``.
        This function does not raise; API errors are reported in ``error``.
    """
    def _failure(message: str, job_id) -> Dict:
        return {"success": False, "error": message, "job_id": job_id}

    # Validate the ID up front so obviously bad input never reaches the API.
    job_ref = hf_job_id.strip() if isinstance(hf_job_id, str) else ""
    if not job_ref:
        return _failure("Job ID is required", hf_job_id)

    # Format can be "username/job_hash" or just "job_hash"
    if "/" in job_ref:
        namespace, job_hash = (part.strip() for part in job_ref.split("/", 1))
        if not namespace or not job_hash:
            return _failure(
                "Invalid Job ID format (expected username/job_hash)", job_ref
            )
        lookup = {"job_id": job_hash, "namespace": namespace}
    else:
        lookup = {"job_id": job_ref}

    try:
        from huggingface_hub import HfApi
    except ImportError:
        return _failure("huggingface_hub package not installed", job_ref)

    # A whitespace-only token is treated the same as a missing one.
    token = (hf_token or os.environ.get("HF_TOKEN") or "").strip()
    if not token:
        return _failure("HuggingFace token not configured", job_ref)

    try:
        api = HfApi(token=token)
        job_info = api.inspect_job(**lookup)

        # Extract status stage from JobStatus object
        status_obj = getattr(job_info, "status", None)
        if status_obj is None:
            status = "unknown"
        elif hasattr(status_obj, "stage"):
            stage = status_obj.stage
            # Unwrap Enum members: str() on them yields "Class.MEMBER",
            # which callers that upper-case the status would not recognise.
            status = getattr(stage, "value", stage) or "unknown"
        else:
            status = str(status_obj)

        created_at = getattr(job_info, "created_at", None)

        return {
            "success": True,
            "job_id": job_ref,
            "status": status,
            # Keep None as None rather than the string "None".
            "created_at": str(created_at) if created_at is not None else None,
            "flavor": getattr(job_info, "flavor", None),
            "url": getattr(job_info, "url", None),
            "info": str(job_info),
        }
    except Exception as e:
        # Boundary for the UI: report any API/network failure as data.
        return _failure(f"Failed to fetch job status: {str(e)}", job_ref)
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
def get_job_logs(hf_job_id: str, hf_token: Optional[str] = None) -> Dict:
    """
    Retrieve logs from a HuggingFace Job

    Args:
        hf_job_id: HF Job ID (format: username/job_hash or just job_hash).
            Surrounding whitespace is ignored.
        hf_token: HuggingFace token (optional, uses env if not provided)

    Returns:
        dict: Always contains ``success``, ``job_id`` and ``logs`` (the log
        lines joined with newlines, or ``""`` on failure). On failure it
        also contains ``error``. This function does not raise; API errors
        are reported in ``error``.
    """
    def _failure(message: str, job_id) -> Dict:
        # Every failure has the same shape, so callers can read "logs" safely.
        return {"success": False, "error": message, "job_id": job_id, "logs": ""}

    # Validate the ID up front so obviously bad input never reaches the API.
    job_ref = hf_job_id.strip() if isinstance(hf_job_id, str) else ""
    if not job_ref:
        return _failure("Job ID is required", hf_job_id)

    # Format can be "username/job_hash" or just "job_hash"
    if "/" in job_ref:
        namespace, job_hash = (part.strip() for part in job_ref.split("/", 1))
        if not namespace or not job_hash:
            return _failure(
                "Invalid Job ID format (expected username/job_hash)", job_ref
            )
        lookup = {"job_id": job_hash, "namespace": namespace}
    else:
        lookup = {"job_id": job_ref}

    try:
        from huggingface_hub import HfApi
    except ImportError:
        return _failure("huggingface_hub package not installed", job_ref)

    # A whitespace-only token is treated the same as a missing one.
    token = (hf_token or os.environ.get("HF_TOKEN") or "").strip()
    if not token:
        return _failure("HuggingFace token not configured", job_ref)

    try:
        api = HfApi(token=token)
        logs_iterable = api.fetch_job_logs(**lookup)

        # Convert iterable to string (str() guards against non-str items).
        # NOTE(review): if fetch_job_logs streams while the job is still
        # running, this join blocks until the stream ends — confirm.
        logs = "\n".join(str(line) for line in logs_iterable)

        return {
            "success": True,
            "job_id": job_ref,
            "logs": logs,
        }
    except Exception as e:
        # Boundary for the UI: report any API/network failure as data.
        return _failure(f"Failed to fetch job logs: {str(e)}", job_ref)
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
def _summarize_job(job) -> Dict:
    """Flatten one job object returned by ``HfApi.list_jobs`` into a plain dict."""
    raw_id = getattr(job, "id", None)
    owner_name = getattr(getattr(job, "owner", None), "name", None)

    # Build job_id in the format owner/id when the owner is known.
    if raw_id is None:
        job_id = "unknown"
    elif owner_name:
        job_id = f"{owner_name}/{raw_id}"
    else:
        job_id = raw_id

    # Prefer the status object's stage; fall back to its string form.
    status_obj = getattr(job, "status", None)
    if status_obj is None:
        status = "unknown"
    else:
        stage = getattr(status_obj, "stage", None)
        status = stage if stage is not None else str(status_obj)

    created_at = getattr(job, "created_at", None)

    return {
        "job_id": job_id,
        "status": status,
        # Keep a missing timestamp as None rather than the string "None".
        "created_at": str(created_at) if created_at is not None else None,
    }


def list_user_jobs(hf_token: Optional[str] = None, limit: int = 10) -> Dict:
    """
    List recent jobs for the authenticated user

    Args:
        hf_token: HuggingFace token (optional, uses env if not provided)
        limit: Maximum number of jobs to return (applied after fetching).
            A value of None, 0 or a negative number returns every job.

    Returns:
        dict: Always contains "success", "jobs" (list of dicts with
            "job_id", "status" and "created_at") and "count". Failures
            additionally carry an "error" message, with "jobs" empty and
            "count" 0.
    """
    def _failure(message: str) -> Dict:
        # One failure shape for every path, so callers can always iterate
        # result["jobs"] and read result["count"].
        return {
            "success": False,
            "error": message,
            "jobs": [],
            "count": 0,
        }

    try:
        from huggingface_hub import HfApi
    except ImportError:
        return _failure("huggingface_hub package not installed")

    token = hf_token or os.environ.get("HF_TOKEN")
    if not token:
        return _failure("HuggingFace token not configured")

    try:
        api = HfApi(token=token)
        # The list call is made without a limit, so fetch everything and
        # slice locally. Jobs are kept in the order the API returns them.
        all_jobs = list(api.list_jobs())
        if limit is not None and limit > 0:
            all_jobs = all_jobs[:limit]
        job_list = [_summarize_job(job) for job in all_jobs]
    except Exception as e:
        # Boundary with the remote API: report the failure to the UI instead
        # of raising.
        return _failure(f"Failed to list jobs: {str(e)}")

    return {
        "success": True,
        "jobs": job_list,
        "count": len(job_list),
    }
|