import gradio as gr
import torch
from transformers import Idefics3ForConditionalGeneration, AutoProcessor
from PIL import Image

# Global variables
model = None
processor = None
device = None

def get_device():
    """Determine best device to use"""
    if torch.cuda.is_available():
        return 'cuda:0'
    else:
        return 'cpu'
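
# Note: get_device() returns the explicit 'cuda:0' string rather than a bare
# 'cuda', so the device_map target in load_model() and the tensor moves in
# analyze_interface() below refer to the same GPU index.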

def load_model():
    """Load SmolVLM model with proper device handling"""
    global model, processor, device
    try:
        print("Loading SmolVLM TRAC Automation Agent...")
        device = get_device()
        print(f"Using device: {device}")
        model_path = r"C:\Users\keith\OneDrive\Desktop\admin.trac.jobs-DATA\LLaMA-Factory_local\smolvlm_final_merged"
        # Load processor first
        processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
        print("✅ Processor loaded")
        # Load model with explicit device placement
        if device == 'cuda:0':
            # GPU loading
            model = Idefics3ForConditionalGeneration.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16,
                device_map={'': 0},  # Force all components to GPU 0
                trust_remote_code=True
            )
        else:
            # CPU loading
            model = Idefics3ForConditionalGeneration.from_pretrained(
                model_path,
                torch_dtype=torch.float32,  # Use float32 for CPU
                device_map='cpu',
                trust_remote_code=True
            )
        print(f"✅ Model loaded on {device}")
        return f"✅ Model loaded successfully on {device}! Ready for TRAC automation."
    except Exception as e:
        error_msg = f"❌ Error loading model: {str(e)}"
        print(error_msg)
        return error_msg
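
# Quick smoke test for the loader (a sketch; assumes the merged checkpoint
# path above exists on this machine):
#
#     >>> load_model()
#     '✅ Model loaded successfully on cuda:0! Ready for TRAC automation.'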

def analyze_interface(image, task_type, custom_prompt):
    """Analyze TRAC interface with proper device handling"""
    global model, processor, device
    if model is None:
        return "❌ Please load the model first."
    if image is None:
        return "❌ Please upload a TRAC screenshot."
    try:
        # Convert image to RGB
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        image = image.convert("RGB")
        # Create task-specific prompts
        if task_type == "Longlisting":
            prompt = """<image>
Analyze this TRAC interface for LONGLISTING candidates. Identify clickable elements, candidate tables, selection controls, and filtering options. Provide automation steps."""
        elif task_type == "Shortlisting":
            prompt = """<image>
Analyze this TRAC interface for SHORTLISTING candidates. Identify evaluation controls, shortlist buttons, and approval workflows. Provide automation steps."""
        elif task_type == "Interview Setup":
            prompt = """<image>
Analyze this TRAC interface for INTERVIEW SETUP. Identify scheduling elements, calendar controls, and interviewer assignment. Provide automation steps."""
        else:  # Custom
            if not custom_prompt.strip():
                return "❌ Please enter a custom prompt for analysis."
            prompt = f"<image>\n{custom_prompt}"
        # Process inputs
        inputs = processor(text=prompt, images=[image], return_tensors="pt")
        # Move ALL tensors to the same device as the model
        if device == 'cuda:0':
            inputs = {k: v.to(device) if torch.is_tensor(v) else v for k, v in inputs.items()}
        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=250,
                do_sample=True,
                temperature=0.7,
                pad_token_id=processor.tokenizer.eos_token_id if hasattr(processor, 'tokenizer') else None
            )
        # Decode response
        response = processor.decode(outputs[0], skip_special_tokens=True)
        # Clean up response
        if prompt in response:
            response = response.replace(prompt, "").strip()
        response = response.replace("<image>", "").strip()
        if not response:
            response = "Model generated empty response. Try a different screenshot or prompt."
        return response
    except Exception as e:
        error_msg = f"❌ Analysis Error: {str(e)}"
        print(error_msg)
        return error_msg
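
# Example of driving the analyzer directly, bypassing the Gradio UI
# (a sketch; "trac_screenshot.png" is a hypothetical local file):
#
#     load_model()
#     img = Image.open("trac_screenshot.png")
#     print(analyze_interface(img, "Longlisting", ""))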

def create_app():
    """Create Gradio interface"""
    with gr.Blocks(title="SmolVLM TRAC Automation") as demo:
        gr.Markdown("""
        # 🎯 SmolVLM TRAC Automation Agent
        **AI Assistant for HR Administrative Tasks**
        - 📋 Longlisting candidates
        - ✅ Shortlisting applications
        - 📅 Interview setup & scheduling
        """)
        with gr.Row():
            with gr.Column():
                # Model loading
                load_btn = gr.Button("🚀 Load Model", variant="primary")
                status = gr.Textbox(label="Status", value="Model not loaded")
                # Image upload
                image_input = gr.Image(label="TRAC Screenshot", type="pil")
                # Task selection
                task_type = gr.Radio(
                    choices=["Longlisting", "Shortlisting", "Interview Setup", "Custom"],
                    value="Longlisting",
                    label="Task Type"
                )
                # Custom prompt
                custom_prompt = gr.Textbox(
                    label="Custom Prompt",
                    placeholder="Describe what to analyze...",
                    lines=3
                )
                analyze_btn = gr.Button("🔍 Analyze", variant="primary")
            with gr.Column():
                result = gr.Textbox(
                    label="Automation Instructions",
                    lines=15,
                    show_copy_button=True
                )
        # Event handlers
        load_btn.click(load_model, outputs=status)
        analyze_btn.click(
            analyze_interface,
            inputs=[image_input, task_type, custom_prompt],
            outputs=result
        )
    return demo
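
# If several users may hit the app at once, calling demo.queue() before
# launch() serializes the GPU-bound generate() calls (a standard Gradio
# option; the script as written launches without a queue).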

if __name__ == "__main__":
    print("🚀 Starting SmolVLM TRAC Automation Interface...")
    app = create_app()
    app.launch(inbrowser=True)
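
# To reach the app from other machines, Gradio's launch() also accepts
# server_name="0.0.0.0" / server_port=7860 for LAN access, or share=True for
# a temporary public link (standard Gradio options, not used above).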