"""
SmolVLM UI Automation Agent - Test Script

Your trained model is ready!
"""
|
|
|
|
import torch
|
|
from transformers import Idefics3ForConditionalGeneration, AutoProcessor
|
|
from PIL import Image
|
|
import os
|
|
|
|
def load_model(model_path=None):
    """Load the trained SmolVLM model together with its processor.

    Args:
        model_path: Location passed to ``from_pretrained`` for both the model
            and the processor. When omitted, the merged checkpoint directory
            this script was originally written for is used, so existing
            ``load_model()`` callers behave exactly as before.

    Returns:
        A ``(model, processor)`` tuple.

    Raises:
        Whatever ``from_pretrained`` raises when the checkpoint cannot be
        loaded; the caller is expected to report it.
    """
    if model_path is None:
        # Default kept for backward compatibility with the original script.
        model_path = r"C:\Users\keith\OneDrive\Desktop\admin.trac.jobs-DATA\LLaMA-Factory_local\smolvlm_final_merged"

    print("Loading your trained SmolVLM UI automation agent...")
    model = Idefics3ForConditionalGeneration.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        # NOTE(review): trust_remote_code=True allows Python code shipped with
        # the checkpoint to run. Idefics3 is imported from transformers itself,
        # so this flag is presumably unnecessary -- confirm and drop it.
        trust_remote_code=True,
    )

    processor = AutoProcessor.from_pretrained(model_path)
    print("Model loaded successfully!")
    return model, processor
|
|
|
|
def analyze_screenshot(image_path: str, model, processor):
    """Analyze a screenshot for UI automation.

    Args:
        image_path: Path of the screenshot file to open.
        model: Generation model as returned by ``load_model``.
        processor: Matching processor as returned by ``load_model``.

    Returns:
        The text generated by the model for the fixed analysis prompt, with
        the prompt itself removed and surrounding whitespace stripped.

    Raises:
        OSError: If the image cannot be opened or decoded (raised by PIL).
    """
    # Image.open is lazy and keeps the file handle open; convert() forces the
    # pixel data to load so the handle can be released right away.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    prompt = (
        "<image>\nAnalyze this interface for UI automation opportunities. "
        "Identify clickable elements and automation targets."
    )

    inputs = processor(text=prompt, images=[image], return_tensors="pt")
    # The model is loaded with device_map="auto", so it may not live on the
    # CPU where the processor creates its tensors; move the inputs to it.
    inputs = inputs.to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )

    # generate() returns prompt tokens followed by the new tokens. Decode only
    # the new ones so the echoed prompt never leaks into the answer.
    prompt_length = inputs["input_ids"].shape[1]
    response = processor.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Kept from the original: if the model emits a chat-style marker itself,
    # return only what follows the last one.
    if "Assistant:" in response:
        response = response.split("Assistant:")[-1]

    return response.strip()
|
|
|
|
def main():
    """Run the interactive console session.

    Prints a banner, loads the model once, then loops over a two-item menu:
    option 1 asks for a screenshot path and prints the analysis, option 2
    quits. A failed model load is reported and ends the session; a failed
    analysis is reported and the menu is shown again. End-of-input or Ctrl+C
    ends the session with the goodbye message.
    """
    # NOTE(review): the "π€", "π", "π―" and "β" prefixes in the messages below
    # look like mis-decoded emoji. They are reproduced exactly as found because
    # the intended glyphs cannot be recovered with certainty from this file.
    print("π€ SmolVLM UI Automation Agent")
    print("=" * 50)
    print("Your custom-trained model for TRAC administration!")
    print()

    # Guard only the load step, so that "Error loading model" is never printed
    # for a failure that happens later in the menu loop.
    try:
        model, processor = load_model()
    except Exception as e:
        print(f"β Error loading model: {e}")
        print("Make sure the model was merged successfully.")
        return

    try:
        while True:
            print("\nOptions:")
            print("1. Analyze a screenshot")
            print("2. Quit")

            choice = input("\nEnter choice (1-2): ").strip()

            if choice == "2":
                print("π Goodbye!")
                break

            if choice != "1":
                print("β Invalid choice!")
                continue

            # Paths pasted from Windows Explorer are often wrapped in quotes.
            image_path = input("Enter path to screenshot: ").strip().strip('"')

            # isfile rather than exists: a directory is not a usable image.
            if not os.path.isfile(image_path):
                print("β Image file not found!")
                continue

            print("\nπ Analyzing screenshot...")
            try:
                result = analyze_screenshot(image_path, model, processor)
            except Exception as e:
                # Report and keep the session alive; the model is still loaded.
                print(f"β Analysis error: {e}")
                continue

            print("\nπ― Analysis Result:")
            print("-" * 30)
            print(result)
            print("-" * 30)
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user pressed Ctrl+C: leave quietly.
        print()
        print("π Goodbye!")
|
|
|
|
if __name__ == "__main__":
    # Start the interactive session only when executed as a script, so the
    # module can be imported without side effects.
    main()
|
|
|