import gradio as gr
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, TrainingArguments, Trainer, AutoModelForCausalLM
import torch
import os


def train_model(file, hf_token):
    """Load an uploaded CSV and set up a small causal-LM Trainer on CPU."""
    # hf_token is collected from the UI but is not used in this setup step.
    try:
        # Gradio supplies a temp-file object; .name holds its path on disk.
        df = pd.read_csv(file.name)
        print(f"Loaded CSV with {len(df)} rows")

        # Small model so the whole demo fits comfortably in CPU memory.
        model_name = "facebook/opt-125m"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            low_cpu_mem_usage=True,
            torch_dtype=torch.float32,
        )

        dataset = Dataset.from_pandas(df)
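        # NOTE: Dataset.from_pandas() keeps the raw CSV columns as-is; an actual
        # fine-tuning run would still need a text column tokenized into
        # input_ids/labels (the exact column name depends on the uploaded CSV).
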
        args = TrainingArguments(
            output_dir="./results",
            per_device_train_batch_size=1,
            num_train_epochs=1,
            no_cuda=True,  # force CPU; newer transformers releases prefer use_cpu=True
        )

        trainer = Trainer(
            model=model,
            args=args,
            train_dataset=dataset,
            tokenizer=tokenizer,
        )

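        # NOTE: trainer.train() is not called here; this function only verifies that
        # the model, tokenizer, and Trainer can be constructed on CPU.
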
        return f"Setup successful! Loaded {len(df)} rows for training."

    except Exception as e:
        return f"Error: {str(e)}\nType: {type(e)}"


demo = gr.Interface(
    fn=train_model,
    inputs=[
        gr.File(label="Upload CSV file"),
        gr.Textbox(label="HF Token", type="password"),
    ],
    outputs="text",
    title="Product Classifier Training (CPU)",
)

if __name__ == "__main__":
    demo.launch(debug=True, share=True)
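
# Usage note (assumptions: this file is saved as app.py and dependencies are installed
# via `pip install gradio pandas datasets transformers torch`): run `python app.py`;
# share=True asks Gradio to expose a temporary public link alongside the local URL.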