File size: 901 Bytes
d632c9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Pretrained multilingual checkpoint (XLM-RoBERTa) shared by the tokenizer
# and the sequence-classification model below.
MODEL_NAME = "xlm-roberta-base"

# Tokenizer and model are loaded once at import time and reused per request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# NOTE(review): MODEL_NAME looks like a base (not fine-tuned) checkpoint, so
# the 5-way classification head is presumably freshly initialized here --
# confirm before trusting the predicted categories.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=5,
)

# Classification Function
def classify_text(text):
    """Classify *text* and report the winning class index.

    The text is tokenized with the module-level ``tokenizer`` (truncated to
    at most 512 tokens), passed through the module-level ``model`` with
    gradient tracking disabled, and the index of the largest logit is
    formatted for display.

    Args:
        text: Input text to classify.

    Returns:
        A string of the form ``"Predicted Category: <index>"``.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )

    # Inference only, so skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Index of the highest-scoring class along the class dimension.
    predicted_index = logits.argmax(dim=1).item()
    return f"Predicted Category: {predicted_index}"

# Gradio UI: a two-line text box feeding classify_text, with plain-text output.
demo = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Enter business document text...",
    ),
    outputs="text",
    title="Multilingual Business Document Classifier",
)

# Start the web app as soon as this module is executed.
demo.launch()