Spaces:
Running
Running
File size: 2,488 Bytes
b1c2932 b004555 f24527a b1c2932 2eca954 b1c2932 167d21f 4c2c45d b1c2932 2eca954 6e3bd95 b5e3ca1 b1c2932 7658607 b1c2932 84c21a9 b1c2932 7ec1f1b 4c2c45d 13fe76b b1fbb33 b1c2932 a7a15af b1c2932 f24527a b1c2932 f7e1e22 b1c2932 167d21f 4a11db5 f7e1e22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import glob
import os
import shutil

import gradio as gr
import numpy as np
import torch
from huggingface_hub import HfApi
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer

from label_dicts import MANIFESTO_LABEL_NAMES

from .utils import is_disk_full
# Read-only Hugging Face token, injected via the Space's secret store.
# NOTE(review): os.environ[...] raises KeyError at import time if the
# "hf_read" secret is missing — presumably an intentional fail-fast.
HF_TOKEN = os.environ["hf_read"]

# Languages selectable in the UI dropdown; Czech and Slovak are routed to
# a dedicated model by build_huggingface_path().
languages = [
    "Czech", "English", "French", "German", "Hungarian", "Polish", "Slovak"
]

# UI domain label -> internal domain key. The domain value is shown in the
# UI but does not influence model selection in predict_cap().
domains = {
    "parliamentary speech": "parlspeech",
}

# Human-readable names for the 3-way sentiment class ids produced in predict().
SENTIMENT_LABEL_NAMES = {0: "Negative", 1: "No sentiment or Neutral sentiment", 2: "Positive"}
def build_huggingface_path(language: str) -> str:
    """Return the Hugging Face model repo id to use for *language*.

    Czech and Slovak use a dedicated pooled RoBERTa emotion model; every
    other supported language falls back to the multilingual MORES model.
    """
    # Membership test instead of the chained `== ... or ==` comparison.
    if language in ("Czech", "Slovak"):
        return "visegradmedia-emotion/Emotion_RoBERTa_pooled_V4"
    return "poltextlab/xlm-roberta-large-pooled-MORES"
def predict(text, model_id, tokenizer_id):
    """Run 3-class sentiment inference on *text* with the given model.

    Returns a ``(label, info_html)`` pair: the human-readable sentiment
    label and an HTML snippet crediting the model that produced it.
    """
    device = torch.device("cpu")

    # Download/load the model; the read token is passed so private or
    # gated repos can be fetched. NOTE(review): the tokenizer is loaded
    # without the token — presumably its repo is public; also, combining
    # device_map="auto" with a later .to(device) may conflict in newer
    # accelerate versions — confirm against the deployed versions.
    model = AutoModelForSequenceClassification.from_pretrained(
        model_id,
        low_cpu_mem_usage=True,
        device_map="auto",
        offload_folder="offload",
        token=HF_TOKEN,
    )
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
    model.to(device)

    encoded = tokenizer(
        text,
        max_length=256,
        truncation=True,
        padding="do_not_pad",
        return_tensors="pt",
    ).to(device)

    model.eval()
    with torch.no_grad():
        logits = model(**encoded).logits

    probabilities = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
    raw_class = probabilities.argmax()
    # Collapse raw head indices onto the 3-way sentiment ids:
    # 4 -> 2 (Positive), 5 -> 1 (Neutral), anything else -> 0 (Negative).
    # NOTE(review): presumably matches the pooled model's label layout —
    # verify against the model card.
    sentiment_id = {4: 2, 5: 1}.get(raw_class, 0)
    output_pred = SENTIMENT_LABEL_NAMES.get(sentiment_id, sentiment_id)
    output_info = f'<p style="text-align: center; display: block">Prediction was made using the <a href="https://huggingface.co/{model_id}">{model_id}</a> model.</p>'
    return output_pred, output_info
def predict_cap(text, language, domain):
    """Gradio entry point: pick the model for *language* and classify *text*.

    *domain* is accepted to match the UI signature but does not affect
    model selection. Returns the ``(label, info_html)`` pair from predict().
    """
    model_id = build_huggingface_path(language)
    tokenizer_id = "xlm-roberta-large"

    # Best-effort cache eviction when the Space's disk is full, so the next
    # model download has room. Previously done via os.system("rm -rf ...")
    # with ignored exit codes; use stdlib removal instead (errors are still
    # deliberately swallowed — this is opportunistic cleanup).
    if is_disk_full():
        for path in glob.glob("/data/models*"):
            if os.path.isdir(path):
                shutil.rmtree(path, ignore_errors=True)
            else:
                try:
                    os.remove(path)
                except OSError:
                    pass
        shutil.rmtree(
            os.path.expanduser("~/.cache/huggingface/hub"), ignore_errors=True
        )

    return predict(text, model_id, tokenizer_id)
# Wire up the Gradio UI: free-text input plus language/domain selectors;
# outputs are the predicted label and a model-attribution note.
_input_widgets = [
    gr.Textbox(lines=6, label="Input"),
    gr.Dropdown(languages, label="Language"),
    gr.Dropdown(domains.keys(), label="Domain"),
]
demo = gr.Interface(
    fn=predict_cap,
    title="Sentiment (3) Babel Demo",
    inputs=_input_widgets,
    outputs=[gr.Label(num_top_classes=3, label="Output"), gr.Markdown()],
)
|