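# NOTE: "Spaces: Sleeping / Sleeping" is web-page status text that was captured
# together with this file; it is not part of the program.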
import gradio as gr | |
from transformers import AutoTokenizer, AutoConfig, pipeline | |
import torch | |
import os | |
from torch import nn | |
from torch.nn import Dropout | |
from transformers import XLMRobertaForSequenceClassification | |
# Access token read from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Flagging callback built from the token and the "crowdsourced-sentiment"
# dataset name; it is passed to the Gradio interface as `flagging_callback`.
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-sentiment")
# Model that predicts both SENTIMENT POLARITY and EMOTIONS from one encoder.
class CustomModel(XLMRobertaForSequenceClassification):
    """XLM-RoBERTa sequence classifier with an extra multi-label emotion head.

    The inherited ``classifier`` head yields the sentiment logits; the added
    ``emotion_classifier`` head yields one logit per emotion label, computed
    from the hidden state at sequence position 0.
    """

    def __init__(self, config, num_emotion_labels):
        """Build the base model and attach the emotion head.

        Args:
            config: Model configuration. ``hidden_size`` and
                ``hidden_dropout_prob`` are read here, ``initializer_range``
                in ``_init_weights``.
            num_emotion_labels: Number of output units of the emotion head.
        """
        super().__init__(config)
        self.num_emotion_labels = num_emotion_labels
        self.dropout_emotion = nn.Dropout(config.hidden_dropout_prob)

        # The two linear layers sit at indices 0 and 3 of the Sequential,
        # exactly as before, so parameter names stay the same.
        hidden_to_mid = nn.Linear(config.hidden_size, 512)
        mid_to_labels = nn.Linear(512, num_emotion_labels)
        self.emotion_classifier = nn.Sequential(
            hidden_to_mid,
            nn.Mish(),
            nn.Dropout(0.3),
            mid_to_labels,
        )
        for linear_layer in (hidden_to_mid, mid_to_labels):
            self._init_weights(linear_layer)

    def _init_weights(self, module):
        """Initialise an ``nn.Linear``: normal weights, zero bias.

        Modules of any other type are left untouched.
        """
        if not isinstance(module, nn.Linear):
            return
        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
        if module.bias is not None:
            module.bias.data.zero_()

    def forward(self, input_ids=None, attention_mask=None, sentiment=None, labels=None):
        """Compute emotion and sentiment logits, plus a loss when labels are given.

        Args:
            input_ids: Forwarded to the encoder.
            attention_mask: Forwarded to the encoder.
            sentiment: Accepted but not used by this method.
            labels: Optional emotion targets for ``BCEWithLogitsLoss``.

        Returns:
            A dict with ``"emotion_logits"`` and ``"sentiment_logits"``; when
            ``labels`` is not None it additionally starts with ``"loss"``.

        Raises:
            ValueError: If the encoder output is not a 3-dimensional tensor.
        """
        encoder_outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = encoder_outputs[0]
        if sequence_output.dim() != 3:
            raise ValueError(f"Expected sequence_output to have 3 dimensions, got {sequence_output.shape}")

        # Hidden state at position 0 of every sequence in the batch.
        first_token_state = sequence_output[:, 0, :]

        emotion_logits = self.emotion_classifier(self.dropout_emotion(first_token_state))

        # The sentiment head runs without gradient tracking, so the loss below
        # (built from the emotion logits only) never updates it.
        # NOTE(review): the unsqueeze(1) presumably exists because the inherited
        # head indexes position 0 along dim 1 itself; confirm against transformers.
        with torch.no_grad():
            sentiment_logits = self.classifier(first_token_state.unsqueeze(1)).squeeze(1)

        result = {}
        if labels is not None:
            # Uniform positive-class weights, one per emotion label.
            pos_weight = torch.ones(self.num_emotion_labels, device=labels.device)
            result["loss"] = nn.BCEWithLogitsLoss(pos_weight=pos_weight)(emotion_logits, labels)
        result["emotion_logits"] = emotion_logits
        result["sentiment_logits"] = sentiment_logits
        return result
# Load the tokenizer, the configuration and the model weights from `model_dir`.
# NOTE(review): the value has the form of a repository id (owner/name) rather
# than a filesystem path; presumably `from_pretrained` resolves it. Confirm.
model_dir = "gsar78/HellenicSentimentAI_v2"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
config = AutoConfig.from_pretrained(model_dir)

# The emotion head is built with 18 output labels.
model = CustomModel.from_pretrained(
    model_dir,
    config=config,
    num_emotion_labels=18,
)
# Label order for the sentiment head outputs.
_SENTIMENT_LABELS = ('negative', 'neutral', 'positive')

# Label order for the emotion head outputs.
_EMOTION_LABELS = (
    'joy', 'trust', 'excitement', 'gratitude', 'hope', 'love', 'pride',
    'anger', 'disgust', 'fear', 'sadness', 'anxiety', 'frustration', 'guilt',
    'disappointment', 'surprise', 'anticipation', 'neutral',
)


# Function to predict sentiment and emotion
def predict(texts, emotion_threshold=10.0, sentiment_threshold=0.0):
    """Classify ``texts`` and return per-text label-to-percentage mappings.

    Args:
        texts: Texts to classify; handed to the tokenizer as one padded,
            truncated batch (``max_length=512``).
        emotion_threshold: An emotion is reported only when its percentage is
            strictly greater than this value. Defaults to 10.0, the cutoff
            that used to be hard-coded.
        sentiment_threshold: Same rule for sentiment classes. Defaults to 0.0.
            Raising it can leave a text with an empty sentiment mapping.

    Returns:
        A tuple ``(emotion_results, sentiment_results)``. Each is a list with
        one dict per input text, mapping a label to its percentage (0-100).
        Emotion percentages come from a sigmoid per label, sentiment
        percentages from a softmax over the three classes.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Keep the model on the same device as the inputs and in evaluation mode.
    model.to(device)
    model.eval()

    encoded = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only: no gradients are recorded, so there is nothing to clear.
    with torch.no_grad():
        outputs = model(**encoded)

    # Convert logits to percentages and to plain Python lists.
    emotion_percentages = (torch.sigmoid(outputs["emotion_logits"]) * 100).tolist()
    sentiment_percentages = (torch.softmax(outputs["sentiment_logits"], dim=1) * 100).tolist()

    emotion_results = [
        {label: pct for label, pct in zip(_EMOTION_LABELS, row) if pct > emotion_threshold}
        for row in emotion_percentages
    ]
    sentiment_results = [
        {label: pct for label, pct in zip(_SENTIMENT_LABELS, row) if pct > sentiment_threshold}
        for row in sentiment_percentages
    ]
    return emotion_results, sentiment_results
def sentiment_analysis_generate_table(text):
    """Classify ``|``-separated sentences and render the results as an HTML table.

    Args:
        text: User input; several sentences may be separated by ``|``.
            ``None`` is treated like an empty string.

    Returns:
        A complete HTML document (string) containing one table row per
        non-blank sentence with its text, top sentiment score, sentiment label
        and the detected emotions. When the input holds no non-blank sentence
        the table body is empty and the model is not called.
    """
    # Local import: the result string below must not be confused with the
    # stdlib module, and the block stays self-contained.
    from html import escape

    # Blank segments (empty input, trailing or doubled "|") carry nothing to
    # classify, so they are dropped instead of producing meaningless rows.
    sentences = [part for part in (text or "").split('|') if part.strip()]

    # NOTE(review): the stylesheet URL below contains "[email protected]", which
    # looks like an e-mail-obfuscation artifact rather than a package version.
    # It is kept verbatim here; restore the real "bootstrap@<version>" spec.
    page_head = """
    <html>
    <head>
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/css/bootstrap.min.css">
    <style>
    .label {
        transition: .15s;
        border-radius: 8px;
        padding: 5px 10px;
        font-size: 14px;
        text-transform: uppercase;
    }
    .positive {
        background-color: rgb(54, 176, 75);
        color: white;
    }
    .negative {
        background-color: rgb(237, 83, 80);
        color: white;
    }
    .neutral {
        background-color: rgb(255, 165, 0);
        color: white;
    }
    th {
        font-weight: bold;
        color: rgb(106, 38, 198);
    }
    </style>
    </head>
    <body>
    <table class="table table-striped">
    <thead>
    <tr>
    <th scope="col">Text</th>
    <th scope="col">Score</th>
    <th scope="col">Sentiment</th>
    <th scope="col">Emotions</th>
    </tr>
    </thead>
    <tbody>
    """
    page_tail = """
    </tbody>
    </table>
    </body>
    </html>
    """

    rows = []
    if sentences:
        emotion_results, sentiment_results = predict(sentences)
        for sentence, emotions, sentiment in zip(sentences, emotion_results, sentiment_results):
            # Highest-scoring sentiment class and its percentage.
            sentiment_label = max(sentiment, key=sentiment.get)
            score = f"{sentiment[sentiment_label]:.2f}%"

            # CSS class: anything that is not positive/negative is styled neutral.
            lowered = sentiment_label.lower()
            sentiment_class = lowered if lowered in ("positive", "negative") else "neutral"

            emotion_tags = ", ".join(f"{label} ({prob:.2f}%)" for label, prob in emotions.items())

            # The sentence is user input: escape it so it is shown as text and
            # cannot inject markup into the page.
            safe_text = escape(sentence.strip())
            rows.append(
                f'<tr><td>{safe_text}</td><td>{score}</td>'
                f'<td><span class="label {sentiment_class}">{sentiment_label}</span></td>'
                f'<td>{emotion_tags}</td></tr>'
            )

    return page_head + "".join(rows) + page_tail
if __name__ == "__main__":
    # Text shown under the title; "<br>" separates the displayed lines.
    app_description = (
        "A sentiment & emotion analysis model, primarily for the Greek language.<br>"
        "Type in some text in Greek, to classify its sentiment & emotion: positive, neutral, or negative, along with detected emotions.<br>"
        "Multiple sentences can be classified when separated by the | character.<br>"
        "Version 2.0 - Developed by GeoSar"
    )

    # Clickable sample inputs, one single-field row per example.
    example_inputs = [
        ["Η πικάντικη γεύση αυτής της σούπας λαχανικών ήταν ακριβώς αυτό που χρειαζόμουν σήμερα. Είχε μια ωραία γαργαλιστική αίσθηση χωρίς να είναι πολύ καυτερή."],
        ["Η πίτσα ήταν καμένη και τα υλικά φθηνής ποιότητας. Σίγουρα δεν θα ξαναπαραγγείλω από εκεί."],
    ]

    # Single text box in, rendered HTML table out; manual flags go to hf_writer.
    iface = gr.Interface(
        fn=sentiment_analysis_generate_table,
        inputs=gr.Textbox(placeholder="Enter sentence here..."),
        outputs=gr.HTML(),
        title="Hellenic Sentiment AI - Version 2.0",
        description=app_description,
        examples=example_inputs,
        allow_flagging="manual",
        flagging_options=["Incorrect", "Ambiguous"],
        flagging_callback=hf_writer,
        examples_per_page=2,
        allow_duplication=False,
        concurrency_limit="default",
    )
    iface.launch(share=True)