# NOTE: the next line is residue from the hosting site's file viewer, not program text:
# gsar78's picture | 2f153ef verified | history blame | 8.3 kB
import gradio as gr
from transformers import AutoTokenizer, AutoConfig, pipeline
import torch
import os
from torch import nn
from torch.nn import Dropout
from transformers import XLMRobertaForSequenceClassification
# Access token for the Hugging Face Hub, taken from the process environment
# (evaluates to None when the variable is not set).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Saver that writes flagged samples to the "crowdsourced-sentiment" dataset.
# NOTE(review): presumably handed to the Gradio interface as its flagging
# callback -- that wiring is not visible in this part of the file.
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-sentiment")
# Define the CustomModel class which is predicting Both SENTIMENT POLARITY & EMOTIONS
class CustomModel(XLMRobertaForSequenceClassification):
    """XLM-RoBERTa sequence classifier with an additional multi-label emotion head.

    The inherited ``classifier`` head produces the sentiment logits; a second
    head (``emotion_classifier``) produces one logit per emotion label from the
    first-token hidden state.
    """

    def __init__(self, config, num_emotion_labels):
        """Build the base model and attach the emotion head.

        Args:
            config: Model configuration; ``hidden_size``, ``hidden_dropout_prob``
                and ``initializer_range`` are read from it.
            num_emotion_labels: Number of output units of the emotion head.
        """
        super().__init__(config)
        self.num_emotion_labels = num_emotion_labels
        self.dropout_emotion = nn.Dropout(config.hidden_dropout_prob)
        # NOTE(review): only these two Linear layers are present in the
        # available source. If the trained checkpoint had layers between them
        # (activation / dropout), the Sequential indices -- and therefore the
        # state-dict keys -- will differ; verify against the checkpoint.
        self.emotion_classifier = nn.Sequential(
            nn.Linear(config.hidden_size, 512),
            nn.Linear(512, num_emotion_labels),
        )

    def _init_weights(self, module):
        """Initialise ``nn.Linear`` modules; other module types are left untouched.

        NOTE(review): reconstructed from a damaged source line that only kept
        ``std=self.config.initializer_range``; a zero-mean normal weight init
        and zero bias init are assumed -- confirm against the original.
        """
        if isinstance(module, nn.Linear):
            nn.init.normal_(module.weight, mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

    def forward(self, input_ids=None, attention_mask=None, sentiment=None, labels=None):
        """Run the encoder and both heads.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Attention mask passed to the encoder.
            sentiment: Accepted for interface compatibility; not used.
            labels: Optional emotion targets. When given, a
                ``BCEWithLogitsLoss`` over the emotion logits is returned too.

        Returns:
            dict with ``emotion_logits`` and ``sentiment_logits``, plus ``loss``
            when ``labels`` is provided.

        Raises:
            ValueError: If the encoder output is not a 3-dimensional tensor.
        """
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs[0]
        if sequence_output.dim() != 3:
            raise ValueError(f"Expected sequence_output to have 3 dimensions, got {sequence_output.shape}")

        # Emotion head: first-token state -> dropout -> classifier.
        cls_hidden_states = self.dropout_emotion(sequence_output[:, 0, :])
        emotion_logits = self.emotion_classifier(cls_hidden_states)

        # Sentiment head is evaluated without gradient tracking, so the loss
        # below never updates it. The first-token state is given a length-1
        # middle axis before the inherited head is applied.
        # NOTE(review): presumably because that head indexes position 0 itself.
        with torch.no_grad():
            cls_token_state = sequence_output[:, 0, :].unsqueeze(1)
            sentiment_logits = self.classifier(cls_token_state).squeeze(1)

        if labels is None:
            return {"emotion_logits": emotion_logits, "sentiment_logits": sentiment_logits}

        # Uniform positive-class weights, created directly on the labels' device.
        class_weights = torch.ones(self.num_emotion_labels, device=labels.device)
        loss = nn.BCEWithLogitsLoss(pos_weight=class_weights)(emotion_logits, labels)
        return {"loss": loss, "emotion_logits": emotion_logits, "sentiment_logits": sentiment_logits}
# Load the tokenizer, configuration and model weights for the app.
# NOTE(review): the identifier below has the "owner/name" form of a Hub
# repository id rather than a local directory path -- confirm which is intended.
model_dir = "gsar78/HellenicSentimentAI_v2"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
config = AutoConfig.from_pretrained(model_dir)
# 18 emotion outputs: the same count as the `emotion_labels` list in predict().
model = CustomModel.from_pretrained(model_dir, config=config, num_emotion_labels=18)
# Function to predict sentiment and emotion
def predict(texts):
    """Classify sentiment and emotions for a batch of texts.

    Args:
        texts: A list of strings (or a single string) to classify.

    Returns:
        A pair ``(emotion_results, sentiment_results)``. Each is a list with
        one dict per input text mapping label -> probability in percent.
        Emotions are kept only above 10 %; sentiment entries only above 0 %.
        An empty list/tuple of texts yields ``([], [])``.
    """
    # Nothing to tokenize: return early instead of calling the tokenizer
    # with an empty batch.
    if isinstance(texts, (list, tuple)) and not texts:
        return [], []

    # Label order must match the order of the model's output units.
    sentiment_labels = ['negative', 'neutral', 'positive']
    emotion_labels = [
        'joy', 'trust', 'excitement', 'gratitude', 'hope', 'love', 'pride',
        'anger', 'disgust', 'fear', 'sadness', 'anxiety', 'frustration', 'guilt',
        'disappointment', 'surprise', 'anticipation', 'neutral',
    ]
    # Display thresholds, in percent.
    sentiment_threshold = 0.0
    emotion_threshold = 10.0

    # Tokenize the input texts
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)

    # Put inputs and model on the same device; a mismatch makes the forward
    # pass fail.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    model.to(device)
    model.eval()  # evaluation mode: disables dropout

    # Inference only -- no gradients are tracked, so none need clearing.
    with torch.no_grad():
        outputs = model(**inputs)

    # Emotions are independent labels (sigmoid); sentiment is a single
    # choice among classes (softmax). Both are scaled to percent.
    emotion_probs_list = (torch.sigmoid(outputs["emotion_logits"]) * 100).tolist()
    sentiment_probs_list = (torch.softmax(outputs["sentiment_logits"], dim=1) * 100).tolist()

    # Map probabilities to their labels, dropping entries at or below threshold.
    emotion_results = [
        {label: prob for label, prob in zip(emotion_labels, sample) if prob > emotion_threshold}
        for sample in emotion_probs_list
    ]
    sentiment_results = [
        {label: prob for label, prob in zip(sentiment_labels, sample) if prob > sentiment_threshold}
        for sample in sentiment_probs_list
    ]
    return emotion_results, sentiment_results
def sentiment_analysis_generate_table(text):
    """Render sentiment/emotion predictions for ``text`` as an HTML table.

    Args:
        text: User input. Several sentences may be given, separated by ``|``.
            Blank segments are ignored.

    Returns:
        An HTML string: stylesheet link, inline styles and a table with one
        row per non-blank sentence (text, top sentiment score, sentiment
        label, detected emotions). With no non-blank sentence the table has
        a header only and the model is not called.
    """
    # Imported locally (and under a distinct name) so the block stays
    # self-contained.
    from html import escape

    # NOTE(review): the stylesheet href below looks mangled (e-mail
    # obfuscation by the hosting page); it is kept as found -- restore the
    # real CDN URL.
    table_head = """
<link rel="stylesheet" href="[email protected]/css/bootstrap.min.css">
<style>
.label {
    transition: .15s;
    border-radius: 8px;
    padding: 5px 10px;
    font-size: 14px;
    text-transform: uppercase;
}
.positive {
    background-color: rgb(54, 176, 75);
    color: white;
}
.negative {
    background-color: rgb(237, 83, 80);
    color: white;
}
.neutral {
    background-color: rgb(255, 165, 0);
    color: white;
}
th {
    font-weight: bold;
    color: rgb(106, 38, 198);
}
</style>
<table class="table table-striped">
<thead>
<tr>
<th scope="col">Text</th>
<th scope="col">Score</th>
<th scope="col">Sentiment</th>
<th scope="col">Emotions</th>
</tr>
</thead>
<tbody>
"""
    table_tail = """
</tbody>
</table>
"""

    # Split on '|' and drop blank segments (e.g. from a trailing separator).
    sentences = [part.strip() for part in (text or "").split('|')]
    sentences = [sentence for sentence in sentences if sentence]

    rows = []
    if sentences:
        emotion_results, sentiment_results = predict(sentences)
        for sentence, emotions, sentiment in zip(sentences, emotion_results, sentiment_results):
            # Highest-probability sentiment and its score.
            sentiment_label = max(sentiment, key=sentiment.get)
            score = f"{sentiment[sentiment_label]:.2f}%"

            # Determine the sentiment class; anything unrecognised is neutral.
            lowered = sentiment_label.lower()
            sentiment_class = lowered if lowered in ("positive", "negative") else "neutral"

            # Generate emotion tags
            emotion_tags = ", ".join(f"{label} ({prob:.2f}%)" for label, prob in emotions.items())

            # The sentence is untrusted user input: escape it before
            # embedding it in markup.
            safe_text = escape(sentence)
            rows.append(
                f'<tr><td>{safe_text}</td><td>{score}</td>'
                f'<td><span class="label {sentiment_class}">{sentiment_label}</span></td>'
                f'<td>{emotion_tags}</td></tr>'
            )

    return table_head + "".join(rows) + table_tail
if __name__ == "__main__":
iface = gr.Interface(
inputs=gr.Textbox(placeholder="Enter sentence here..."),
title="Hellenic Sentiment AI - Version 2.0",
description="A sentiment & emotion analysis model, primarily for the Greek language.<br>"
"Type in some text in Greek, to classify its sentiment & emotion: positive, neutral, or negative, along with detected emotions.<br>"
"Multiple sentences can be classified when separated by the | character.<br>"
"Version 2.0 - Developed by GeoSar",
["Η πικάντικη γεύση αυτής της σούπας λαχανικών ήταν ακριβώς αυτό που χρειαζόμουν σήμερα. Είχε μια ωραία γαργαλιστική αίσθηση χωρίς να είναι πολύ καυτερή."],
["Η πίτσα ήταν καμένη και τα υλικά φθηνής ποιότητας. Σίγουρα δεν θα ξαναπαραγγείλω από εκεί."]
flagging_options=["Incorrect", "Ambiguous"],