"""Gradio demo for Hellenic Sentiment AI v2.

Loads a multi-task XLM-RoBERTa checkpoint that predicts both sentiment
polarity and a set of emotions, and serves it through a Gradio interface
that renders the predictions as an HTML table.
"""

import os
from html import escape
from typing import Dict, List, Tuple

import gradio as gr
import torch
from torch import nn
from torch.nn import Dropout
from transformers import AutoTokenizer, AutoConfig, pipeline
from transformers import XLMRobertaForSequenceClassification

HF_TOKEN = os.getenv('HF_TOKEN')
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-sentiment")

# Output labels, in the order of the corresponding logits.
SENTIMENT_LABELS = ['negative', 'neutral', 'positive']
EMOTION_LABELS = [
    'joy', 'trust', 'excitement', 'gratitude', 'hope', 'love', 'pride',
    'anger', 'disgust', 'fear', 'sadness', 'anxiety', 'frustration',
    'guilt', 'disappointment', 'surprise', 'anticipation', 'neutral',
]


# Define the CustomModel class which is predicting both SENTIMENT POLARITY & EMOTIONS
class CustomModel(XLMRobertaForSequenceClassification):
    """XLM-RoBERTa sequence classifier with an additional emotion head.

    The sentiment logits come from the ``classifier`` head inherited from
    ``XLMRobertaForSequenceClassification``; the emotion logits come from a
    small MLP applied to the hidden state of the first token.
    """

    def __init__(self, config, num_emotion_labels):
        """Build the base model and attach the emotion head.

        Args:
            config: Model configuration; ``hidden_size``,
                ``hidden_dropout_prob`` and ``initializer_range`` are read.
            num_emotion_labels: Number of outputs of the emotion head.
        """
        super().__init__(config)
        self.num_emotion_labels = num_emotion_labels
        self.dropout_emotion = nn.Dropout(config.hidden_dropout_prob)
        self.emotion_classifier = nn.Sequential(
            nn.Linear(config.hidden_size, 512),
            nn.Mish(),
            nn.Dropout(0.3),
            nn.Linear(512, num_emotion_labels),
        )
        # Explicitly initialise the two Linear layers of the emotion head.
        self._init_weights(self.emotion_classifier[0])
        self._init_weights(self.emotion_classifier[3])

    def _init_weights(self, module):
        """Initialise an ``nn.Linear``: normal weights, zero bias.

        Modules of any other type are left untouched.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()

    def forward(self, input_ids=None, attention_mask=None, sentiment=None, labels=None):
        """Run the encoder and both heads.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Attention mask passed to the encoder.
            sentiment: Accepted for interface compatibility; not used.
            labels: Optional emotion targets. When given, a
                ``BCEWithLogitsLoss`` over the emotion logits is returned.

        Returns:
            A dict with ``emotion_logits`` and ``sentiment_logits``, plus
            ``loss`` when ``labels`` is provided.

        Raises:
            ValueError: If the encoder output is not a 3-D tensor.
        """
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs[0]
        if len(sequence_output.shape) != 3:
            raise ValueError(
                f"Expected sequence_output to have 3 dimensions, got {sequence_output.shape}"
            )

        # Emotion head: dropout + MLP on the first-token hidden state.
        cls_hidden_states = sequence_output[:, 0, :]
        cls_hidden_states = self.dropout_emotion(cls_hidden_states)
        emotion_logits = self.emotion_classifier(cls_hidden_states)

        # Sentiment head runs under no_grad, so no gradients flow through it.
        # NOTE(review): the inherited head presumably selects index 0 of the
        # sequence axis itself, hence the unsqueeze(1) - confirm against the
        # installed transformers version.
        with torch.no_grad():
            cls_token_state = sequence_output[:, 0, :].unsqueeze(1)
            sentiment_logits = self.classifier(cls_token_state).squeeze(1)

        if labels is not None:
            # Uniform positive-class weights (all ones).
            class_weights = torch.ones(self.num_emotion_labels, device=labels.device)
            loss_fct = nn.BCEWithLogitsLoss(pos_weight=class_weights)
            loss = loss_fct(emotion_logits, labels)
            return {
                "loss": loss,
                "emotion_logits": emotion_logits,
                "sentiment_logits": sentiment_logits,
            }
        return {"emotion_logits": emotion_logits, "sentiment_logits": sentiment_logits}


# Load the tokenizer and model identified by `model_dir`
model_dir = "gsar78/HellenicSentimentAI_v2"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
config = AutoConfig.from_pretrained(model_dir)
model = CustomModel.from_pretrained(
    model_dir, config=config, num_emotion_labels=len(EMOTION_LABELS)
)

# Place the model once, at load time, instead of on every request.
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
model.eval()


# Function to predict sentiment and emotion
def predict(
    texts: List[str],
    emotion_threshold: float = 10.0,
    sentiment_threshold: float = 0.0,
) -> Tuple[List[Dict[str, float]], List[Dict[str, float]]]:
    """Predict emotions and sentiment for a batch of texts.

    Args:
        texts: Texts to classify.
        emotion_threshold: Only emotions whose probability (in percent) is
            strictly greater than this value are reported.
        sentiment_threshold: Only sentiments whose probability (in percent)
            is strictly greater than this value are reported.

    Returns:
        ``(emotion_results, sentiment_results)``: two lists with one dict
        per input text, each mapping a label to its probability in percent.
    """
    if not texts:
        return [], []

    # Tokenize the input texts and move them to the model's device.
    inputs = tokenizer(
        texts, padding=True, truncation=True, return_tensors="pt", max_length=512
    )
    device = model.device
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    # Emotions are scored independently (sigmoid); sentiment classes are
    # normalised against each other (softmax). Both are scaled to percent.
    emotion_probs_list = (torch.sigmoid(outputs["emotion_logits"]) * 100).tolist()
    sentiment_probs_list = (
        torch.softmax(outputs["sentiment_logits"], dim=1) * 100
    ).tolist()

    emotion_results = [
        {
            label: prob
            for label, prob in zip(EMOTION_LABELS, sample)
            if prob > emotion_threshold
        }
        for sample in emotion_probs_list
    ]
    sentiment_results = [
        {
            label: prob
            for label, prob in zip(SENTIMENT_LABELS, sample)
            if prob > sentiment_threshold
        }
        for sample in sentiment_probs_list
    ]
    return emotion_results, sentiment_results


# Minimal styling for the result table: bold headers and one background
# colour per sentiment class.
_TABLE_STYLE = (
    "<style>"
    ".hsa-table {border-collapse: collapse; width: 100%;}"
    ".hsa-table th, .hsa-table td"
    " {border: 1px solid #ccc; padding: 6px 10px; text-align: left; color: #111;}"
    ".hsa-table th {font-weight: bold; background-color: #e0e0e0;}"
    ".hsa-table tr.positive td {background-color: #d4edda;}"
    ".hsa-table tr.negative td {background-color: #f8d7da;}"
    ".hsa-table tr.neutral td {background-color: #fff3cd;}"
    "</style>"
)
_TABLE_HEADER = (
    "<tr><th>Text</th><th>Score</th><th>Sentiment</th><th>Emotions</th></tr>"
)


def sentiment_analysis_generate_table(text: str) -> str:
    """Classify ``|``-separated sentences and render the results as HTML.

    Args:
        text: Input text; multiple sentences may be separated by ``|``.
            Blank segments are ignored.

    Returns:
        An HTML fragment containing a table with one row per sentence:
        the sentence, the top sentiment score, the top sentiment label and
        the detected emotions.
    """
    sentences = [part.strip() for part in (text or "").split('|')]
    sentences = [sentence for sentence in sentences if sentence]
    emotion_results, sentiment_results = predict(sentences)

    rows = []
    for sentence, emotions, sentiment in zip(sentences, emotion_results, sentiment_results):
        sentiment_label = max(sentiment, key=sentiment.get)
        score = f"{sentiment[sentiment_label]:.2f}%"

        # CSS class of the row; anything unexpected is rendered as neutral.
        if sentiment_label.lower() in ("positive", "negative"):
            sentiment_class = sentiment_label.lower()
        else:
            sentiment_class = "neutral"

        emotion_tags = ", ".join(
            f"{label} ({prob:.2f}%)" for label, prob in emotions.items()
        )

        # The sentence is user input: escape it before embedding it in HTML.
        rows.append(
            f'<tr class="{sentiment_class}">'
            f"<td>{escape(sentence)}</td>"
            f"<td>{score}</td>"
            f"<td>{sentiment_label}</td>"
            f"<td>{emotion_tags}</td>"
            "</tr>"
        )

    return (
        _TABLE_STYLE
        + '<table class="hsa-table">'
        + _TABLE_HEADER
        + "".join(rows)
        + "</table>"
    )


if __name__ == "__main__":
    iface = gr.Interface(
        fn=sentiment_analysis_generate_table,
        inputs=gr.Textbox(placeholder="Enter sentence here..."),
        outputs=gr.HTML(),
        title="Hellenic Sentiment AI - Version 2.0",
        description=(
            "A sentiment & emotion analysis model, primarily for the Greek language.<br>"
            "Type in some text in Greek, to classify its sentiment & emotion: "
            "positive, neutral, or negative, along with detected emotions.<br>"
            "Multiple sentences can be classified when separated by the | character.<br>"
            "Version 2.0 - Developed by GeoSar"
        ),
        examples=[
            ["Η πικάντικη γεύση αυτής της σούπας λαχανικών ήταν ακριβώς αυτό που χρειαζόμουν σήμερα. Είχε μια ωραία γαργαλιστική αίσθηση χωρίς να είναι πολύ καυτερή."],
            ["Η πίτσα ήταν καμένη και τα υλικά φθηνής ποιότητας. Σίγουρα δεν θα ξαναπαραγγείλω από εκεί."],
        ],
        allow_flagging="manual",
        flagging_options=["Incorrect", "Ambiguous"],
        flagging_callback=hf_writer,
        examples_per_page=2,
        allow_duplication=False,
        concurrency_limit="default",
    )
    iface.launch(share=True)