Spaces:

gsar78
/

HellenicSentimentAI_v2

Sleeping

App Files Files Community

HellenicSentimentAI_v2 / app.py

gsar78

Create app.py

2f153ef verified 8 months ago

raw

history blame contribute delete

8.3 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoConfig, pipeline
	import torch
	import os
	from torch import nn
	from torch.nn import Dropout
	from transformers import XLMRobertaForSequenceClassification

	# Hugging Face access token taken from the environment; None when the variable is unset.
	HF_TOKEN = os.environ.get('HF_TOKEN')
	# Flagging callback handed to the Gradio interface below; flagged samples are
	# presumably stored in the "crowdsourced-sentiment" dataset -- confirm against the Gradio docs.
	hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-sentiment")

	# Define the CustomModel class which predicts both SENTIMENT POLARITY & EMOTIONS
	class CustomModel(XLMRobertaForSequenceClassification):
	    """XLM-RoBERTa sequence classifier extended with a multi-label emotion head.

	    The sentiment logits come from the inherited ``self.classifier`` head and
	    are computed without gradient tracking; the emotion logits come from the
	    extra ``emotion_classifier`` MLP defined here, which is the only head
	    that contributes to the loss.
	    """

	    def __init__(self, config, num_emotion_labels):
	        """Build the base model and attach the emotion head.

	        Args:
	            config: Model configuration; ``hidden_size``,
	                ``hidden_dropout_prob`` and ``initializer_range`` are read.
	            num_emotion_labels: Number of outputs of the emotion head.
	        """
	        super().__init__(config)
	        self.num_emotion_labels = num_emotion_labels
	        self.dropout_emotion = nn.Dropout(config.hidden_dropout_prob)
	        self.emotion_classifier = nn.Sequential(
	            nn.Linear(config.hidden_size, 512),
	            nn.Mish(),
	            nn.Dropout(0.3),
	            nn.Linear(512, num_emotion_labels),
	        )
	        # Only the two Linear layers (indices 0 and 3) carry weights to initialise.
	        self._init_weights(self.emotion_classifier[0])
	        self._init_weights(self.emotion_classifier[3])

	    def _init_weights(self, module):
	        """Initialise an ``nn.Linear`` with N(0, initializer_range) weights and zero bias.

	        Modules of any other type are left untouched.
	        """
	        # NOTE(review): this name appears to shadow the weight-init hook of the
	        # transformers base class, so non-Linear modules would no longer be
	        # handled by it -- confirm this narrowing is intended.
	        if isinstance(module, nn.Linear):
	            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
	            if module.bias is not None:
	                module.bias.data.zero_()

	    def forward(self, input_ids=None, attention_mask=None, sentiment=None, labels=None):
	        """Run the encoder and both classification heads.

	        Args:
	            input_ids: Token ids forwarded to the encoder.
	            attention_mask: Attention mask forwarded to the encoder.
	            sentiment: Accepted for interface compatibility; not used.
	            labels: Optional multi-hot emotion targets. Any numeric or boolean
	                dtype is accepted; they are cast to the logits' dtype.

	        Returns:
	            dict with ``"emotion_logits"`` and ``"sentiment_logits"``, plus
	            ``"loss"`` (binary cross-entropy on the emotion logits) when
	            ``labels`` is given.

	        Raises:
	            ValueError: If the encoder output is not a 3-dimensional tensor.
	        """
	        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
	        sequence_output = outputs[0]
	        if sequence_output.dim() != 3:
	            raise ValueError(f"Expected sequence_output to have 3 dimensions, got {sequence_output.shape}")

	        # Hidden state of the first token of every sequence.
	        cls_hidden_states = sequence_output[:, 0, :]
	        emotion_logits = self.emotion_classifier(self.dropout_emotion(cls_hidden_states))

	        # The sentiment head is evaluated without gradients, so it is never
	        # updated through this forward pass. The length-1 sequence axis is added
	        # because the inherited head is fed a 3-D tensor here and squeezed back.
	        with torch.no_grad():
	            cls_token_state = cls_hidden_states.unsqueeze(1)
	            sentiment_logits = self.classifier(cls_token_state).squeeze(1)

	        if labels is None:
	            return {"emotion_logits": emotion_logits, "sentiment_logits": sentiment_logits}

	        # Uniform positive-class weights, created directly on the labels' device.
	        class_weights = torch.ones(self.num_emotion_labels, device=labels.device)
	        loss_fct = nn.BCEWithLogitsLoss(pos_weight=class_weights)
	        # BCEWithLogitsLoss needs floating-point targets; integer multi-hot
	        # labels would otherwise raise a dtype error.
	        loss = loss_fct(emotion_logits, labels.to(dtype=emotion_logits.dtype))
	        return {"loss": loss, "emotion_logits": emotion_logits, "sentiment_logits": sentiment_logits}

	# Load the tokenizer, configuration and weights for the checkpoint named by
	# model_dir. The value is a "namespace/name" identifier rather than a
	# filesystem path -- presumably resolved against the Hugging Face Hub.
	model_dir = "gsar78/HellenicSentimentAI_v2"
	tokenizer = AutoTokenizer.from_pretrained(model_dir)
	config = AutoConfig.from_pretrained(model_dir)
	model = CustomModel.from_pretrained(
	    model_dir,
	    config=config,
	    num_emotion_labels=18,  # same count as the emotion label list used in predict()
	)

	# Function to predict sentiment and emotion
	def predict(texts, emotion_threshold=10.0, sentiment_threshold=0.0):
	    """Score *texts* with the module-level model and label the probabilities.

	    Args:
	        texts: Batch of input strings handed to the module-level tokenizer.
	        emotion_threshold: Emotions are reported only when their probability,
	            in percent, is strictly greater than this value.
	        sentiment_threshold: Sentiment classes are reported only when their
	            probability, in percent, is strictly greater than this value.

	    Returns:
	        ``(emotion_results, sentiment_results)``: two lists with one dict per
	        input, each mapping a label to its probability in percent. Emotion
	        probabilities are independent sigmoid scores; sentiment probabilities
	        are a softmax over the three sentiment classes.
	    """
	    # Nothing to score: avoid handing an empty batch to the tokenizer and model.
	    if isinstance(texts, (list, tuple)) and not texts:
	        return [], []

	    # Label order must match the order of the model's output units.
	    sentiment_labels = ('negative', 'neutral', 'positive')
	    emotion_labels = (
	        'joy', 'trust', 'excitement', 'gratitude', 'hope', 'love', 'pride',
	        'anger', 'disgust', 'fear', 'sadness', 'anxiety', 'frustration', 'guilt',
	        'disappointment', 'surprise', 'anticipation', 'neutral',
	    )

	    # Tokenize the input texts (padded to the longest, truncated to 512 tokens).
	    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)

	    # Keep the inputs and the model on the same device.
	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
	    model.to(device)
	    model.eval()  # disable dropout for inference

	    with torch.no_grad():
	        outputs = model(**inputs)

	    # Convert logits to percentages as plain Python lists.
	    emotion_percentages = (torch.sigmoid(outputs["emotion_logits"]) * 100).tolist()
	    sentiment_percentages = (torch.softmax(outputs["sentiment_logits"], dim=1) * 100).tolist()

	    emotion_results = [
	        {label: prob for label, prob in zip(emotion_labels, sample) if prob > emotion_threshold}
	        for sample in emotion_percentages
	    ]
	    sentiment_results = [
	        {label: prob for label, prob in zip(sentiment_labels, sample) if prob > sentiment_threshold}
	        for sample in sentiment_percentages
	    ]

	    return emotion_results, sentiment_results

	def sentiment_analysis_generate_table(text):
	    """Classify every segment of *text* and render the results as an HTML table.

	    Args:
	        text: User input. Several sentences may be supplied, separated by a
	            pipe character; a pipe preceded by a backslash is accepted too.

	    Returns:
	        An HTML document (string) with one table row per non-blank segment,
	        showing the text, the top sentiment score, the sentiment label and
	        the detected emotions. Blank or empty input yields a table without rows.
	    """
	    # Imported locally so the block stays self-contained.
	    from html import escape

	    # Accept both "|" and the backslash-escaped "\|" as separator: normalise
	    # the escaped form first, then split on the bare pipe.
	    segments = text.replace('\\|', '|').split('|')
	    # Blank segments (empty input, trailing separator) carry nothing to classify.
	    sentences = [segment for segment in segments if segment.strip()]

	    if sentences:
	        emotion_results, sentiment_results = predict(sentences)
	    else:
	        emotion_results, sentiment_results = [], []

	    # Static part of the page: stylesheet, label colours and table header.
	    # NOTE(review): the original stylesheet URL contained an obfuscated
	    # "[email protected]" placeholder and could not resolve; Bootstrap
	    # 4.6.2 is assumed here -- confirm the intended package and version.
	    page_head = """
	<html>
	<head>
	<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.6.2/dist/css/bootstrap.min.css">
	<style>
	.label {
	transition: .15s;
	border-radius: 8px;
	padding: 5px 10px;
	font-size: 14px;
	text-transform: uppercase;
	}
	.positive {
	background-color: rgb(54, 176, 75);
	color: white;
	}
	.negative {
	background-color: rgb(237, 83, 80);
	color: white;
	}
	.neutral {
	background-color: rgb(255, 165, 0);
	color: white;
	}
	th {
	font-weight: bold;
	color: rgb(106, 38, 198);
	}
	</style>
	</head>
	<body>
	<table class="table table-striped">
	<thead>
	<tr>
	<th scope="col">Text</th>
	<th scope="col">Score</th>
	<th scope="col">Sentiment</th>
	<th scope="col">Emotions</th>
	</tr>
	</thead>
	<tbody>
	"""
	    page_tail = """
	</tbody>
	</table>
	</body>
	</html>
	"""

	    rows = []
	    for sentence, emotions, sentiment in zip(sentences, emotion_results, sentiment_results):
	        # The sentiment shown is the class with the highest probability.
	        sentiment_label = max(sentiment, key=sentiment.get)
	        score = f"{sentiment[sentiment_label]:.2f}%"

	        # CSS class for the coloured badge; anything unexpected falls back to neutral.
	        lowered = sentiment_label.lower()
	        sentiment_class = lowered if lowered in ("positive", "negative") else "neutral"

	        emotion_tags = ", ".join(f"{label} ({prob:.2f}%)" for label, prob in emotions.items())

	        # The sentence is user input: escape it so it cannot inject markup.
	        cell_text = escape(sentence.strip())
	        rows.append(
	            f'<tr><td>{cell_text}</td><td>{score}</td>'
	            f'<td><span class="label {sentiment_class}">{sentiment_label}</span></td>'
	            f'<td>{emotion_tags}</td></tr>'
	        )

	    return page_head + "".join(rows) + page_tail

	if __name__ == "__main__":
	    # Build the Gradio UI around sentiment_analysis_generate_table: a single
	    # text box in, the rendered HTML table out, with manual flagging routed
	    # through the hf_writer callback.
	    iface = gr.Interface(
	        fn=sentiment_analysis_generate_table,
	        inputs=gr.Textbox(placeholder="Enter sentence here..."),
	        outputs=gr.HTML(),
	        title="Hellenic Sentiment AI - Version 2.0",
	        # "\\|" keeps the exact text of the original description while
	        # avoiding the invalid "\|" escape sequence in the source.
	        description=(
	            "A sentiment & emotion analysis model, primarily for the Greek language.<br>"
	            "Type in some text in Greek, to classify its sentiment & emotion: positive, neutral, or negative, along with detected emotions.<br>"
	            "Multiple sentences can be classified when separated by the \\| character.<br>"
	            "Version 2.0 - Developed by GeoSar"
	        ),
	        examples=[
	            ["Η πικάντικη γεύση αυτής της σούπας λαχανικών ήταν ακριβώς αυτό που χρειαζόμουν σήμερα. Είχε μια ωραία γαργαλιστική αίσθηση χωρίς να είναι πολύ καυτερή."],
	            ["Η πίτσα ήταν καμένη και τα υλικά φθηνής ποιότητας. Σίγουρα δεν θα ξαναπαραγγείλω από εκεί."],
	        ],
	        allow_flagging="manual",
	        flagging_options=["Incorrect", "Ambiguous"],
	        flagging_callback=hf_writer,
	        examples_per_page=2,
	        allow_duplication=False,
	        concurrency_limit="default",
	    )

	    # share=True additionally requests a public share link from Gradio.
	    iface.launch(share=True)