Djacon
/

rubert-tiny2-russian-emotion-detection

@@ -1,7 +1,7 @@
 ---
 license: mit
-language: ["ru"]
 tags:
 - russian
 - classification
@@ -10,15 +10,16 @@ tags:
 - emotion-recognition
 - multiclass
 widget:
-- text: "Как дела?"
-- text: "Дурак твой дед"
-- text: "Только попробуй!!!"
-- text: "Не хочу в школу("
-- text: "Сейчас ровно час дня"
-- text: "А ты уверен, что эти полоски снизу не врут? Точно уверен? Вот прям 100 процентов?"
 datasets:
-- Djacon/ru_goemotions
 ---
 # First, you should prepare a few functions to talk to the model
@@ -27,7 +28,7 @@ datasets:
 import torch
 from transformers import BertForSequenceClassification, AutoTokenizer
-LABELS = ['радость', 'интерес', 'удивление', 'печаль', 'гнев', 'отвращение', 'страх', 'вина', 'нейтрально']
 tokenizer = AutoTokenizer.from_pretrained('Djacon/rubert-tiny2-russian-emotion-detection')
 model = BertForSequenceClassification.from_pretrained('Djacon/rubert-tiny2-russian-emotion-detection')
@@ -36,27 +37,28 @@ model = BertForSequenceClassification.from_pretrained('Djacon/rubert-tiny2-russi
 def predict_emotion(text: str) -> str:
     inputs = tokenizer(text, truncation=True, return_tensors='pt')
     inputs = inputs.to(model.device)
     outputs = model(**inputs)
-    pred = torch.nn.functional.softmax(outputs.logits, dim=1)
     pred = pred.argmax(dim=1)
-    return LABELS[pred[0]]
 # Probabilistic prediction of emotion in a text
 @torch.no_grad()
-def predict_emotions(text: str) -> list:
     inputs = tokenizer(text, truncation=True, return_tensors='pt')
     inputs = inputs.to(model.device)
     outputs = model(**inputs)
-    pred = torch.nn.functional.softmax(outputs.logits, dim=1)
     emotions_list = {}
     for i in range(len(pred[0].tolist())):
-        emotions_list[LABELS[i]] = round(pred[0].tolist()[i], 4)
     return emotions_list
 ```
@@ -68,8 +70,8 @@ not_simple_prediction = predict_emotions("Какой же сегодня пре
 print(simple_prediction)
 print(not_simple_prediction)
-# happiness
-# {'neutral': 0.0004941817605867982, 'happiness': 0.9979524612426758, 'sadness': 0.0002536600804887712, 'enthusiasm': 0.0005498139653354883, 'fear': 0.00025326196919195354, 'anger': 0.0003583927755244076, 'disgust': 0.00013807788491249084}
 ```
 # Citations

 ---
 license: mit
+language:
+- ru
 tags:
 - russian
 - classification
 - emotion-recognition
 - multiclass
 widget:
+- text: Как дела?
+- text: Дурак твой дед
+- text: Только попробуй!!!
+- text: Не хочу в школу(
+- text: Сейчас ровно час дня
+- text: >-
+    А ты уверен, что эти полоски снизу не врут? Точно уверен? Вот прям 100
+    процентов?
 datasets:
+- Djacon/ru_go_emotions
 ---
 # First, you should prepare a few functions to talk to the model
 import torch
 from transformers import BertForSequenceClassification, AutoTokenizer
+LABELS_RU = ['нейтрально', 'радость', 'грусть', 'гнев', 'интерес', 'удивление', 'отвращение', 'страх', 'вина', 'стыд']
 tokenizer = AutoTokenizer.from_pretrained('Djacon/rubert-tiny2-russian-emotion-detection')
 model = BertForSequenceClassification.from_pretrained('Djacon/rubert-tiny2-russian-emotion-detection')
 def predict_emotion(text: str) -> str:
     """Return the single most likely emotion label for ``text``.

     Args:
         text: Input text to classify.

     Returns:
         The entry of ``LABELS_RU`` whose logit is highest for the first
         (only) sequence in the batch.
     """
     inputs = tokenizer(text, truncation=True, return_tensors='pt')
     inputs = inputs.to(model.device)
     # Inference only: skip autograd bookkeeping, matching predict_emotions.
     # Nesting is harmless if the function is also decorated with no_grad.
     with torch.no_grad():
         logits = model(**inputs).logits
     # Sigmoid is monotonic, so the arg-max of the raw logits picks the same
     # label; skipping it also avoids ties when large logits saturate to the
     # same floating-point value.
     best = int(logits.argmax(dim=1)[0])
     return LABELS_RU[best]
 # Probabilistic prediction of emotion in a text
 @torch.no_grad()
 def predict_emotions(text: str) -> dict:
     """Return a score for every emotion label found in ``text``.

     Args:
         text: Input text to classify.

     Returns:
         A dict mapping each ``LABELS_RU`` entry to the sigmoid of its logit
         for the first (only) sequence in the batch, rounded to 4 decimal
         places. Sigmoid is applied per label, so the scores are independent
         and are not normalised to sum to 1.
     """
     inputs = tokenizer(text, truncation=True, return_tensors='pt')
     inputs = inputs.to(model.device)
     outputs = model(**inputs)
     # Convert the first batch row to a plain Python list once, instead of
     # rebuilding it for the loop bound and again on every iteration.
     scores = torch.nn.functional.sigmoid(outputs.logits)[0].tolist()
     return {LABELS_RU[i]: round(score, 4) for i, score in enumerate(scores)}
 ```
 print(simple_prediction)
 print(not_simple_prediction)
+# радость
+# {'нейтрально': 0.1985, 'радость': 0.7419, 'грусть': 0.0261, 'гнев': 0.0295, 'интерес': 0.1983, 'удивление': 0.4305, 'отвращение': 0.0082, 'страх': 0.008, 'вина': 0.0046, 'стыд': 0.0097}
 ```
 # Citations