Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
"""Emotion Detection NLP Mental Health"""
import string | |
import re | |
import nltk | |
import pandas as pd | |
from nltk.tokenize import word_tokenize | |
from nltk.corpus import stopwords | |
from nltk.stem import WordNetLemmatizer | |
from transformers import pipeline | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import numpy as np | |
from sklearn.model_selection import train_test_split | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from warnings import filterwarnings | |
# NLTK Downloads
# Fetch the tokenizer, stopword and WordNet resources, in this order.
for _resource in ('punkt_tab', 'stopwords', 'punkt', 'wordnet'):
    nltk.download(_resource)

# Text Preprocessing
# One shared lemmatizer instance, reused by every clean_text() call.
lemmatizer = WordNetLemmatizer()
# English stopword set, loaded on the first clean_text() call and then reused,
# so the corpus is not re-read for every row that gets cleaned.
_STOP_WORDS = None


def clean_text(text):
    """Clean and normalise a piece of text for feature extraction.

    Steps, in order: lowercase, strip ASCII punctuation, drop digit runs,
    tokenise with NLTK, remove English stopwords, lemmatise each remaining
    token, and re-join the tokens with single spaces.

    Args:
        text: The raw input. Non-string values are converted with ``str()``;
            ``None`` and float NaN (a missing CSV cell) are treated as empty.

    Returns:
        The cleaned text as a single space-separated string; ``''`` when the
        input is missing.
    """
    global _STOP_WORDS

    # Missing values would otherwise be stringified into the literal tokens
    # "none" / "nan" and end up as bogus vocabulary entries.
    if text is None or (isinstance(text, float) and text != text):
        return ''

    text = str(text).lower()
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    text = re.sub(r'\d+', '', text)  # Remove numbers

    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))

    # Drop stopwords and lemmatise the remaining tokens in a single pass.
    tokens = [
        lemmatizer.lemmatize(word)
        for word in word_tokenize(text)
        if word not in _STOP_WORDS
    ]
    return ' '.join(tokens)
# Load data
# Reads train.csv and replaces its 'Context' column with the cleaned text.
data = pd.read_csv("train.csv")
data['Context'] = data['Context'].apply(clean_text)

# Emotion Detection Model
# A pretrained emotion classifier provides the labels that the Random Forest
# below is trained to reproduce.
# NOTE(review): the labels are computed from the already-cleaned text
# (lowercased, punctuation and stopwords removed) — confirm this is intended
# rather than labelling the raw text.
emotion_model = pipeline('sentiment-analysis', model='j-hartmann/emotion-english-distilroberta-base')
contexts = data['Context']
# truncation=True clips inputs that exceed the model's maximum sequence
# length; without it an over-long context raises during inference.
emotions = contexts.apply(lambda x: emotion_model(x, truncation=True)[0]['label'])
data['Detected_Emotion'] = emotions

# Feature Extraction
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(data['Context'])
# The TF-IDF matrix stays sparse: train_test_split and RandomForestClassifier
# both accept scipy sparse input, and densifying it with .toarray() allocates
# rows x vocabulary floats for no benefit.
X_train, X_test, y_train, y_test = train_test_split(
    tfidf_matrix, data['Detected_Emotion'], test_size=0.3, random_state=42
)

# Train a Random Forest Classifier
# Seeded like the split above so repeated runs train the same model.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
# Function to predict emotion of new text
def predict_emotion(text):
    """Predict the emotion label for the given text.

    The text is cleaned with ``clean_text``, vectorised with the fitted
    TF-IDF ``vectorizer`` and classified by the trained ``model``.

    Args:
        text: The raw user input.

    Returns:
        The predicted emotion label as a string, or ``""`` when the input is
        missing, blank, or has no tokens left after cleaning.
    """
    # The interface runs with live=True, so blank input is routine. A blank
    # text maps to an all-zero feature vector, for which the classifier would
    # still return some label; report nothing instead.
    if text is None or not str(text).strip():
        return ""

    cleaned_text = clean_text(text)
    if not cleaned_text:
        return ""

    tfidf_text = vectorizer.transform([cleaned_text])
    predicted_emotion = model.predict(tfidf_text)
    # predict() returns an array; hand the UI a plain Python string.
    return str(predicted_emotion[0])
# Gradio Interface
# Text in, text out; live=True re-runs the prediction as the input changes.
iface = gr.Interface(
    fn=predict_emotion,
    inputs="text",
    outputs="text",
    live=True,
)

# Launch the Gradio Interface
# Only start the web UI when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()