# -*- coding: utf-8 -*-
"""Emotion Detection NLP Mental Health.

Script outline:

1. Clean the ``Context`` column of ``train.csv`` (lowercase, strip
   punctuation and digits, drop English stopwords, lemmatize).
2. Label every cleaned text with a pretrained Hugging Face emotion
   classifier.
3. Fit a TF-IDF + random-forest model on those labels.
4. Expose the fitted model through a Gradio text interface.
"""

import re
import string
from warnings import filterwarnings

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from transformers import pipeline

# NLTK Downloads
for _resource in ('punkt_tab', 'stopwords', 'punkt', 'wordnet'):
    nltk.download(_resource)

# Text Preprocessing
lemmatizer = WordNetLemmatizer()

# Invariants of clean_text, built once instead of on every call.
# The stopword list can only be read after the download above.
_STOP_WORDS = frozenset(stopwords.words('english'))
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)
_DIGITS_RE = re.compile(r'\d+')


def clean_text(text):
    """Clean and preprocess the input text.

    The value is converted with ``str()``, lowercased, stripped of ASCII
    punctuation and digit runs, tokenized, filtered against the English
    stopword list and lemmatized.

    Args:
        text: Value to clean; non-string values are stringified first.

    Returns:
        The remaining lemmatized tokens joined by single spaces (an empty
        string when nothing survives the filtering).
    """
    text = str(text).lower().translate(_PUNCTUATION_TABLE)  # Remove punctuation
    text = _DIGITS_RE.sub('', text)  # Remove numbers
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in word_tokenize(text)
        if token not in _STOP_WORDS
    )


# Load data
data = pd.read_csv("train.csv")
data['Context'] = data['Context'].apply(clean_text)

# Emotion Detection Model
emotion_model = pipeline('sentiment-analysis', model='j-hartmann/emotion-english-distilroberta-base')

contexts = data['Context']
# truncation=True keeps texts longer than the model's maximum sequence
# length from raising inside the pipeline.
emotions = contexts.apply(lambda x: emotion_model(x, truncation=True)[0]['label'])
data['Detected_Emotion'] = emotions

# Feature Extraction
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(data['Context'])

# The TF-IDF matrix stays sparse: both train_test_split and the random
# forest accept sparse input, and densifying it would allocate a full
# (documents x vocabulary) array.
X_train, X_test, y_train, y_test = train_test_split(
    tfidf_matrix,
    data['Detected_Emotion'],
    test_size=0.3,
    random_state=42,
)

# Train a Random Forest Classifier
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Report how well the forest reproduces the pipeline's labels on the
# 30% hold-out split.
_y_pred = model.predict(X_test)
print(f"Hold-out accuracy: {accuracy_score(y_test, _y_pred):.3f}")
print(classification_report(y_test, _y_pred, zero_division=0))


# Function to predict emotion of new text
def predict_emotion(text):
    """Predict the emotion for the given text.

    Args:
        text: Raw user input.

    Returns:
        The label predicted by the random-forest model, or an empty string
        when the input is ``None`` or contains no tokens after cleaning
        (an all-zero feature vector would only yield an arbitrary label).
    """
    if text is None:
        return ""
    cleaned_text = clean_text(text)
    if not cleaned_text:
        return ""
    tfidf_text = vectorizer.transform([cleaned_text])
    predicted_emotion = model.predict(tfidf_text)
    return predicted_emotion[0]


# Gradio Interface
iface = gr.Interface(fn=predict_emotion, inputs="text", outputs="text", live=True)

# Launch the Gradio Interface
if __name__ == "__main__":
    iface.launch()