Spaces:

krushna123
/

nlp-sentiment-app

Sleeping

App Files Files Community

nlp-sentiment-app / app.py

krushna123

Update app.py

ba1e688 verified 11 days ago

raw

history blame contribute delete

2.52 kB

	# -*- coding: utf-8 -*-
	"""Emotion Detection NLP Mental Health"""

	import string
	import re
	import nltk
	import pandas as pd
	from nltk.tokenize import word_tokenize
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer
	from transformers import pipeline
	import gradio as gr
	import matplotlib.pyplot as plt
	import seaborn as sns
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
	from sklearn.feature_extraction.text import TfidfVectorizer
	from warnings import filterwarnings

	# Fetch the NLTK resources used by the preprocessing below, in the same
	# order as before.
	for _resource in ('punkt_tab', 'stopwords', 'punkt', 'wordnet'):
	    nltk.download(_resource)

	# Single lemmatizer instance shared by every clean_text call.
	lemmatizer = WordNetLemmatizer()

	# Built once at import time: clean_text runs once per dataset row and once
	# per prediction, so re-reading the stop-word corpus and rebuilding the
	# translation table on every call is wasted work.
	_STOP_WORDS = frozenset(stopwords.words('english'))
	_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


	def clean_text(text):
	    """Clean and normalise a piece of text for TF-IDF vectorisation.

	    The input is coerced to ``str`` and lower-cased, ASCII punctuation and
	    digit runs are removed, the result is tokenised, English stop words are
	    dropped and the remaining tokens are lemmatised.

	    Args:
	        text: Value to clean; anything accepted by ``str()``.

	    Returns:
	        The surviving lemmatised tokens joined by single spaces (an empty
	        string when nothing survives).
	    """
	    text = str(text).lower()
	    text = text.translate(_PUNCTUATION_TABLE)  # Remove punctuation
	    text = re.sub(r'\d+', '', text)  # Remove numbers
	    tokens = word_tokenize(text)
	    # Drop stop words first, then lemmatise what is left.
	    return ' '.join(
	        lemmatizer.lemmatize(token)
	        for token in tokens
	        if token not in _STOP_WORDS
	    )

	# Load data and normalise the 'Context' column in place.
	data = pd.read_csv("train.csv")
	data['Context'] = data['Context'].apply(clean_text)

	# Emotion Detection Model: a pretrained classifier supplies the labels the
	# random forest is trained on.
	# NOTE(review): the transformer is fed the cleaned text (stop words removed,
	# lemmatised), not the raw text - confirm this is intended.
	emotion_model = pipeline('sentiment-analysis', model='j-hartmann/emotion-english-distilroberta-base')
	contexts = data['Context']
	# truncation=True clips over-long texts to the model's maximum input length;
	# without it a single long row makes the pipeline raise and aborts start-up.
	emotions = contexts.apply(lambda x: emotion_model(x, truncation=True)[0]['label'])
	data['Detected_Emotion'] = emotions

	# Feature Extraction: TF-IDF over the cleaned text, 70/30 train/test split.
	vectorizer = TfidfVectorizer()
	tfidf_matrix = vectorizer.fit_transform(data['Context'])
	X_train, X_test, y_train, y_test = train_test_split(tfidf_matrix.toarray(), data['Detected_Emotion'], test_size=0.3, random_state=42)

	# Train a Random Forest Classifier. The seed matches the one used for the
	# split so the trained model is reproducible across restarts.
	model = RandomForestClassifier(random_state=42)
	model.fit(X_train, y_train)

	# Function to predict emotion of new text
	def predict_emotion(text):
	    """Predict the emotion label for the given text.

	    Args:
	        text: Raw input text (the Gradio interface passes the text box value).

	    Returns:
	        The predicted label as a plain ``str``, or an empty string when the
	        input is ``None`` or contains only whitespace.
	    """
	    # The interface is live, so this also runs for an empty box; skip the
	    # model instead of classifying nothing (or the literal string "none").
	    if text is None or not str(text).strip():
	        return ""
	    cleaned_text = clean_text(text)
	    tfidf_text = vectorizer.transform([cleaned_text])
	    predicted_emotion = model.predict(tfidf_text)
	    # Cast the NumPy string scalar to a builtin str for the text output.
	    return str(predicted_emotion[0])

	# Gradio Interface: text in, predicted label out. live=True asks Gradio to
	# refresh the output as the input changes rather than on a submit click.
	iface = gr.Interface(fn=predict_emotion, inputs="text", outputs="text", live=True)

	# Launch the Gradio Interface only when executed as a script, so importing
	# this module does not start a server.
	if __name__ == "__main__":
	    iface.launch()