"""Flask front end for text preprocessing and sentiment/emotion analysis.

Routes:
    /          render the empty input form
    /analyze   preprocess the submitted text or uploaded file, classify it,
               build a word cloud and render the results
    /download  return the most recent analysis as a plain-text attachment
"""
import logging
import os

import nltk
from flask import Flask, request, render_template, make_response
from flask_sqlalchemy import SQLAlchemy
from wordcloud import WordCloud

from sentiment_model import preprocess_text, analyze_sentiment, read_file

logger = logging.getLogger(__name__)

# Ensure NLTK uses a writable directory inside the container
NLTK_DIR = os.environ.get('NLTK_DATA', os.path.join(os.getcwd(), 'nltk_data'))
os.makedirs(NLTK_DIR, exist_ok=True)
if NLTK_DIR not in nltk.data.path:
    nltk.data.path.insert(0, NLTK_DIR)

# Download required NLTK resources to the writable dir (no-op if present).
# This stays best-effort so the app can still start without network access,
# but a failure is logged instead of being silently discarded.
for pkg in ['punkt', 'punkt_tab', 'wordnet', 'averaged_perceptron_tagger']:
    try:
        if not nltk.download(pkg, download_dir=NLTK_DIR, quiet=True):
            logger.warning("NLTK resource %r could not be downloaded", pkg)
    except Exception:
        logger.warning("NLTK resource %r could not be downloaded", pkg,
                       exc_info=True)

app = Flask(__name__, static_folder='static')
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///sentiment_data.db'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
db = SQLAlchemy(app)

# Accepted input size, in whitespace-separated words.
MIN_WORDS = 4
MAX_WORDS = 300

# Labels used to bucket individual words. The tuple order is also the key
# order of the distribution dict handed to the template.
POLARITY_LABELS = ('POSITIVE', 'NEUTRAL', 'NEGATIVE')
EMOTION_LABELS = ('ANGER', 'DISGUST', 'FEAR', 'JOY', 'NEUTRAL', 'SADNESS',
                  'SURPRISE')

WORDCLOUD_FILENAME = 'wordcloud.png'
# Value handed to the template; always forward slashes because it is a URL,
# not a filesystem path.
WORDCLOUD_URL = 'static/' + WORDCLOUD_FILENAME


# Define SentimentRecord model
class SentimentRecord(db.Model):
    """One stored analysis: the input text plus each preprocessing stage.

    NOTE(review): nothing in the visible code inserts rows into this table;
    confirm whether persistence is still intended.
    """

    id = db.Column(db.Integer, primary_key=True)
    original_text = db.Column(db.Text, nullable=False)
    cleaned_text = db.Column(db.Text, nullable=False)
    removed_text = db.Column(db.Text, nullable=False)
    normalized_text = db.Column(db.Text, nullable=False)
    tokenized_text = db.Column(db.Text, nullable=False)
    stemmed_text = db.Column(db.Text, nullable=False)
    lemmatized_text = db.Column(db.Text, nullable=False)
    sentiment = db.Column(db.String(20), nullable=False)
    ner = db.Column(db.Text, nullable=False)
    pos = db.Column(db.Text, nullable=False)


with app.app_context():
    db.create_all()

# Global variable holding the latest analysis result for /download.
# NOTE(review): this is process-wide state. Every client shares it (and the
# single word-cloud image), so /download serves whichever analysis ran last.
analysis_result = {}


def _render_error(message, text, model_type, file_uploaded):
    """Render the form again with *message* shown and the user's input kept."""
    return render_template('index.html', error=message, text=text,
                           model_type=model_type, file_uploaded=file_uploaded)


def _group_words_by_label(words, model_type, labels):
    """Classify each word on its own and bucket it under its predicted label.

    Args:
        words: iterable of words to classify individually.
        model_type: model selector forwarded to ``analyze_sentiment``.
        labels: labels to collect; words predicted as anything else are
            dropped.

    Returns:
        dict mapping every label in *labels* (in order) to its list of words.
    """
    buckets = {label: [] for label in labels}
    for word in words:
        label, _ = analyze_sentiment(word, model_type=model_type)
        if label in buckets:
            buckets[label].append(word)
    return buckets


@app.route('/')
def home():
    """Render the empty analysis form."""
    return render_template('index.html', sentiment=None, text=None,
                           file_uploaded=None, model_type='default')


@app.route('/analyze', methods=['POST'])
def analyze():
    """Analyze the submitted text (or uploaded file) and render the results.

    Form fields read: ``text``, ``file`` (optional upload, takes precedence
    over ``text``) and ``model_type`` (defaults to ``'default'``).

    Returns:
        The rendered ``index.html``: either the full analysis, or the form
        with an ``error`` message when the input is too short, too long, or
        the analysis raised.
    """
    global analysis_result  # To store the results globally for the download

    text = request.form.get('text', '').strip()
    file = request.files.get('file')
    model_type = request.form.get('model_type', 'default')
    file_uploaded = False
    if file and file.filename != '':
        text = read_file(file)
        file_uploaded = True

    word_count = len(text.split()) if text else 0
    if word_count < MIN_WORDS:
        return _render_error(
            f'Please provide at least {MIN_WORDS} words for analysis.',
            text, model_type, file_uploaded)
    if word_count > MAX_WORDS:
        return _render_error(
            f'Input text exceeds the {MAX_WORDS}-word limit.',
            text, model_type, file_uploaded)

    try:
        # Step 1: Preprocess text (cleaning, normalization, etc.)
        (cleaned_text, removed_text, normalized_text, tokenized_text,
         stemmed_text, lemmatized_text, ner, pos) = preprocess_text(text)

        # Step 2: Use lemmatized text for sentiment analysis
        lemmatized_text_joined = " ".join(lemmatized_text)
        sentiment, probabilities = analyze_sentiment(
            lemmatized_text_joined, model_type=model_type)

        # Step 3: Word-level analysis, bucketed by predicted label
        is_emotion = model_type == 'emotion'
        labels = EMOTION_LABELS if is_emotion else POLARITY_LABELS
        words_by_label = _group_words_by_label(
            lemmatized_text, model_type, labels)
        word_sentiment_distribution = {
            label.lower(): len(words)
            for label, words in words_by_label.items()
        }
        if is_emotion:
            emotion_words = words_by_label
            positive_words, negative_words, neutral_words = [], [], []
        else:
            emotion_words = None
            positive_words = words_by_label['POSITIVE']
            negative_words = words_by_label['NEGATIVE']
            neutral_words = words_by_label['NEUTRAL']

        # Store the analysis result in global variable for download
        analysis_result = {
            'sentiment': sentiment,
            'model_type': model_type,
            'cleaned_text': cleaned_text,
            'removed_text': removed_text,
            'normalized_text': normalized_text,
            'tokenized_text': tokenized_text,
            'stemmed_text': stemmed_text,
            'lemmatized_text': lemmatized_text,
            'ner': ner,
            'pos': pos,
            'original_text': text,
            'word_sentiment_distribution': word_sentiment_distribution,
            'positive_words': positive_words,
            'negative_words': negative_words,
            'neutral_words': neutral_words,
            'emotion_words': emotion_words,
        }

        # Generate Word Cloud. Write into the folder Flask actually serves
        # (resolved against the app root) rather than a cwd-relative path.
        wordcloud = WordCloud(
            width=800, height=400, background_color='white',
        ).generate(lemmatized_text_joined)
        os.makedirs(app.static_folder, exist_ok=True)
        wordcloud.to_file(os.path.join(app.static_folder, WORDCLOUD_FILENAME))

        return render_template(
            'index.html',
            sentiment=sentiment,
            cleaned_text=cleaned_text,
            removed_text=removed_text,
            normalized_text=normalized_text,
            tokenized_text=tokenized_text,
            stemmed_text=" ".join(stemmed_text),
            lemmatized_text=lemmatized_text_joined,
            ner=ner,
            pos=pos,
            probabilities=probabilities,
            wordcloud_url=WORDCLOUD_URL,
            word_sentiment_distribution=word_sentiment_distribution,
            positive_words=positive_words,
            negative_words=negative_words,
            neutral_words=neutral_words,
            emotion_words=emotion_words,
            text=text,
            model_type=model_type,
            total_words=len(tokenized_text),
            file_uploaded=file_uploaded,
        )
    except Exception:
        # Top-level boundary: keep the traceback in the log and show the
        # user a generic message.
        app.logger.exception("Error during analysis")
        return _render_error('An error occurred during analysis.',
                             text, model_type, file_uploaded)


def _word_section(title, words):
    """Return the report lines for one word bucket: a count, then the words."""
    return [f"{title} Words: {len(words)}", ', '.join(map(str, words))]


@app.route('/download')
def download_result():
    """Return the most recent analysis as a downloadable text file.

    Returns:
        A ``text/plain`` attachment on success, ``400`` when no analysis has
        been stored yet, or ``500`` when building the report fails.
    """
    try:
        if not analysis_result:
            return "No analysis available for download", 400

        result = analysis_result
        named_entities = ', '.join(
            f"{entity[0]} ({entity[1]})" for entity in result['ner'])
        pos_tags = ', '.join(f"{word} ({tag})" for word, tag in result['pos'])

        # Build content for the TXT file
        lines = [
            "Sentiment",
            f"Overall Sentiment: {result['sentiment']}",
            "",
            "Model Used",
            f"Selected Model: {result['model_type']}",
            "",
            f"Original Text: {result['original_text']}",
            "",
            "Text Preprocessing Results",
            f"Cleaned Text: {result['cleaned_text']}",
            f"Removed Text: {result['removed_text']}",
            f"Normalized Text: {result['normalized_text']}",
            f"Tokenized Text: {', '.join(map(str, result['tokenized_text']))}",
            f"Stemmed Text: {' '.join(map(str, result['stemmed_text']))}",
            f"Lemmatized Text: "
            f"{' '.join(map(str, result['lemmatized_text']))}",
            f"Named Entities (NER): {named_entities}",
            f"POS Tags: {pos_tags}",
            "",
            f"Total Words: {len(result['tokenized_text'])}",
            "",
        ]

        if result['model_type'] == 'emotion':
            # Emotion model: one section per emotion label
            lines.append("Emotion-Specific Words:")
            for emotion, words in result['emotion_words'].items():
                lines.extend(_word_section(emotion.capitalize(), words))
        else:
            # Other models: positive, neutral, and negative words
            for title, key in (('Positive', 'positive_words'),
                               ('Neutral', 'neutral_words'),
                               ('Negative', 'negative_words')):
                lines.extend(_word_section(title, result[key]))
                lines.append("")

        content = "\n".join(lines) + "\n"

        # Create a response object with the content
        response = make_response(content)
        response.headers["Content-Disposition"] = (
            "attachment; filename=sentiment_analysis_result.txt")
        # State the charset explicitly so non-ASCII text is decoded correctly.
        response.headers["Content-Type"] = "text/plain; charset=utf-8"
        return response
    except Exception:
        app.logger.exception("Error during file download")
        return "Error in generating file", 500


if __name__ == '__main__':
    port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=port)