import streamlit as st |
import pandas as pd |
import numpy as np |
import pickle |
import re |
import os |
import tensorflow |
from tensorflow.keras.models import load_model |
from tensorflow.keras.preprocessing.sequence import pad_sequences |
from PIL import Image |
import random |
import io |
import plotly.express as px |
import matplotlib.pyplot as plt |
from wordcloud import WordCloud |
import nltk |
from nltk.corpus import stopwords |
import tweepy |
# Streamlit re-executes this script on every user interaction, so only contact
# the NLTK download server when the stopwords corpus is not available locally.
try:
    nltk.data.find("corpora/stopwords")
except LookupError:
    nltk.download("stopwords")

# English stopwords as a set, used for membership tests in preprocess_text.
stop_words = set(stopwords.words("english"))

st.set_page_config(page_title="Twitter Sentiment Analyzer", layout="wide")
@st.cache_resource
def load_tokenizer():
    """Unpickle and return the object stored in ``tokenizer.pkl``.

    The file is read from the current working directory. The function is
    decorated with ``st.cache_resource``.

    Returns:
        The unpickled object (used elsewhere in this file through its
        ``texts_to_sequences`` method).
    """
    # NOTE(review): pickle.load can execute arbitrary code; tokenizer.pkl must
    # come from a trusted source.
    with open("tokenizer.pkl", "rb") as tokenizer_file:
        tokenizer_obj = pickle.load(tokenizer_file)
    return tokenizer_obj
@st.cache_resource
def download_model():
    """Make sure the Keras model file exists locally and return its path.

    If ``text_classification_model.h5`` is not present in the working
    directory it is downloaded from Google Drive with ``gdown``.

    Returns:
        str: Path of the local model file.

    Raises:
        FileNotFoundError: If the file is still missing after the download
            attempt.
    """
    # gdown is used here but is not imported at file level; import it locally
    # so the call below does not fail with NameError.
    import gdown

    drive_link = "https://drive.google.com/file/d/1AH9w7IzeKx3UN_d-FQY7r61dNboG5v8F/view?usp=sharing"
    model_file = "text_classification_model.h5"
    if not os.path.exists(model_file):
        # The link is a "/file/d/<id>/view" share URL rather than a direct
        # download URL, so gdown has to extract the file id (fuzzy=True).
        gdown.download(drive_link, model_file, quiet=False, fuzzy=True)
        if not os.path.exists(model_file):
            raise FileNotFoundError(
                f"Could not download model file '{model_file}' from Google Drive."
            )
    return model_file
@st.cache_resource
def load_sentiment_model():
    """Return the Keras model loaded from the path given by ``download_model``.

    The function is decorated with ``st.cache_resource``.
    """
    return load_model(download_model())
# Module-level handles used by predict_tweet_sentiment. Both loader functions
# are decorated with st.cache_resource where they are defined.
tokenizer = load_tokenizer()
model = load_sentiment_model()
def preprocess_text(text):
    """Normalize a raw tweet into a space-separated string of kept words.

    Steps: lowercase, drop ``@mentions``, drop tokens starting with ``http``,
    drop every character that is not an ASCII letter or whitespace, then drop
    words found in the module-level ``stop_words`` set.

    Args:
        text: The raw tweet. ``None`` and float NaN (e.g. an empty cell in an
            uploaded spreadsheet) are treated as an empty string; any other
            non-string value is converted with ``str()``.

    Returns:
        str: The cleaned text; may be empty.
    """
    if not isinstance(text, str):
        # NaN is the only float that is not equal to itself.
        is_missing = text is None or (isinstance(text, float) and text != text)
        text = "" if is_missing else str(text)

    text = text.lower()
    text = re.sub(r'@\w+', "", text)
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    return " ".join(word for word in text.split() if word not in stop_words)
def predict_tweet_sentiment(tweet):
    """Classify one tweet with the module-level ``tokenizer`` and ``model``.

    Args:
        tweet: Raw tweet text; it is cleaned with ``preprocess_text`` first.

    Returns:
        tuple: ``(sentiment, score)`` where ``score`` is the first value of the
        model output as a Python float and ``sentiment`` is the positive label
        when ``score > 0.5``, otherwise the negative label.
    """
    processed_tweet = preprocess_text(tweet)
    sequence = tokenizer.texts_to_sequences([processed_tweet])
    # NOTE(review): presumably 50 must match the sequence length used when the
    # model was trained -- confirm.
    max_len = 50
    padded_sequence = pad_sequences(sequence, maxlen=max_len, padding='post')
    # verbose=0 keeps Keras from printing a progress bar for every tweet.
    prediction = model.predict(padded_sequence, verbose=0)
    # Pull out the scalar once and use it for both the label and the return
    # value instead of comparing the whole output array against 0.5.
    score = float(prediction[0][0])
    sentiment = "Positive π" if score > 0.5 else "Negative π"
    return sentiment, score
def main():
    """Draw the sidebar navigation and render the page the user selected."""
    # Page label -> function that renders it; dict order is the radio order.
    pages = {
        "Home": show_home,
        "Tweet Analysis": show_live_tweet_analysis,
        "Upload Tweets Dataset": show_upload_dataset,
        "Fetch Live Tweets": fetch_live_tweets,
        "About": show_about,
    }

    st.sidebar.title("Navigation")
    selection = st.sidebar.radio("Go to", list(pages))

    render_page = pages.get(selection)
    if render_page is not None:
        render_page()
def show_home():
    """Render the Home page: welcome text, the intro video and a usage hint.

    Raises:
        OSError: If ``video1642990305.mp4`` cannot be opened.
    """
    st.title("Welcome to the Twitter Sentiment Analyzer")
    st.write("""
This application fetches live tweets from Twitter/X and analyzes their sentiment.
You can also upload your own dataset of reviews to get insights on sentiment distribution.
""")
    st.subheader("App Intro Video")
    # Read the video inside a context manager so the file handle is closed
    # instead of being left open on every rerun of the page.
    with open("video1642990305.mp4", "rb") as video_file:
        video_bytes = video_file.read()
    st.video(video_bytes, start_time=0)
    st.subheader("Use the sidebar to navigate different sections of the application.")
def show_live_tweet_analysis():
    """Render the single-tweet page: input box, prediction, score and charts.

    After the button is pressed with a non-blank tweet, the page shows the
    predicted label, the score as a progress bar and metric, a confidence
    banner, a word cloud of the cleaned tweet and a "history" line chart.
    """
    st.title("π’ Live Tweet Sentiment Analysis")
    st.write("Analyze the sentiment of a single tweet in real time!")
    user_tweet = st.text_area("βοΈ Enter a tweet:", placeholder="Type a tweet here...")

    if not st.button("π Analyze Sentiment"):
        return
    if not user_tweet.strip():
        st.warning("β οΈ Please enter a tweet to analyze.")
        return

    sentiment, score = predict_tweet_sentiment(user_tweet)
    st.markdown(f"## Sentiment: {sentiment}")

    st.markdown("### π Sentiment Confidence Score")
    st.progress(int(score * 100))
    # NOTE(review): the delta is a random number, not a real change in score.
    st.metric(label="Confidence Score", value=f"{score:.2%}", delta=random.uniform(-0.02, 0.02))
    if score > 0.8:
        st.success("π΅ Highly Confident Prediction!")
    elif score > 0.5:
        st.info("π‘ Moderate Confidence")
    else:
        st.warning("π΄ Low Confidence")

    st.markdown("### βοΈ Word Cloud of Tweet")
    preprocessed_tweet = preprocess_text(user_tweet)
    try:
        wordcloud = WordCloud(width=500, height=300, background_color="black").generate(preprocessed_tweet)
    except ValueError:
        # WordCloud raises ValueError when the text has no usable words, e.g.
        # a tweet made only of mentions, links or stopwords.
        st.info("Not enough words left in the tweet to draw a word cloud.")
    else:
        cloud_fig, ax = plt.subplots()
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(cloud_fig)
        # pyplot keeps every figure alive until it is closed; close it so
        # figures do not pile up across reruns.
        plt.close(cloud_fig)

    st.markdown("### π Sentiment History")
    # NOTE(review): this "history" is ten random scores, not stored results.
    history_data = pd.DataFrame({
        "Tweet Number": list(range(1, 11)),
        "Sentiment Score": np.round(np.random.uniform(0, 1, 10), 2)
    })
    history_fig = px.line(history_data, x="Tweet Number", y="Sentiment Score",
                          title="Sentiment Score Over Time",
                          markers=True, line_shape="spline")
    st.plotly_chart(history_fig)
def read_file(uploaded_file):
    """Load an uploaded CSV or Excel file into a DataFrame.

    Args:
        uploaded_file: A file-like object with a ``name`` attribute. The
            extension of ``name`` (compared case-insensitively) selects the
            parser: ``csv`` -> ``pd.read_csv``; ``xls``/``xlsx`` ->
            ``pd.read_excel``.

    Returns:
        pandas.DataFrame or None: The parsed data, or ``None`` (after showing
        an ``st.error`` message) when the extension is unsupported or the
        data has no ``text`` column.
    """
    # Lower-case the extension so names such as "tweets.CSV" are accepted.
    file_type = os.path.splitext(uploaded_file.name)[1].lstrip(".").lower()

    if file_type == "csv":
        df = pd.read_csv(uploaded_file)
    elif file_type in ("xls", "xlsx"):
        df = pd.read_excel(uploaded_file)
    else:
        st.error("Unsupported file format! Please upload a CSV or Excel file.")
        return None

    if "text" not in df.columns:
        st.error("The uploaded file must contain a 'text' column.")
        return None
    return df
def show_upload_dataset():
    """Render the bulk page: upload a file, score each row, chart and export.

    The uploaded file is parsed with ``read_file``; each value of its ``text``
    column is passed to ``predict_tweet_sentiment`` and the results are stored
    in new ``Sentiment`` and ``Score`` columns. A bar chart of label counts
    and a download button (CSV for CSV uploads, otherwise Excel) follow.
    """
    st.title("π Bulk Tweet Sentiment Analysis")
    uploaded_file = st.file_uploader("Upload a CSV or Excel file with tweets", type=["csv", "xlsx", "xls"])
    if uploaded_file is None:
        return

    df = read_file(uploaded_file)
    if df is None:
        return

    st.write("### First 5 Rows of Uploaded File:")
    st.dataframe(df.head())

    if df.empty:
        # Unpacking zip(*...) over zero rows would raise ValueError.
        st.warning("The uploaded file contains no rows to analyze.")
        return

    # Empty cells are read as NaN; score them as empty strings instead of
    # failing inside the text preprocessing.
    texts = df["text"].fillna("").astype(str)
    predictions = [predict_tweet_sentiment(tweet_text) for tweet_text in texts]
    df["Sentiment"] = [label for label, _ in predictions]
    df["Score"] = [score for _, score in predictions]

    st.write("### First 5 Rows After Sentiment Analysis:")
    st.dataframe(df.head())

    sentiment_counts = df["Sentiment"].value_counts().reset_index()
    sentiment_counts.columns = ["Sentiment", "Count"]
    fig = px.bar(
        sentiment_counts,
        x="Sentiment",
        y="Count",
        title="Sentiment Distribution",
        color="Sentiment",
        text="Count",
        color_discrete_map={"Positive π": "green", "Negative π": "red"}
    )
    st.plotly_chart(fig)

    # Compare the extension case-insensitively so "data.CSV" is exported as CSV.
    file_type = uploaded_file.name.split('.')[-1].lower()
    if file_type == "csv":
        output_file = df.to_csv(index=False).encode("utf-8")
        st.download_button("Download Predictions (CSV)", output_file, "predicted_tweets.csv", "text/csv")
    else:
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine="openpyxl") as writer:
            df.to_excel(writer, index=False)
        excel_buffer.seek(0)
        # The registered .xlsx MIME type has a hyphen in
        # "openxmlformats-officedocument"; the hyphen was missing before.
        st.download_button(
            "Download Predictions (Excel)",
            excel_buffer,
            "predicted_tweets.xlsx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
def fetch_tweets(api_key, api_secret, access_token, access_token_secret, bearer_token, query, count=10):
    """Search recent tweets for ``query`` and return them as a DataFrame.

    Only ``bearer_token`` is used for authentication; the other four
    credential parameters are accepted but not used by this function.

    Args:
        api_key, api_secret, access_token, access_token_secret: Unused.
        bearer_token: Token passed to ``tweepy.Client``.
        query: Search query string.
        count: Maximum number of tweets to return.

    Returns:
        pandas.DataFrame or None: A frame with ``Tweet`` and ``Time`` columns
        (empty when nothing matched), or ``None`` after an ``st.error``
        message if the request raised an exception.
    """
    try:
        client = tweepy.Client(bearer_token=bearer_token)
        # The recent-search endpoint only accepts max_results from 10 to 100,
        # so request a valid page size and trim to `count` afterwards.
        max_results = max(10, min(count, 100))
        tweets = client.search_recent_tweets(query=query, max_results=max_results, tweet_fields=["created_at"])
        # `data` is None when the search matches nothing; treat that as an
        # empty result instead of letting iteration fail.
        matched = (tweets.data or [])[:count]
        tweet_list = [{"Tweet": tweet.text, "Time": tweet.created_at} for tweet in matched]
        return pd.DataFrame(tweet_list, columns=["Tweet", "Time"])
    except Exception as e:
        # UI boundary: report any failure to the user rather than crashing.
        st.error(f"β Error fetching tweets: {e}")
        return None
def fetch_live_tweets():
    """Render the live-fetch page: credentials, search form, results, export.

    The credential inputs are drawn in the sidebar. When the button is
    pressed and every field is filled in, ``fetch_tweets`` is called and the
    result is shown as a table with a CSV download button.
    """
    st.title("π΄ Fetch Live Tweets in Real-time")
    with st.sidebar:
        st.subheader("π Enter Twitter API Keys")
        api_key = st.text_input("API Key", type="password")
        api_secret = st.text_input("API Secret", type="password")
        access_token = st.text_input("Access Token", type="password")
        access_token_secret = st.text_input("Access Token Secret", type="password")
        bearer_token = st.text_input("Bearer Token", type="password")

    st.subheader("π’ Search for Tweets")
    query = st.text_input("Enter a keyword or hashtag:")
    count = st.slider("Number of tweets to fetch:", 5, 50, 10)

    if not st.button("π Fetch Tweets"):
        return
    if not all([api_key, api_secret, access_token, access_token_secret, bearer_token, query]):
        st.warning("β οΈ Please enter all API keys and a search query.")
        return

    tweets_df = fetch_tweets(api_key, api_secret, access_token, access_token_secret, bearer_token, query, count)
    if tweets_df is None or tweets_df.empty:
        st.warning("β οΈ No tweets found. Try a different keyword.")
        return

    # The original statement here was truncated to a bare string fragment
    # (a syntax error); it is restored as a success message.
    st.success(f"Successfully fetched {len(tweets_df)} tweets for '{query}'")
    st.dataframe(tweets_df)
    csv = tweets_df.to_csv(index=False).encode("utf-8")
    st.download_button("π₯ Download CSV", csv, "live_tweets.csv", "text/csv")
def show_about():
    """Render a short About page (title plus a two-line description).

    NOTE: a second ``show_about`` is defined later in this file; that later
    definition replaces this one at import time.
    """
    about_text = """
This Twitter Sentiment Analyzer uses an LSTM model trained on tweets to predict sentiments.
You can analyze individual tweets, upload datasets, and visualize sentiment trends.
"""
    st.title("About This Application")
    st.write(about_text)
import streamlit as st |
def show_about():
    """Render the About page: a title followed by one Markdown document.

    The Markdown covers what the app does, the technologies used, the data
    and model, and the developer's contact links.
    """
    about_markdown = """
## π **Twitter Sentiment Analysis**
This application analyzes the sentiment of tweets using a **deep learning model (LSTM)**.
It can process:
- **Single tweets** (Live Tweet Analysis) π’
- **Bulk tweets** from uploaded CSV/Excel files π
The app predicts whether a tweet expresses **Positive π or Negative π** sentiment.
---
## π **Technologies Used**
- **Natural Language Processing (NLP)** for text processing
- **TensorFlow/Keras (LSTM Model)** for sentiment classification
- **Streamlit** for building an interactive UI
- **Plotly** for data visualizations
- **Pandas & NumPy** for data manipulation
---
## π **Data & Model Information**
- The model was trained on the **Hugging Face Tweets Dataset (180,000 tweets)**
- Achieved **83% accuracy** on test data
- Preprocessing steps include **tokenization, padding, and stopword removal**
---
## π¨βπ» **Developer**
- **[Sachin Balhara]** β Data Scientist & AI Developer
- **GitHub:** [https://github.com/SACHINBALHARA]
- **LinkedIn:** [www.linkedin.com/in/sachin-balhara-05a084271]
- **Website:** [Your Website](#)
---
π *This project is open-source! Feel free to contribute & improve it.* π―
"""
    st.title("About This App")
    st.markdown(about_markdown)
# Run the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()