import streamlit as st
import pandas as pd
import yfinance as yf
from textblob import TextBlob
import re
from gensim import corpora, models
from nltk import word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from nltk import pos_tag
from nltk.chunk import conlltags2tree, tree2conlltags
import spacy
from spacy import displacy
def fetch_news_data(ticker1, ticker2):
    """Fetch news items for two tickers from Yahoo Finance.

    Args:
        ticker1: First ticker symbol.
        ticker2: Second ticker symbol.

    Returns:
        A ``(ticker1_news, ticker2_news)`` tuple holding whatever
        ``yf.Ticker(...).news`` returned for each symbol. If fetching fails,
        the error is reported through ``st.error`` and ``([], [])`` is
        returned so callers can still unpack the result.
    """
    try:
        ticker1_data = yf.Ticker(ticker1).news
        ticker2_data = yf.Ticker(ticker2).news
    except Exception as e:  # UI boundary: report the failure and degrade
        st.error(f"An error occurred while fetching news data: {e}")
        # The previous fallback re-invoked this function without its two
        # required arguments, which can only raise TypeError. Return empty
        # results instead.
        return [], []
    return ticker1_data, ticker2_data
def clean_news_data(news_data):
    """Normalize news headlines to plain, single-spaced text.

    Every character that is neither a word character nor whitespace is
    removed, then runs of whitespace are collapsed to one space.

    Args:
        news_data: Iterable of dict-like news items carrying a ``'title'``.

    Returns:
        A list of cleaned headline strings. Items without a string headline
        are skipped rather than raising ``KeyError``.
    """
    clean_data = []
    for news in news_data:
        title = news.get('title')
        if title is None:
            # NOTE(review): newer yfinance releases appear to nest the
            # headline under a 'content' mapping -- confirm against the
            # installed version.
            content = news.get('content')
            title = content.get('title') if isinstance(content, dict) else None
        if not isinstance(title, str):
            continue
        news_text = re.sub(r"[^\w\s]", "", title)
        news_text = " ".join(news_text.split())
        # This append was missing, so the function always returned [].
        clean_data.append(news_text)
    return clean_data
def perform_sentiment_analysis(news_data):
    """Score each headline with TextBlob.

    Args:
        news_data: Iterable of headline strings.

    Returns:
        A list with one TextBlob polarity score per headline, in input
        order. The scores are numeric so they can be compared against zero
        by ``classify_sentiment``.
    """
    # The TextBlob result was previously computed but never stored, so the
    # function always returned an empty list.
    return [TextBlob(news).sentiment.polarity for news in news_data]
def classify_sentiment(sentiment_scores):
    """Map numeric polarity scores to sentiment labels.

    Args:
        sentiment_scores: Iterable of numeric polarity scores.

    Returns:
        A list of labels in input order: ``"Positive"`` for scores above
        zero, ``"Negative"`` for scores below zero, ``"Neutral"`` otherwise.
    """
    # NOTE(review): the branch bodies were missing from the source; the
    # label strings below are a reconstruction -- confirm they match what
    # downstream display code expects.
    sentiments = []
    for score in sentiment_scores:
        if score > 0:
            sentiments.append("Positive")
        elif score < 0:
            sentiments.append("Negative")
        else:
            sentiments.append("Neutral")
    return sentiments
def topic_modeling(news_data, num_topics=5, passes=10):
    """Fit an LDA topic model on a collection of headlines.

    Args:
        news_data: Iterable of headline strings.
        num_topics: Number of topics to fit (default 5, as before).
        passes: Number of training passes over the corpus (default 10, as
            before).

    Returns:
        A ``(lda_model, dictionary)`` tuple: the trained
        ``models.LdaMulticore`` instance and the ``corpora.Dictionary``
        built from the tokenized headlines.
    """
    # Tokenize and remove stop words. The comparison is done on the
    # lowercased token because the English stop-word list is lowercase;
    # otherwise capitalized stop words such as "The" slip through.
    stop_words = set(stopwords.words('english'))
    tokenized_news = [
        [word for word in word_tokenize(text) if word.lower() not in stop_words]
        for text in news_data
    ]
    # Create Dictionary and Corpus
    dictionary = corpora.Dictionary(tokenized_news)
    corpus = [dictionary.doc2bow(tokens) for tokens in tokenized_news]
    # Train LDA model
    lda_model = models.LdaMulticore(
        corpus=corpus,
        id2word=dictionary,
        num_topics=num_topics,
        passes=passes,
    )
    return lda_model, dictionary
def extract_topics(lda_model, dictionary, num_words=5):
    """Build a readable summary line for every topic in an LDA model.

    Args:
        lda_model: Trained LDA model exposing ``show_topics``.
        dictionary: Unused; kept so existing callers keep working.
        num_words: Number of top words to list per topic.

    Returns:
        A list of strings of the form ``"Topic <idx>: <word> <word> ..."``.
    """
    # The previous version iterated ``dictionary.items()``, which yields
    # (id, word) pairs, so it tested an integer id for membership in the
    # formatted topic string (TypeError) and ignored ``num_words``. Asking
    # the model for unformatted (word, weight) pairs avoids string parsing.
    topics = []
    for idx, word_weights in lda_model.show_topics(
        num_topics=-1, num_words=num_words, formatted=False
    ):
        topic_words = " ".join(word for word, _ in word_weights)
        topics.append(f"Topic {idx}: {topic_words}")
    return topics
def topic_sentiment_analysis(news_data, lda_model, dictionary):
    """Group headline sentiment scores by each headline's dominant topic.

    Args:
        news_data: Iterable of headline strings.
        lda_model: Trained LDA model exposing ``get_document_topics``.
        dictionary: Dictionary used to convert a headline into the
            bag-of-words form the model expects.

    Returns:
        A dict mapping topic index to the list of TextBlob polarity scores
        of the headlines whose highest-probability topic is that index.
        Headlines for which the model reports no topic are skipped.
    """
    topic_sentiments = {}
    for news in news_data:
        # get_document_topics needs the document's bag-of-words; it was
        # previously called with no argument at all.
        bow = dictionary.doc2bow(word_tokenize(news))
        doc_topics = lda_model.get_document_topics(bow)
        if not doc_topics:
            continue  # nothing to take a max over
        topic_idx = max(doc_topics, key=lambda pair: pair[1])[0]
        # The score was never recorded before, leaving only empty lists.
        polarity = TextBlob(news).sentiment.polarity
        topic_sentiments.setdefault(topic_idx, []).append(polarity)
    return topic_sentiments
def ner_and_event_detection(news_data):
    """Extract named entities from each headline with spaCy.

    Args:
        news_data: Sequence of headline strings.

    Returns:
        A list with one entry per headline, each entry being a list of
        ``(entity_text, entity_label)`` tuples found in that headline.
    """
    if not news_data:
        # Avoid loading the spaCy model when there is nothing to process.
        return []
    nlp = spacy.load("en_core_web_sm")
    events = []
    for news in news_data:
        doc = nlp(news)
        entities = [(ent.text, ent.label_) for ent in doc.ents]
        # NOTE(review): the statement that stored results was missing from
        # the source; appending the per-headline entity list is a
        # reconstruction -- confirm the shape callers expect.
        events.append(entities)
    return events
def display_results(ticker1, ticker2, ticker1_sentiments, ticker2_sentiments,
                    ticker1_topics, ticker2_topics,
                    ticker1_topic_sentiments=None, ticker2_topic_sentiments=None):
    """Display the sentiment analysis results in Streamlit, side by side.

    Args:
        ticker1: The first ticker symbol.
        ticker2: The second ticker symbol.
        ticker1_sentiments: A list of sentiment classifications for ticker1.
        ticker2_sentiments: A list of sentiment classifications for ticker2.
        ticker1_topics: A list of topics for ticker1.
        ticker2_topics: A list of topics for ticker2.
        ticker1_topic_sentiments: A dictionary of topic sentiments for ticker1.
        ticker2_topic_sentiments: A dictionary of topic sentiments for ticker2.

    Returns:
        None. The previous trailing ``return display_results |`` was a
        syntax error and has been removed.
    """
    st.title(f"{ticker1} vs. {ticker2} Sentiment Analysis")
    col1, col2 = st.columns(2)
    with col1:
        _render_ticker_column(ticker1, ticker1_sentiments, ticker1_topics,
                              ticker1_topic_sentiments)
    with col2:
        _render_ticker_column(ticker2, ticker2_sentiments, ticker2_topics,
                              ticker2_topic_sentiments)


def _render_ticker_column(ticker, sentiments, topics, topic_sentiments=None):
    """Render one ticker's sentiments, topics and per-topic averages."""
    st.header(f"{ticker} Sentiment")
    # NOTE(review): the statements that rendered the sentiment list and
    # each topic were missing from the source; plain st.write calls are a
    # reconstruction -- confirm the intended presentation.
    st.write(sentiments)
    st.header(f"{ticker} Topics")
    for topic in topics:
        st.write(topic)
    if topic_sentiments:
        st.header(f"{ticker} Topic Sentiments")
        for topic_idx, scores in topic_sentiments.items():
            if not scores:
                continue  # an empty score list would divide by zero
            st.write(f"Topic {topic_idx}: Average Sentiment = {sum(scores) / len(scores)}")