import streamlit as st
import pandas as pd
import yfinance as yf
from textblob import TextBlob
import re
from gensim import corpora, models
from nltk import word_tokenize
from nltk.corpus import stopwords
import spacy

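# Runtime prerequisites: the NLTK tokenizer and stop word list require their
# data files to be present, and the spaCy pipeline used below must be
# installed separately (python -m spacy download en_core_web_sm). This guarded
# download is a convenience sketch; in a deployed app you would typically
# fetch these resources at build time instead.
import nltk
for resource in ("punkt", "stopwords"):
    nltk.download(resource, quiet=True)
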
def fetch_news_data(ticker1, ticker2):
    """Fetches news data for the given tickers from Yahoo Finance."""
    try:
        ticker1_data = yf.Ticker(ticker1).news
        ticker2_data = yf.Ticker(ticker2).news
        return ticker1_data, ticker2_data
    except Exception as e:
        st.error(f"An error occurred while fetching news data: {e}")
        # Return empty lists instead of recursing: the original recursive call
        # passed no arguments (a TypeError) and would otherwise loop forever.
        return [], []

def clean_news_data(news_data):
    """Cleans the news data by removing special characters and extra spaces."""
    clean_data = []
    for news in news_data:
        # Some feed items may lack a title; fall back to an empty string.
        news_text = re.sub(r"[^\w\s]", "", news.get('title', ''))
        news_text = " ".join(news_text.split())
        clean_data.append(news_text)
    return clean_data

def perform_sentiment_analysis(news_data):
    """Performs sentiment analysis on the given news data using TextBlob."""
    sentiments = []
    for news in news_data:
        analysis = TextBlob(news)
        sentiments.append(analysis.sentiment.polarity)
    return sentiments

def classify_sentiment(sentiment_scores):
    """Classifies sentiment based on the polarity scores."""
    sentiments = []
    for score in sentiment_scores:
        if score > 0:
            sentiments.append('Positive')
        elif score < 0:
            sentiments.append('Negative')
        else:
            sentiments.append('Neutral')
    return sentiments

def topic_modeling(news_data):
    """Performs topic modeling using LDA."""
    # Tokenize, lowercase, and remove stop words. Lowercasing matters here:
    # the NLTK stop word list is lowercase, so "The" would otherwise slip through.
    stop_words = set(stopwords.words('english'))
    tokenized_news = [word_tokenize(text.lower()) for text in news_data]
    tokenized_news = [[word for word in tokens if word not in stop_words] for tokens in tokenized_news]

    # Create dictionary and corpus
    dictionary = corpora.Dictionary(tokenized_news)
    corpus = [dictionary.doc2bow(text) for text in tokenized_news]

    # Train LDA model
    lda_model = models.LdaMulticore(corpus=corpus, id2word=dictionary, num_topics=5, passes=10)

    return lda_model, dictionary

def extract_topics(lda_model, dictionary, num_words=5):
    """Extracts the top words for each topic from the LDA model."""
    # show_topic returns (word, probability) pairs for a topic. The original
    # iterated dictionary.items(), which yields (id, token) pairs, and so
    # compared integer ids against the topic string. The dictionary parameter
    # is retained for signature compatibility; show_topic already maps ids
    # to words.
    topics = []
    for idx in range(lda_model.num_topics):
        topic_words = " ".join(word for word, _ in lda_model.show_topic(idx, topn=num_words))
        topics.append(f"Topic {idx}: {topic_words}")
    return topics

def topic_sentiment_analysis(news_data, lda_model, dictionary):
    """Performs sentiment analysis for each topic."""
    topic_sentiments = {}
    for news in news_data:
        # get_document_topics needs a bag-of-words vector per document;
        # the original called it with no arguments, which raises a TypeError.
        bow = dictionary.doc2bow(word_tokenize(news.lower()))
        doc_topics = lda_model.get_document_topics(bow)
        if not doc_topics:
            continue
        # Assign the document to its most probable topic.
        topic_idx = max(doc_topics, key=lambda x: x[1])[0]
        topic_sentiments.setdefault(topic_idx, []).append(TextBlob(news).sentiment.polarity)

    return topic_sentiments

def ner_and_event_detection(news_data):
    """Performs Named Entity Recognition on the news headlines.

    Despite the name, only entity extraction is implemented here; event
    detection would require additional logic on top of the entities.
    """
    nlp = spacy.load("en_core_web_sm")
    events = []
    for news in news_data:
        doc = nlp(news)
        entities = [(ent.text, ent.label_) for ent in doc.ents]
        events.extend(entities)

    return events

def display_results(ticker1, ticker2, ticker1_sentiments, ticker2_sentiments,
                    ticker1_topics, ticker2_topics,
                    ticker1_topic_sentiments=None, ticker2_topic_sentiments=None):
    """Displays the sentiment analysis results in Streamlit.

    Args:
        ticker1: The first ticker symbol.
        ticker2: The second ticker symbol.
        ticker1_sentiments: A list of sentiment classifications for ticker1.
        ticker2_sentiments: A list of sentiment classifications for ticker2.
        ticker1_topics: A list of topics for ticker1.
        ticker2_topics: A list of topics for ticker2.
        ticker1_topic_sentiments: A dictionary of topic sentiments for ticker1.
        ticker2_topic_sentiments: A dictionary of topic sentiments for ticker2.
    """
    st.title(f"{ticker1} vs. {ticker2} Sentiment Analysis")

    col1, col2 = st.columns(2)
    with col1:
        st.header(f"{ticker1} Sentiment")
        st.bar_chart(pd.Series(ticker1_sentiments).value_counts())
        st.header(f"{ticker1} Topics")
        for topic in ticker1_topics:
            st.write(topic)
        if ticker1_topic_sentiments:
            st.header(f"{ticker1} Topic Sentiments")
            for topic_idx, sentiments in ticker1_topic_sentiments.items():
                st.write(f"Topic {topic_idx}: Average Sentiment = {sum(sentiments) / len(sentiments):.2f}")

    with col2:
        st.header(f"{ticker2} Sentiment")
        st.bar_chart(pd.Series(ticker2_sentiments).value_counts())
        st.header(f"{ticker2} Topics")
        for topic in ticker2_topics:
            st.write(topic)
        if ticker2_topic_sentiments:
            st.header(f"{ticker2} Topic Sentiments")
            for topic_idx, sentiments in ticker2_topic_sentiments.items():
                st.write(f"Topic {topic_idx}: Average Sentiment = {sum(sentiments) / len(sentiments):.2f}")
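
# A minimal sketch of how these pieces might be wired together into the
# Streamlit app. The sidebar widget labels and default tickers ("AAPL",
# "MSFT") are illustrative assumptions, not part of the module above;
# ner_and_event_detection could be surfaced the same way with st.write.
def main():
    st.sidebar.header("Compare two tickers")
    ticker1 = st.sidebar.text_input("First ticker", "AAPL")   # hypothetical default
    ticker2 = st.sidebar.text_input("Second ticker", "MSFT")  # hypothetical default

    news1, news2 = fetch_news_data(ticker1, ticker2)
    clean1, clean2 = clean_news_data(news1), clean_news_data(news2)
    if not clean1 or not clean2:
        st.warning("No news found for one of the tickers.")
        return

    sentiments1 = classify_sentiment(perform_sentiment_analysis(clean1))
    sentiments2 = classify_sentiment(perform_sentiment_analysis(clean2))

    lda1, dict1 = topic_modeling(clean1)
    lda2, dict2 = topic_modeling(clean2)

    display_results(ticker1, ticker2, sentiments1, sentiments2,
                    extract_topics(lda1, dict1), extract_topics(lda2, dict2),
                    topic_sentiment_analysis(clean1, lda1, dict1),
                    topic_sentiment_analysis(clean2, lda2, dict2))

if __name__ == "__main__":
    main()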