varun500 committed on
Commit
fb19adb
·
1 Parent(s): cb0541c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -0
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import networkx as nx
3
+ import matplotlib.pyplot as plt
4
+ import pandas as pd
5
+ from sentence_transformers import SentenceTransformer
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ from sklearn.cluster import KMeans
8
+
9
+
10
def main():
    """Render the Streamlit page that relates a sentence to finance keywords.

    The user types a sentence; it and a fixed list of keywords are embedded
    with a Sentence-Transformers model, and the cosine similarity between the
    sentence and each keyword is computed. The result is shown as a star
    graph (sentence in the centre, one edge per keyword labelled with its
    similarity) whose keyword nodes are coloured by KMeans cluster, followed
    by a table of the similarity scores.

    Side effects: writes ``financial_graph.png`` to the current working
    directory each time a non-empty sentence is processed.
    """
    st.title("Financial Graph App")
    st.write("Enter a financial sentence and see its similarity to predefined keywords.")

    # User input
    financial_sentence = st.text_area("Enter the financial sentence", value="")

    # Nothing to analyse until the user has typed something.
    if not financial_sentence.strip():
        return

    # Predefined keywords the sentence is compared against.
    keywords = [
        "Finance",
        "Fiscal",
        "Quarterly results",
        "Revenue",
        "Profit",
    ]

    # One colour per KMeans cluster; the sentence node is not clustered and
    # gets its own colour so it is never mistaken for a cluster member.
    cluster_colors = ["lightblue", "lightgreen", "lightyellow"]
    sentence_color = "lightcoral"

    # Load the pre-trained Sentence-Transformers model.
    # NOTE(review): this is constructed on every call of main(); consider
    # caching the model if start-up time becomes a problem.
    model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

    # Embed the sentence (as a one-element batch) and the keywords.
    sentence_embedding = model.encode([financial_sentence])
    keyword_embeddings = model.encode(keywords)

    # Row 0 holds the similarity of the single sentence to every keyword.
    similarity_scores = cosine_similarity(sentence_embedding, keyword_embeddings)[0]

    # Cluster the keyword embeddings with KMeans. A fixed random_state keeps
    # the cluster assignment (and therefore the colours) stable between runs.
    kmeans = KMeans(n_clusters=len(cluster_colors), n_init=10, random_state=0)
    cluster_labels = kmeans.fit_predict(keyword_embeddings)

    # Build the graph: the sentence node plus one node and one weighted edge
    # per keyword.
    G = nx.Graph()
    G.add_node(financial_sentence, embedding=sentence_embedding[0])
    for keyword, embedding, similarity, cluster_label in zip(
        keywords, keyword_embeddings, similarity_scores, cluster_labels
    ):
        # Labels come from clustering the keywords only, so they are attached
        # by keyword; zipping against G.nodes would shift every label by one
        # because the sentence node comes first.
        G.add_node(
            keyword,
            embedding=embedding,
            similarity=similarity,
            cluster=int(cluster_label),
        )
        G.add_edge(financial_sentence, keyword, weight=similarity)

    # Set node positions using spring layout.
    pos = nx.spring_layout(G)

    node_colors = [
        cluster_colors[G.nodes[node]["cluster"]]
        if "cluster" in G.nodes[node]
        else sentence_color
        for node in G.nodes
    ]

    # Draw on an explicit figure instead of the global pyplot state, so
    # repeated runs do not paint on top of an earlier drawing.
    fig, ax = plt.subplots()
    nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=800, ax=ax)
    nx.draw_networkx_edges(G, pos, edge_color="gray", width=1, alpha=0.7, ax=ax)
    nx.draw_networkx_labels(G, pos, font_size=10, font_weight="bold", ax=ax)

    # Edge labels are the cosine similarity scores stored as edge weights.
    edge_labels = nx.get_edge_attributes(G, "weight")
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8, ax=ax)

    ax.set_title("Financial Context and Keywords")
    ax.axis("off")

    # Save the graph as an image, show it, then release the figure.
    fig.savefig("financial_graph.png")
    st.pyplot(fig)
    plt.close(fig)

    # Show the similarity scores as a table.
    df = pd.DataFrame({"Keyword": keywords, "Cosine Similarity": similarity_scores})
    st.write("Similarity Scores:")
    st.dataframe(df)
101
+
102
+
103
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()