jfataphd committed on
Commit
f21967a
·
1 Parent(s): 82bee27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -44
app.py CHANGED
@@ -8,8 +8,7 @@ import squarify
8
  import numpy as np
9
 
10
  # Define the HTML and CSS styles
11
- st.markdown(
12
- """
13
  <style>
14
  body {
15
  background-color: #EBF5FB;
@@ -20,33 +19,38 @@ st.markdown(
20
  # color: #ffffff;
21
  }
22
  </style>
23
- """,
24
- unsafe_allow_html=True
25
- )
26
 
27
  st.header("Word2Vec App for Clotting Pubmed Database.")
28
 
29
- text_input_value = st.text_input("Enter one term to search within the Clotting database")
30
  query = text_input_value
31
  query = query.lower()
32
  # query = input ("Enter your keyword(s):")
33
-
34
  if query:
35
- bar = st.progress(0)
36
- time.sleep(.2)
37
- st.caption(":LightSkyBlue[searching 40123 PubMed abstracts]")
38
- for i in range(10):
39
- bar.progress((i+1)*10)
40
- time.sleep(.1)
41
-
42
- model = Word2Vec.load("pubmed_model_clotting") # you can continue training with the loaded model!
43
- words = list(model.wv.key_to_index)
44
- X = model.wv[model.wv.key_to_index]
45
- model2 = model.wv[query]
46
- df = pd.DataFrame(X)
47
-
48
-
49
- # def findRelationships(query, df):
 
 
 
 
 
 
 
 
50
  table = model.wv.most_similar_cosmul(query, topn=10000)
51
  table = (pd.DataFrame(table))
52
  table.index.name = 'Rank'
@@ -70,7 +74,8 @@ if query:
70
  color = [cmap[i] for i in range(len(sizes))]
71
 
72
  short_table.set_index('Word', inplace=True)
73
- squarify.plot(sizes=sizes, label=short_table.index.tolist(), color=color, edgecolor="#EBF5FB", text_kwargs={'fontsize': 10})
 
74
  # # plot the treemap using matplotlib
75
  plt.axis('off')
76
  fig = plt.gcf()
@@ -80,11 +85,7 @@ if query:
80
  plt.clf()
81
 
82
  csv = table.head(100).to_csv().encode('utf-8')
83
- st.download_button(
84
- label="download top 100 words (csv)",
85
- data=csv,
86
- file_name='clotting_words.csv',
87
- mime='text/csv')
88
 
89
  # st.write(short_table)
90
  #
@@ -104,14 +105,15 @@ if query:
104
  st.subheader(f"Top 10 Genes closely related to {query}")
105
 
106
  df10 = df1.head(10)
107
- df10.index = 1/df10.index
108
  sizes = df10.index.tolist()
109
 
110
  cmap2 = plt.cm.Blues(np.linspace(0.05, .5, len(sizes)))
111
  color2 = [cmap2[i] for i in range(len(sizes))]
112
 
113
  df10.set_index('Human Gene', inplace=True)
114
- squarify.plot(sizes=sizes, label=df10.index.tolist(), color=color2, edgecolor="#EBF5FB", text_kwargs={'fontsize': 12})
 
115
  #
116
  # # plot the treemap using matplotlib
117
 
@@ -124,24 +126,11 @@ if query:
124
  st.pyplot(fig2)
125
 
126
  csv = df1.head(100).to_csv().encode('utf-8')
127
- st.download_button(
128
- label="download top 100 genes (csv)",
129
- data=csv,
130
- file_name='clotting_genes.csv',
131
- mime='text/csv')
132
-
133
-
134
-
135
-
136
 
137
  # findRelationships(query, df)
138
 
139
 
140
-
141
-
142
-
143
-
144
-
145
  # model = gensim.models.KeyedVectors.load_word2vec_format('pubmed_model_clotting', binary=True)
146
  # similar_words = model.most_similar(word)
147
  # output = json.dumps({"word": word, "similar_words": similar_words})
 
8
  import numpy as np
9
 
10
  # Define the HTML and CSS styles
11
+ st.markdown("""
 
12
  <style>
13
  body {
14
  background-color: #EBF5FB;
 
19
  # color: #ffffff;
20
  }
21
  </style>
22
+ """, unsafe_allow_html=True)
 
 
23
 
24
  st.header("Word2Vec App for Clotting Pubmed Database.")
25
 
26
+ text_input_value = st.text_input("Enter one term to search within the Clotting database", max_chars=50)
27
  query = text_input_value
28
  query = query.lower()
29
  # query = input ("Enter your keyword(s):")
 
30
  if query:
31
+
32
+ if query.isalpha():
33
+ bar = st.progress(0)
34
+ time.sleep(.2)
35
+ st.caption(":LightSkyBlue[searching 40123 PubMed abstracts]")
36
+ for i in range(10):
37
+ bar.progress((i + 1) * 10)
38
+ time.sleep(.1)
39
+ else:
40
+ st.write('Please omit numbers in term')
41
+
42
+ try:
43
+ model = Word2Vec.load("pubmed_model_clotting") # you can continue training with the loaded model!
44
+ words = list(model.wv.key_to_index)
45
+ X = model.wv[model.wv.key_to_index]
46
+ model2 = model.wv[query]
47
+ df = pd.DataFrame(X)
48
+
49
+ except:
50
+ st.error("Term occurrence is too low - please try another term")
51
+ st.stop()
52
+
53
+ # def findRelationships(query, df):
54
  table = model.wv.most_similar_cosmul(query, topn=10000)
55
  table = (pd.DataFrame(table))
56
  table.index.name = 'Rank'
 
74
  color = [cmap[i] for i in range(len(sizes))]
75
 
76
  short_table.set_index('Word', inplace=True)
77
+ squarify.plot(sizes=sizes, label=short_table.index.tolist(), color=color, edgecolor="#EBF5FB",
78
+ text_kwargs={'fontsize': 10})
79
  # # plot the treemap using matplotlib
80
  plt.axis('off')
81
  fig = plt.gcf()
 
85
  plt.clf()
86
 
87
  csv = table.head(100).to_csv().encode('utf-8')
88
+ st.download_button(label="download top 100 words (csv)", data=csv, file_name='clotting_words.csv', mime='text/csv')
 
 
 
 
89
 
90
  # st.write(short_table)
91
  #
 
105
  st.subheader(f"Top 10 Genes closely related to {query}")
106
 
107
  df10 = df1.head(10)
108
+ df10.index = 1 / df10.index
109
  sizes = df10.index.tolist()
110
 
111
  cmap2 = plt.cm.Blues(np.linspace(0.05, .5, len(sizes)))
112
  color2 = [cmap2[i] for i in range(len(sizes))]
113
 
114
  df10.set_index('Human Gene', inplace=True)
115
+ squarify.plot(sizes=sizes, label=df10.index.tolist(), color=color2, edgecolor="#EBF5FB",
116
+ text_kwargs={'fontsize': 12})
117
  #
118
  # # plot the treemap using matplotlib
119
 
 
126
  st.pyplot(fig2)
127
 
128
  csv = df1.head(100).to_csv().encode('utf-8')
129
+ st.download_button(label="download top 100 genes (csv)", data=csv, file_name='clotting_genes.csv', mime='text/csv')
 
 
 
 
 
 
 
 
130
 
131
  # findRelationships(query, df)
132
 
133
 
 
 
 
 
 
134
  # model = gensim.models.KeyedVectors.load_word2vec_format('pubmed_model_clotting', binary=True)
135
  # similar_words = model.most_similar(word)
136
  # output = json.dumps({"word": word, "similar_words": similar_words})