Spaces:

jfataphd
/

OncoDigger

Runtime error

App Files Files Community

jfataphd committed on Mar 2, 2023

Commit

f67304b

1 Parent(s): a61d268

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -62

app.py CHANGED Viewed

@@ -53,6 +53,14 @@ if opt == "Neuroblastoma corpus":
     model_used = ("pubmed_model_neuroblastoma")
     num_abstracts = 29032
     database_name = "Neuroblastoma"
 st.header(":red[*F*]ast :red[*A*]cting :red[*T*]ext :red[*A*]nalysis (:red[*FATA*]) 4 Science")
@@ -89,6 +97,8 @@ if query:
         st.stop()
     st.markdown("---")
     # def findRelationships(query, df):
     table = model.wv.most_similar_cosmul(query, topn=10000)
     table = (pd.DataFrame(table))
     table.index.name = 'Rank'
@@ -103,58 +113,84 @@ if query:
     # short_table = table.head(50)
     # print(table)
     # calculate the sizes of the squares in the treemap
-    short_table = table2.head(10).round(2)
     short_table.index += 1
     short_table.index = (1 / short_table.index)*10
     sizes = short_table.index.tolist()
-    cmap = plt.cm.Greens(np.linspace(0.05, .5, len(sizes)))
-    color = [cmap[i] for i in range(len(sizes))]
     short_table.set_index('Word', inplace=True)
-    squarify.plot(sizes=sizes, label=short_table.index.tolist(), color=color, edgecolor="#EBF5FB",
-                  text_kwargs={'fontsize': 10},)
-    # # plot the treemap using matplotlib
-    plt.axis('off')
-    # Add legend to top right, outside plot region
-    # plt.legend("upper right", bbox_to_anchor=(-.2, 0))
-    fig = plt.gcf()
-    fig.patch.set_facecolor('#CCFFFF')
-    # print(table.head(10)["SIMILARITY"])
-    # # display the treemap in Streamlit
     table2["SIMILARITY"] = 'Similarity Score ' + table2.head(10)["SIMILARITY"].round(2).astype(str)
     rank_num = list(short_table.index.tolist())
     # avg_size = sum(sizes) / len(short_table.index)
-    print(rank_num)
     # print(sizes)
     # '{0} in {1}'.format(unicode(self.author, 'utf-8'), unicode(self.publication, 'utf-8'))
-    TEMPLATE = """
-            <br>
-            {0}: <a href='https://pubmed.ncbi.nlm.nih.gov/?term={1}%5Bmh%5D+%20%20%20%20%20NOT
-            +review%5Bpt%5D+AND+english%5Bla%5D+AND+hasabstract+AND+1990%253A2022%252F12%252F31%5Bdp%5D+AND+%22{2}%22'>google</a>
-            """.format(database_name,database_name, database_name)
-    fig = px.treemap(names=rank_num, path=[short_table.index], values=sizes, hover_name=(table2.head(10)['SIMILARITY']))
-    fig.update(layout_coloraxis_showscale=False)
-    fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF")
-    fig.update_annotations(visible=False)
-    fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
-                      hoverlabel_bgcolor="lightgreen", hoverlabel_bordercolor="#000000", texttemplate=TEMPLATE)
-    fig.update_layout(uniformtext=dict(minsize=15, mode='hide'), treemapcolorway=["lightgreen"])
-    # treemap1, treemap2 = st.columns(2)
-    # with treemap1:
-    st.subheader(f"Top 10 Words closely related to {query}")
-    # st.pyplot(fig)
-    # plt.clf()
-    st.plotly_chart(fig, use_container_width=True)
-    csv = table.head(100).to_csv().encode('utf-8')
-    st.download_button(label="download top 100 words (csv)", data=csv, file_name=f'{database_name}_words.csv', mime='text/csv')
     st.markdown("---")
     # st.write(short_table)
     #
@@ -178,7 +214,7 @@ if query:
                 f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
                 f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
                     unsafe_allow_html=True)
-    value = st.slider("", 0, 100, step=5)
     if value > 0:
         # st.subheader(f"Top {value} genes closely related to {query}: "
         #              f"Click on the Pubmed and NCBI links for more gene information")
@@ -192,20 +228,8 @@ if query:
     df10 = df1.head(value)
     df10.index = (1 / df10.index)*10000
     sizes = df10.index.tolist()
-    cmap2 = plt.cm.Blues(np.linspace(0.05, .5, len(sizes)))
-    color2 = [cmap2[i] for i in range(len(sizes))]
     df10.set_index('Human Gene', inplace=True)
-    squarify.plot(sizes=sizes, label=df10.index.tolist(), color=color2, edgecolor="#EBF5FB",
-                  text_kwargs={'fontsize': 12})
-    #
-    # # plot the treemap using matplotlib
-    plt.axis('off')
-    fig2 = plt.gcf()
-    fig2.patch.set_facecolor('#CCFFFF')
-    #
     df3 = df1.copy()
     df3["SIMILARITY"] = 'Similarity Score ' + df3.head(value)["SIMILARITY"].round(2).astype(str)
     df3.reset_index(inplace=True)
@@ -216,31 +240,31 @@ if query:
     result = pd.merge(subset, df2, on='symbol2')
     # Show the result
     # print(result)
-    df = df10
     try:
         # Define the `text` column for labels and `href` column for links
-        df['text'] = df10.index
-        df['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
                   '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df10.index]
-        df['href2'] = [f'https://www.ncbi.nlm.nih.gov/gene/?term=' + c for c in df10.index]
-        df['name'] = [c for c in result['Approved name']]
-        df['database'] = database_name
         # print(df['name'])
         # Create the treemap using `px.treemap`
-        fig = px.treemap(df, path=[df10.index], values=sizes,
-                     custom_data=['href', 'name', 'database', 'href2'], hover_name=(df3.head(value)['SIMILARITY']))
         fig.update(layout_coloraxis_showscale=False)
         fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
         fig.update_annotations(visible=False)
         fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                       hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
-                      texttemplate="<b><span style='font-family: Arial; font-size: 20px;'>%{label}</span></b><br><span "
                                    "style='font-family: Arial; font-size: 15px;'>%{customdata[1]}<br>"
                                    "<a href='%{customdata[0]}'>PubMed"
                                    "</a><br><a href='%{customdata[3]}'>NCBI"
@@ -260,6 +284,8 @@ if query:
         csv = df1.head(value).to_csv().encode('utf-8')
         st.download_button(label=f"download top {value} genes (csv)", data=csv, file_name=f'{database_name}_genes.csv',
                        mime='text/csv')
     except:
         st.warning(
             f"This selection exceeds the number of similar genes related to {query} within the {database_name} corpus")

     model_used = ("pubmed_model_neuroblastoma")
     num_abstracts = 29032
     database_name = "Neuroblastoma"
+# if opt == "Breast Cancer corpus":
+#     model_used = ("pubmed_model_breast_cancer")
+#     num_abstracts = 290320
+#     database_name = "Breast_cancer"
+# if opt == "Mammary gland corpus":
+#     model_used = ("pubmed_model_mammary_gland")
+#     num_abstracts = 79032
+#     database_name = "Mammary_gland"
 st.header(":red[*F*]ast :red[*A*]cting :red[*T*]ext :red[*A*]nalysis (:red[*FATA*]) 4 Science")
         st.stop()
     st.markdown("---")
     # def findRelationships(query, df):
     table = model.wv.most_similar_cosmul(query, topn=10000)
     table = (pd.DataFrame(table))
     table.index.name = 'Rank'
     # short_table = table.head(50)
     # print(table)
+    # Create the slider with increments of 5 up to 100
+    st.markdown(
+        f"<b><p style='font-family: Arial; font-size: 20px;'>Populate a treemap with the slider below to visualize "
+        f"<span style='color:red; font-style: italic;'>words</span> contextually "
+        f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
+        f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
+        unsafe_allow_html=True)
+    value_word = st.slider("Words", 0, 100, step=5)
+    if value_word > 0:
+        # st.subheader(f"Top {value} genes closely related to {query}: "
+        #              f"Click on the Pubmed and NCBI links for more gene information")
+        st.markdown(
+            f"<b><p style='font-family: Arial; font-size: 20px; font-style: Bold;'>Top <span style='color:red; font-style: italic;'>{value_word} "
+            f"</span>words similar to "
+            f"<span style='color:red; font-style: italic;'>{query}:</span> Click on the squares to expand and the Wikipaedia links for more word information</span></p></b>",
+            unsafe_allow_html=True)
     # calculate the sizes of the squares in the treemap
+    short_table = table2.head(value_word).round(2)
     short_table.index += 1
     short_table.index = (1 / short_table.index)*10
     sizes = short_table.index.tolist()
     short_table.set_index('Word', inplace=True)
+    # label = short_table.index.tolist()
+    print(short_table.index)
     table2["SIMILARITY"] = 'Similarity Score ' + table2.head(10)["SIMILARITY"].round(2).astype(str)
     rank_num = list(short_table.index.tolist())
     # avg_size = sum(sizes) / len(short_table.index)
+    df = short_table
+    try:
+        # Define the `text` column for labels and `href` column for links
+        df['text'] = short_table.index
+        df['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
+                  '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in short_table.index]
+        df['href2'] = [f'https://en.wikipedia.org/wiki/' + c for c in short_table.index]
+        df['database'] = database_name
     # print(sizes)
     # '{0} in {1}'.format(unicode(self.author, 'utf-8'), unicode(self.publication, 'utf-8'))
+        # Create the treemap using `px.treemap`
+        fig = px.treemap(df, path=[short_table.index], values=sizes, custom_data=['href', 'text', 'database', 'href2'],
+                         hover_name=(table2.head(value_word)['SIMILARITY']))
+        fig.update(layout_coloraxis_showscale=False)
+        fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
+        fig.update_annotations(visible=False)
+        fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
+                          hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
+                          texttemplate="</b><br><span "
+                                       "style='font-family: Arial; font-size: 15px;'>%{customdata[1]}<br>"
+                                       "<a href='%{customdata[0]}'>PubMed"
+                                       "</a><br><a href='%{customdata[3]}'>Wikipedia"
+                                       "</span></a>")
+        fig.update_layout(uniformtext=dict(minsize=15), treemapcolorway=["lightgreen"])
+        # st.pyplot(fig2)
+        st.plotly_chart(fig, use_container_width=True)
+        # st.caption(
+        #     "Gene designation and database provided by HUGO Gene Nomenclature Committee (HGNC): https://www.genenames.org/")
+        # st.caption("Gene designation add in exceptions [p21, p53, her2, her3]")
+        csv = table2.head(value_word).to_csv().encode('utf-8')
+        st.download_button(label=f"download top {value_word} words (csv)", data=csv, file_name=f'{database_name}_words.csv',
+                           mime='text/csv')
+    except:
+        st.warning(
+            f"This selection exceeds the number of similar words related to {query} within the {database_name} corpus")
     st.markdown("---")
     # st.write(short_table)
     #
                 f"and semantically similar to <span style='color:red; font-style: italic;'>{query}</span> "
                 f"within the <span style='color:red; font-style: italic;'>{database_name}</span> corpus.</p></b>",
                     unsafe_allow_html=True)
+    value = st.slider("Gene", 0, 100, step=5)
     if value > 0:
         # st.subheader(f"Top {value} genes closely related to {query}: "
         #              f"Click on the Pubmed and NCBI links for more gene information")
     df10 = df1.head(value)
     df10.index = (1 / df10.index)*10000
     sizes = df10.index.tolist()
     df10.set_index('Human Gene', inplace=True)
     df3 = df1.copy()
     df3["SIMILARITY"] = 'Similarity Score ' + df3.head(value)["SIMILARITY"].round(2).astype(str)
     df3.reset_index(inplace=True)
     result = pd.merge(subset, df2, on='symbol2')
     # Show the result
     # print(result)
+    # label = df10.index.tolist()
+    df2 = df10
     try:
         # Define the `text` column for labels and `href` column for links
+        df2['text'] = df10.index
+        df2['href'] = [f'https://pubmed.ncbi.nlm.nih.gov/?term={database_name}%5Bmh%5D+NOT+review%5Bpt%5D' \
                   '+AND+english%5Bla%5D+AND+hasabstract+AND+1990:2022%5Bdp%5D+AND+' + c for c in df10.index]
+        df2['href2'] = [f'https://www.ncbi.nlm.nih.gov/gene/?term=' + c for c in df10.index]
+        df2['name'] = [c for c in result['Approved name']]
+        df2['database'] = database_name
         # print(df['name'])
         # Create the treemap using `px.treemap`
+        fig = px.treemap(df2, path=[df10.index], values=sizes,
+                     custom_data=['href', 'name', 'database', 'href2', 'text'], hover_name=(df3.head(value)['SIMILARITY']))
         fig.update(layout_coloraxis_showscale=False)
         fig.update_layout(autosize=True, paper_bgcolor="#CCFFFF", margin=dict(t=0, b=0, l=0, r=0))
         fig.update_annotations(visible=False)
         fig.update_traces(marker=dict(cornerradius=5), root_color="#CCFFFF", hovertemplate=None,
                       hoverlabel_bgcolor="lightblue", hoverlabel_bordercolor="#000000",
+                      texttemplate="<b><span style='font-family: Arial; font-size: 20px;'>%{customdata[4]}</span></b><br><span "
                                    "style='font-family: Arial; font-size: 15px;'>%{customdata[1]}<br>"
                                    "<a href='%{customdata[0]}'>PubMed"
                                    "</a><br><a href='%{customdata[3]}'>NCBI"
         csv = df1.head(value).to_csv().encode('utf-8')
         st.download_button(label=f"download top {value} genes (csv)", data=csv, file_name=f'{database_name}_genes.csv',
                        mime='text/csv')
     except:
         st.warning(
             f"This selection exceeds the number of similar genes related to {query} within the {database_name} corpus")