File size: 3,070 Bytes
1699569
 
 
 
 
65ce061
bfdebb5
1699569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2912c4
1699569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b2cc15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1699569
 
 
4b2cc15
1699569
 
 
4b2cc15
1699569
 
 
4b2cc15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1699569
4b2cc15
 
1699569
 
4b2cc15
1699569
65ce061
b2912c4
 
2f21339
 
 
 
5ba2c0e
 
1699569
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import streamlit as st
import time
import json
from gensim.models import Word2Vec
import pandas as pd
from datasets import load_dataset
from datasets import Dataset

# Define the HTML and CSS styles for the page banner.
html_temp = """
<div style="background-color:black;padding:10px">
<h1 style="color:white;text-align:center;">My Streamlit App with HTML and CSS</h1>
</div>
"""

# Render the banner; unsafe_allow_html is required for raw HTML in markdown.
st.markdown(html_temp, unsafe_allow_html=True)

# Add some text to the app
st.write("This is my Streamlit app with HTML and CSS formatting.")

query = st.text_input("Enter a word")
# The model vocabulary is assumed lowercase, so normalize the query.
query = query.lower()

if query:
    # Load the trained Word2Vec model (you can continue training with it).
    model = Word2Vec.load("pubmed_model_clotting")
    words = list(model.wv.key_to_index)
    X = model.wv[model.wv.key_to_index]
    # NOTE(review): raises KeyError if the query is out of vocabulary — confirm
    # whether an st.error fallback is wanted here.
    model2 = model.wv[query]
    df = pd.DataFrame(X)

    # --- Words most similar to the query ---------------------------------
    table = model.wv.most_similar_cosmul(query, topn=10000)
    table = (pd.DataFrame(table))
    table.index.name = 'Rank'
    table.columns = ['Word', 'SIMILARITY']
    print()
    print("Similarity to " + str(query))
    pd.set_option('display.max_rows', None)

    csv = table.head(50).to_csv(index=False).encode('utf-8')
    st.download_button(
        label=f"Download words similar to {query} in .csv format",
        data=csv,
        file_name='clotting_sim1.csv',
        mime='text/csv'
    )

    # Renamed from `json` to avoid shadowing the stdlib json module imported
    # at the top of the file.
    json_bytes = table.head(50).to_json(index=True).encode('utf-8')
    st.download_button(
        label=f"Download words similar to {query} in .js format",
        data=json_bytes,
        file_name='clotting_sim1.js',
        mime='application/json'  # was 'json', which is not a valid MIME type
    )

    print(table.head(10))
    table.head(50).to_csv("clotting_sim1.csv", index=True)
    table.head(50).to_json("clotting_sim1.js", index=True)
    st.header(f"Similar Words to {query}")
    st.write(table.head(50))

    # --- Human genes among the similar words -----------------------------
    print()
    print("Human genes similar to " + str(query))
    df2 = pd.read_csv('Human_Genes.csv')
    # Keep only the similar words that are known human gene symbols.
    # .copy() prevents pandas SettingWithCopyWarning on the in-place rename.
    df1 = table[table.Word.isin(df2.symbol)].copy()
    df1.rename(columns={'Word': 'Human Gene'}, inplace=True)

    csv2 = df1.head(50).to_csv(index=False).encode('utf-8')
    st.download_button(
        label=f"Download genes similar to {query} in .csv format",
        data=csv2,
        file_name='clotting_sim2.csv',
        mime='text/csv'
    )

    json_bytes2 = df1.head(50).to_json(index=True).encode('utf-8')
    st.download_button(
        # Was labelled "words" and wrote clotting_sim1.js, colliding with the
        # word-table download above; fixed to match the gene CSV (sim2).
        label=f"Download genes similar to {query} in .js format",
        data=json_bytes2,
        file_name='clotting_sim2.js',
        mime='application/json'
    )
    print(df1.head(10))
    df1.head(50).to_csv("clotting_sim2.csv", index=True)
    df1.head(50).to_json("clotting_sim2.js", index=True)
    print()
    st.header(f"Similar Genes to {query}")
    st.write(df1.head(50))


    # arrow_dataset = Dataset.from_pandas(df1.head(50))
    # arrow_dataset.save_to_disk("https://huggingface.co/datasets/jfataphd/word2vec_dataset/sim2")

    # arrow_dataset_reloaded = load_from_disk('sim2.js')
    # arrow_dataset_reloaded