at2507 committed on
Commit
76dd2ad
·
1 Parent(s): c070508

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -114
app.py CHANGED
@@ -17,12 +17,16 @@ from numpy.linalg import norm
17
 
18
  import json
19
  import ast
 
 
 
20
 
21
  import gradio as gr
22
  from datetime import datetime
23
  import time
24
  import dataframe_image as dfi
25
 
 
26
  print("Packages loaded!")
27
 
28
  # write out functions
@@ -55,129 +59,55 @@ def cosine_similarity_generator(master_exploded, embeddings, query, filename = t
55
  master_exploded_top = master_exploded[master_exploded['cos_sim']> 0.6]
56
  print("The number of results with cosine similarity > 0.6 are: ", len(master_exploded[master_exploded['cos_sim']> 0.6]))
57
 
58
- master_exploded_top_k = master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)
59
  print(" The top k=10 results have a min cosine similarity of: ", master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)['cos_sim'].min())
60
  # print(master_exploded_top_k)
61
- cosine_sum_by_name = master_exploded_top.groupby(["name", "query", "combined", "tokenized_sentences",]).agg({"cos_sim": ["sum"]}).reset_index()
62
  print("Taking sum of cosine similarities above 0.6 threshold...")
63
  cosine_sum_by_name.columns = cosine_sum_by_name.columns.map('_'.join)
64
 
65
  ranked_mentors = cosine_sum_by_name.reset_index().sort_values(by ="cos_sim_sum", ascending =False)
66
- path = "./Ranked_Results_Gradio/"
67
- ranked_mentors_filename = path+'ranked_mentors_'+str(filename)+'.csv'
68
- cos_sum_filename = path+'cos_sum_'+str(filename)+'.csv'
69
- top_10_filename = path+'top_10_'+str(filename)+'.csv'
70
- above_threshold_filename = path+"above_0.6_threshold_"+str(filename)+".csv"
71
 
72
  # save 3 files: Ranked mentors, top 10 matches based on cosine similarity sum, and then all of the results per run.
73
  # ranked_mentors.head(10).to_csv(ranked_mentors_filename)
74
  # cosine_sum_by_name.sort_values(by ="cos_sim_sum", ascending =False).head(10).to_csv(top_10_filename)
75
  # cosine_sum_by_name.to_csv(cos_sum_filename)
76
  # master_exploded_top.sort_values(by ="cos_sim", ascending =False).to_csv(above_threshold_filename)
77
- return master_exploded_top, master_exploded_top_k, cosine_sum_by_name
78
-
79
-
80
-
81
- # def recosys(query):
82
- # # main function
83
- # master_exploded = load_pickle()
84
- # print("Pickle loaded!")
85
- # embeddings, query = sentence_embedding_generator(query)
86
- # print("Query embeddings generated...")
87
- # # change query to date time
88
- # ranked_mentors, top_10, cosine_sum_by_name = cosine_similarity_generator(master_exploded, embeddings, query, time.strftime("%Y%m%d-%H%M%S"))
89
- # print("Ranked mentors printed! Check your directory.")
90
- # # to_return_df = gr.Dataframe(
91
- # # headers = ["name_", "combined_", "cos_sim_sum"],
92
- # # datatype=["str", "str", "number"],
93
- # # row_count=10,
94
- # # col_count=(3, "fixed"),
95
- # # )
96
- # return top_10
97
-
98
- # def recosys_embedding(query):
99
- # master_exploded = load_pickle()
100
- # print("Pickle loading....")
101
- # embeddings, query = sentence_embedding_generator(query)
102
- # return embeddings, master_exploded
103
-
104
- # def embeddings_to_ranking(embeddings, master_exploded, query):
105
- # ranked_mentors, top_10, cosine_sum_by_name = cosine_similarity_generator(master_exploded, embeddings, query, time.strftime("%Y%m%d-%H%M%S"))
106
- # return top_10
107
-
108
- # def flip_text(x):
109
- # return x[::-1]
110
-
111
- master_exploded = load_pickle()
112
-
113
- demo = gr.Blocks()
114
-
115
- with demo:
116
- gr.Markdown(
117
- """
118
- # Mentor Semantic Search Recommender System
119
- What kind of mentor are you looking for?
120
- """
121
- )
122
- input = gr.Textbox(placeholder="someone who has experience working in big tech")
123
-
124
- def generate_results(input):
125
- embeddings, query = sentence_embedding_generator(str(input))
126
- ranked_mentors, top_10, cosine_sum_by_name = cosine_similarity_generator(master_exploded,
127
- embeddings,
128
- query,
129
- time.strftime("%Y%m%d-%H%M%S"))
130
- print(top_10.columns)
131
- return(top_10['name'].to_list())
132
-
133
- demo = gr.Interface(fn=generate_results, inputs="text", outputs="text")
134
- demo.launch()
135
- # demo.launch(share=True)
136
- # embeddings_btn = gr.Button("Generate query embeddings")
137
-
138
- # def embedding_gen(input):
139
- # embeddings, query = sentence_embedding_generator(str(input))
140
- # return embeddings
141
-
142
-
143
- # embeddings_btn.click(fn=embedding_gen, inputs=input, outputs="number")
144
- # ranked_mentors, top_10, cosine_sum_by_name = cosine_similarity_generator(master_exploded,
145
- # embeddings,
146
- # query,
147
- # time.strftime("%Y%m%d-%H%M%S"))
148
- # def top_5_mentors(top_10):
149
- # return top_10.head(5)
150
-
151
- # # def scorer(embeddings, query):
152
- # # ranked_mentors, top_10, cosine_sum_by_name = cosine_similarity_generator(master_exploded,
153
- # # embeddings,
154
- # # query,
155
- # # time.strftime("%Y%m%d-%H%M%S"))
156
- # # return top_10
157
-
158
- # submit_btn = gr.Button("Search for mentors")
159
- # submit_btn.click(fn=top_5_mentors, inputs=top_10, outputs=gr.Dataframe())
160
- # # dfi.export(df_styled,"mytable.png")
161
- # # submit_btn.click(fn=recosys, inputs=input, outputs=gr.Dataframe(), api_name="RecoSys")
162
-
163
- # input.change(fn=recosys, inputs = input, outputs = output)
164
-
165
-
166
- # demo = gr.Interface(
167
- # recosys,
168
- # inputs="text",
169
- # "dataframe")
170
- # outputs=gr.Dataframe(
171
- # headers = ["name_", "combined_", "cos_sim_sum"],
172
- # datatype=["str", "str", "number"],
173
- # row_count=10,
174
- # col_count=(3, "fixed"),
175
- # )
176
-
177
- # # "dataframe",
178
- # # description="What kind fo mentoir are you looking for?",
179
- # )
180
- # demo = gr.Interface(fn=recosys, inputs="text", outputs="dataframe")
181
-
182
-
183
-
 
17
 
18
  import json
19
  import ast
20
+ import requests
21
+
22
+
23
 
24
  import gradio as gr
25
  from datetime import datetime
26
  import time
27
  import dataframe_image as dfi
28
 
29
+
30
  print("Packages loaded!")
31
 
32
  # write out functions
 
59
  master_exploded_top = master_exploded[master_exploded['cos_sim']> 0.6]
60
  print("The number of results with cosine similarity > 0.6 are: ", len(master_exploded[master_exploded['cos_sim']> 0.6]))
61
 
62
+ top_k = master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)
63
  print(" The top k=10 results have a min cosine similarity of: ", master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)['cos_sim'].min())
64
  # print(master_exploded_top_k)
65
+ cosine_sum_by_name = master_exploded_top.groupby(["id", "name", "query", "gitHubUrl", "combined" ]).agg({"cos_sim": ["sum"]}).reset_index()
66
  print("Taking sum of cosine similarities above 0.6 threshold...")
67
  cosine_sum_by_name.columns = cosine_sum_by_name.columns.map('_'.join)
68
 
69
  ranked_mentors = cosine_sum_by_name.reset_index().sort_values(by ="cos_sim_sum", ascending =False)
70
+ # path = "./Ranked_Results_Gradio/"
71
+ # ranked_mentors_filename = path+'ranked_mentors_'+str(filename)+'.csv'
72
+ # cos_sum_filename = path+'cos_sum_'+str(filename)+'.csv'
73
+ # top_10_filename = path+'top_10_'+str(filename)+'.csv'
74
+ # above_threshold_filename = path+"above_0.6_threshold_"+str(filename)+".csv"
75
 
76
  # save 3 files: Ranked mentors, top 10 matches based on cosine similarity sum, and then all of the results per run.
77
  # ranked_mentors.head(10).to_csv(ranked_mentors_filename)
78
  # cosine_sum_by_name.sort_values(by ="cos_sim_sum", ascending =False).head(10).to_csv(top_10_filename)
79
  # cosine_sum_by_name.to_csv(cos_sum_filename)
80
  # master_exploded_top.sort_values(by ="cos_sim", ascending =False).to_csv(above_threshold_filename)
81
+ return master_exploded_top, top_k, cosine_sum_by_name
82
+
83
def dataframe_output(cosine_sum_by_name):
    """Serialize the per-mentor score table to a JSON string.

    Args:
        cosine_sum_by_name: DataFrame to serialize.

    Returns:
        The frame encoded with ``orient="columns"``, i.e. a JSON object of
        the form ``{column: {index: value}}``.
    """
    return cosine_sum_by_name.to_json(orient="columns")
87
+
88
+
89
def generate_results(input):
    """Return the ten best-matching mentors for a free-text query.

    The query is embedded, scored by ``cosine_similarity_generator``, and the
    resulting per-mentor sums of cosine similarity are ranked before being
    truncated.

    Args:
        input: Text entered in the Gradio textbox. The name shadows the
            builtin but is kept so the callable's signature is unchanged.

    Returns:
        A pandas DataFrame of at most ten rows, ordered by ``cos_sim_sum``
        from highest to lowest.
    """
    # NOTE(review): the pickle is re-read on every request. Hoisting it to
    # module level would be faster, but confirm first that
    # cosine_similarity_generator does not mutate the frame it is given.
    master_exploded = load_pickle()
    embeddings, query = sentence_embedding_generator(str(input))

    # Only the aggregated table is needed; the first two return values
    # (above-threshold rows and top-k rows) are discarded.
    _, _, cosine_sum_by_name = cosine_similarity_generator(
        master_exploded,
        embeddings,
        query,
        time.strftime("%Y%m%d-%H%M%S"),
    )
    print(cosine_sum_by_name.columns)

    # The groupby output is ordered by its group keys, not by score, so it
    # must be sorted before head(); otherwise the table shows the first ten
    # mentors by key rather than the ten strongest matches. Sorting the frame
    # directly also removes the former to_json -> read_json round trip.
    top_mentors = (
        cosine_sum_by_name
        .sort_values(by="cos_sim_sum", ascending=False)
        .head(10)
        .reset_index(drop=True)
    )
    print(top_mentors)
    return top_mentors
104
+
105
# Gradio UI: a single text query in, a table of ranked mentors out.
# Components come from the top-level namespace (gr.Textbox / gr.Dataframe);
# the gr.inputs / gr.outputs namespaces are deprecated in Gradio 3 and were
# removed in Gradio 4.
iface = gr.Interface(
    fn=generate_results,
    inputs=gr.Textbox(label="What kind of mentor are you looking for?"),
    outputs=gr.Dataframe(type="pandas"),
    title="SharpestMinds Mentor Recommender Semantic Search App",
    description="Converts a string query into an embedding, and then compares the aggregate cosine similarity by mentor.",
)

# Start the web server; this call blocks when the file is run as a script.
iface.launch()