Spaces:

at2507
/

SM_NLP_RecoSys

Sleeping

App Files Files Community

at2507 committed on May 11, 2023

Commit

76dd2ad

1 Parent(s): c070508

Upload app.py

Browse files

Files changed (1) hide show

app.py +44 -114

app.py CHANGED Viewed

@@ -17,12 +17,16 @@ from numpy.linalg import norm
 import json
 import ast
 import gradio as gr
 from datetime import datetime
 import time
 import dataframe_image as dfi
 print("Packages loaded!")
 # write out functions
@@ -55,129 +59,55 @@ def cosine_similarity_generator(master_exploded, embeddings, query, filename = t
   master_exploded_top = master_exploded[master_exploded['cos_sim']> 0.6]
   print("The number of results with cosine similarity > 0.6 are: ", len(master_exploded[master_exploded['cos_sim']> 0.6]))
-  master_exploded_top_k = master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)
   print(" The top k=10 results have a min cosine similarity of: ", master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)['cos_sim'].min())
   # print(master_exploded_top_k)
-  cosine_sum_by_name = master_exploded_top.groupby(["name", "query", "combined", "tokenized_sentences",]).agg({"cos_sim": ["sum"]}).reset_index()
   print("Taking sum of cosine similarities above 0.6 threshold...")
   cosine_sum_by_name.columns = cosine_sum_by_name.columns.map('_'.join)
   ranked_mentors = cosine_sum_by_name.reset_index().sort_values(by ="cos_sim_sum", ascending =False)
-  path = "./Ranked_Results_Gradio/"
-  ranked_mentors_filename = path+'ranked_mentors_'+str(filename)+'.csv'
-  cos_sum_filename = path+'cos_sum_'+str(filename)+'.csv'
-  top_10_filename = path+'top_10_'+str(filename)+'.csv'
-  above_threshold_filename = path+"above_0.6_threshold_"+str(filename)+".csv"
   # save 3 files: Ranked mentors, top 10 matches based on cosine similarity sum, and then all of the results per run.
   # ranked_mentors.head(10).to_csv(ranked_mentors_filename)
   # cosine_sum_by_name.sort_values(by ="cos_sim_sum", ascending =False).head(10).to_csv(top_10_filename)
   # cosine_sum_by_name.to_csv(cos_sum_filename)
   # master_exploded_top.sort_values(by ="cos_sim", ascending =False).to_csv(above_threshold_filename)
-  return master_exploded_top, master_exploded_top_k, cosine_sum_by_name
-# def recosys(query):
-#     # main function
-#     master_exploded = load_pickle()
-#     print("Pickle loaded!")
-#     embeddings, query = sentence_embedding_generator(query)
-#     print("Query embeddings generated...")
-#     # change query to date time
-#     ranked_mentors, top_10, cosine_sum_by_name = cosine_similarity_generator(master_exploded,  embeddings, query, time.strftime("%Y%m%d-%H%M%S"))
-#     print("Ranked mentors printed! Check your directory.")
-#     # to_return_df = gr.Dataframe(
-#     #    headers = ["name_", "combined_", "cos_sim_sum"],
-#     #     datatype=["str", "str", "number"],
-#     #     row_count=10,
-#     #     col_count=(3, "fixed"),
-#     #     )
-#     return top_10
-# def recosys_embedding(query):
-#    master_exploded = load_pickle()
-#    print("Pickle loading....")
-#    embeddings, query = sentence_embedding_generator(query)
-#    return embeddings, master_exploded
-# def embeddings_to_ranking(embeddings, master_exploded, query):
-#    ranked_mentors, top_10, cosine_sum_by_name = cosine_similarity_generator(master_exploded,  embeddings, query, time.strftime("%Y%m%d-%H%M%S"))
-#    return top_10
-# def flip_text(x):
-#     return x[::-1]
-master_exploded = load_pickle()
-demo = gr.Blocks()
-with demo:
-   gr.Markdown(
-      """
-      # Mentor Semantic Search Recommender System
-      What kind of mentor are you looking for?
-      """
-      )
-   input = gr.Textbox(placeholder="someone who has experience working in big tech")
-   def generate_results(input):
-     embeddings, query = sentence_embedding_generator(str(input))
-     ranked_mentors, top_10, cosine_sum_by_name = cosine_similarity_generator(master_exploded,
-                                                                              embeddings,
-                                                                              query,
-                                                                              time.strftime("%Y%m%d-%H%M%S"))
-     print(top_10.columns)
-     return(top_10['name'].to_list())
-demo = gr.Interface(fn=generate_results, inputs="text", outputs="text")
-demo.launch()
-# demo.launch(share=True)
-#    embeddings_btn = gr.Button("Generate query embeddings")
-#    def embedding_gen(input):
-#      embeddings, query = sentence_embedding_generator(str(input))
-#      return embeddings
-#    embeddings_btn.click(fn=embedding_gen, inputs=input, outputs="number")
-#    ranked_mentors, top_10, cosine_sum_by_name = cosine_similarity_generator(master_exploded,
-#                                                                               embeddings,
-#                                                                               query,
-#                                                                               time.strftime("%Y%m%d-%H%M%S"))
-#    def top_5_mentors(top_10):
-#      return top_10.head(5)
-# #    def scorer(embeddings, query):
-# #      ranked_mentors, top_10, cosine_sum_by_name = cosine_similarity_generator(master_exploded,
-# #                                                                               embeddings,
-# #                                                                               query,
-# #                                                                               time.strftime("%Y%m%d-%H%M%S"))
-# #      return top_10
-#    submit_btn = gr.Button("Search for mentors")
-#    submit_btn.click(fn=top_5_mentors, inputs=top_10, outputs=gr.Dataframe())
-# #    dfi.export(df_styled,"mytable.png")
-# #    submit_btn.click(fn=recosys, inputs=input, outputs=gr.Dataframe(), api_name="RecoSys")
-#    input.change(fn=recosys, inputs = input, outputs = output)
-# demo = gr.Interface(
-#     recosys,
-#     inputs="text",
-#     "dataframe")
-#     outputs=gr.Dataframe(
-#        headers = ["name_", "combined_", "cos_sim_sum"],
-#         datatype=["str", "str", "number"],
-#         row_count=10,
-#         col_count=(3, "fixed"),
-#         )
-#     # "dataframe",
-#     # description="What kind fo mentoir are you looking for?",
-# )
-# demo = gr.Interface(fn=recosys, inputs="text", outputs="dataframe")

 import json
 import ast
+import requests
 import gradio as gr
 from datetime import datetime
 import time
 import dataframe_image as dfi
 print("Packages loaded!")
 # write out functions
   master_exploded_top = master_exploded[master_exploded['cos_sim']> 0.6]
   print("The number of results with cosine similarity > 0.6 are: ", len(master_exploded[master_exploded['cos_sim']> 0.6]))
+  top_k = master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)
   print(" The top k=10 results have a min cosine similarity of: ", master_exploded.sort_values(by=['cos_sim'], ascending=False).head(10)['cos_sim'].min())
   # print(master_exploded_top_k)
+  cosine_sum_by_name = master_exploded_top.groupby(["id", "name", "query", "gitHubUrl", "combined" ]).agg({"cos_sim": ["sum"]}).reset_index()
   print("Taking sum of cosine similarities above 0.6 threshold...")
   cosine_sum_by_name.columns = cosine_sum_by_name.columns.map('_'.join)
   ranked_mentors = cosine_sum_by_name.reset_index().sort_values(by ="cos_sim_sum", ascending =False)
+  # path = "./Ranked_Results_Gradio/"
+  # ranked_mentors_filename = path+'ranked_mentors_'+str(filename)+'.csv'
+  # cos_sum_filename = path+'cos_sum_'+str(filename)+'.csv'
+  # top_10_filename = path+'top_10_'+str(filename)+'.csv'
+  # above_threshold_filename = path+"above_0.6_threshold_"+str(filename)+".csv"
   # save 3 files: Ranked mentors, top 10 matches based on cosine similarity sum, and then all of the results per run.
   # ranked_mentors.head(10).to_csv(ranked_mentors_filename)
   # cosine_sum_by_name.sort_values(by ="cos_sim_sum", ascending =False).head(10).to_csv(top_10_filename)
   # cosine_sum_by_name.to_csv(cos_sum_filename)
   # master_exploded_top.sort_values(by ="cos_sim", ascending =False).to_csv(above_threshold_filename)
+  return master_exploded_top, top_k, cosine_sum_by_name
+def dataframe_output(cosine_sum_by_name):  # thin wrapper around DataFrame.to_json
+  # Serialize the given frame to a column-oriented JSON string ({column: {index: value}}).
+  json_df = cosine_sum_by_name.to_json(orient="columns")
+  return json_df  # a str; the visible caller (generate_results) re-parses it with pd.read_json
+def generate_results(input):
+  master_exploded = load_pickle()
+  embeddings, query = sentence_embedding_generator(str(input))
+  ranked_mentors, top_10, cosine_sum_by_name = cosine_similarity_generator(master_exploded,
+                                                                            embeddings,
+                                                                            query,
+                                                                            time.strftime("%Y%m%d-%H%M%S"))
+  print(cosine_sum_by_name.columns)
+  df_output = pd.read_json(dataframe_output(cosine_sum_by_name))
+  print(df_output)
+  # df_output = dataframe_output(cosine_sum_by_name)
+  print("JSON created...")
+  subset = df_output.head(10)  # Select the first 10 rows
+  return subset
+iface = gr.Interface(
+    fn=generate_results,
+    inputs=gr.inputs.Textbox(label="What kind of mentor are you looking for?"),
+    outputs=gr.outputs.Dataframe(type="pandas"),
+    title="SharpestMinds Mentor Recommender Semantic Search App",
+    description="Converts a string query into an embedding, and then compares the aggregate cosine similarity by mentor.",
+)
+iface.launch()