aksj committed on
Commit
b487388
·
1 Parent(s): a613163

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -20
app.py CHANGED
@@ -1,20 +1,21 @@
1
  import os
2
  from sklearn.feature_extraction.text import TfidfVectorizer
3
  from sklearn.metrics.pairwise import cosine_similarity
 
4
  import spacy
5
  import gradio as gr
6
  import subprocess
7
 
8
- def download_spacy_model(model_name):
9
- command = f"python -m spacy download {model_name}"
10
- process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
11
- stdout, stderr = process.communicate()
12
 
13
- # Check if the command executed successfully
14
- if process.returncode != 0:
15
- print(f"An error occurred while downloading the model: {stderr.decode('utf-8')}")
16
- else:
17
- print(f"Successfully downloaded the model: {stdout.decode('utf-8')}")
18
 
19
  # Call the function to download the model
20
 
@@ -47,28 +48,55 @@ def download_spacy_model(model_name):
47
  # # Return the name of the file with the highest similarity score
48
  # return files_names[max_similarity_idx]
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  def find_closest(query):
51
- try:
52
- nlp = spacy.load('en_core_web_md')
53
- except:
54
- download_spacy_model('en_core_web_md')
55
- nlp = spacy.load('en_core_web_md')
56
  files_names = []
57
- files_vectors = []
58
 
59
  for file in os.listdir():
60
  if file.endswith(".txt"):
61
  with open(file, 'r') as f:
62
  content = f.read()
 
63
  files_names.append(file)
64
- # Get the vector representation of the content
65
- files_vectors.append(nlp(content).vector)
66
 
67
- # Get the vector representation of the query
68
- query_vector = nlp(query).vector
 
 
 
69
 
70
  # Compute the cosine similarity between the query and all files
71
- similarity_scores = cosine_similarity([query_vector], files_vectors)
72
 
73
  # Get the index of the file with the highest similarity score
74
  max_similarity_idx = similarity_scores.argmax()
 
1
  import os
2
  from sklearn.feature_extraction.text import TfidfVectorizer
3
  from sklearn.metrics.pairwise import cosine_similarity
4
+ from sentence_transformers import SentenceTransformer
5
  import spacy
6
  import gradio as gr
7
  import subprocess
8
 
9
+ # def download_spacy_model(model_name):
10
+ # command = f"python -m spacy download {model_name}"
11
+ # process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
12
+ # stdout, stderr = process.communicate()
13
 
14
+ # # Check if the command executed successfully
15
+ # if process.returncode != 0:
16
+ # print(f"An error occurred while downloading the model: {stderr.decode('utf-8')}")
17
+ # else:
18
+ # print(f"Successfully downloaded the model: {stdout.decode('utf-8')}")
19
 
20
  # Call the function to download the model
21
 
 
48
  # # Return the name of the file with the highest similarity score
49
  # return files_names[max_similarity_idx]
50
 
51
+ # def find_closest(query):
52
+ # try:
53
+ # nlp = spacy.load('en_core_web_md')
54
+ # except:
55
+ # download_spacy_model('en_core_web_md')
56
+ # nlp = spacy.load('en_core_web_md')
57
+ # files_names = []
58
+ # files_vectors = []
59
+
60
+ # for file in os.listdir():
61
+ # if file.endswith(".txt"):
62
+ # with open(file, 'r') as f:
63
+ # content = f.read()
64
+ # files_names.append(file)
65
+ # # Get the vector representation of the content
66
+ # files_vectors.append(nlp(content).vector)
67
+
68
+ # # Get the vector representation of the query
69
+ # query_vector = nlp(query).vector
70
+
71
+ # # Compute the cosine similarity between the query and all files
72
+ # similarity_scores = cosine_similarity([query_vector], files_vectors)
73
+
74
+ # # Get the index of the file with the highest similarity score
75
+ # max_similarity_idx = similarity_scores.argmax()
76
+
77
+ # # Return the name of the file with the highest similarity score
78
+ # return files_names[max_similarity_idx]
79
  def find_closest(query):
80
+ model = SentenceTransformer('all-MiniLM-L6-v2') # You can choose other models
81
+
82
+ files_contents = []
 
 
83
  files_names = []
 
84
 
85
  for file in os.listdir():
86
  if file.endswith(".txt"):
87
  with open(file, 'r') as f:
88
  content = f.read()
89
+ files_contents.append(content)
90
  files_names.append(file)
 
 
91
 
92
+ # Append query to the end
93
+ files_contents.append(query)
94
+
95
+ # Create sentence embeddings for each text
96
+ embeddings = model.encode(files_contents)
97
 
98
  # Compute the cosine similarity between the query and all files
99
+ similarity_scores = cosine_similarity([embeddings[-1]], embeddings[:-1])
100
 
101
  # Get the index of the file with the highest similarity score
102
  max_similarity_idx = similarity_scores.argmax()