Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,20 +1,21 @@
|
|
1 |
import os
|
2 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
3 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
4 |
import spacy
|
5 |
import gradio as gr
|
6 |
import subprocess
|
7 |
|
8 |
-
def download_spacy_model(model_name):
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
|
19 |
# Call the function to download the model
|
20 |
|
@@ -47,28 +48,55 @@ def download_spacy_model(model_name):
|
|
47 |
# # Return the name of the file with the highest similarity score
|
48 |
# return files_names[max_similarity_idx]
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
def find_closest(query):
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
download_spacy_model('en_core_web_md')
|
55 |
-
nlp = spacy.load('en_core_web_md')
|
56 |
files_names = []
|
57 |
-
files_vectors = []
|
58 |
|
59 |
for file in os.listdir():
|
60 |
if file.endswith(".txt"):
|
61 |
with open(file, 'r') as f:
|
62 |
content = f.read()
|
|
|
63 |
files_names.append(file)
|
64 |
-
# Get the vector representation of the content
|
65 |
-
files_vectors.append(nlp(content).vector)
|
66 |
|
67 |
-
#
|
68 |
-
|
|
|
|
|
|
|
69 |
|
70 |
# Compute the cosine similarity between the query and all files
|
71 |
-
similarity_scores = cosine_similarity([
|
72 |
|
73 |
# Get the index of the file with the highest similarity score
|
74 |
max_similarity_idx = similarity_scores.argmax()
|
|
|
1 |
import os
|
2 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
3 |
from sklearn.metrics.pairwise import cosine_similarity
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
import spacy
|
6 |
import gradio as gr
|
7 |
import subprocess
|
8 |
|
9 |
+
# def download_spacy_model(model_name):
|
10 |
+
# command = f"python -m spacy download {model_name}"
|
11 |
+
# process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
12 |
+
# stdout, stderr = process.communicate()
|
13 |
|
14 |
+
# # Check if the command executed successfully
|
15 |
+
# if process.returncode != 0:
|
16 |
+
# print(f"An error occurred while downloading the model: {stderr.decode('utf-8')}")
|
17 |
+
# else:
|
18 |
+
# print(f"Successfully downloaded the model: {stdout.decode('utf-8')}")
|
19 |
|
20 |
# Call the function to download the model
|
21 |
|
|
|
48 |
# # Return the name of the file with the highest similarity score
|
49 |
# return files_names[max_similarity_idx]
|
50 |
|
51 |
+
# def find_closest(query):
|
52 |
+
# try:
|
53 |
+
# nlp = spacy.load('en_core_web_md')
|
54 |
+
# except:
|
55 |
+
# download_spacy_model('en_core_web_md')
|
56 |
+
# nlp = spacy.load('en_core_web_md')
|
57 |
+
# files_names = []
|
58 |
+
# files_vectors = []
|
59 |
+
|
60 |
+
# for file in os.listdir():
|
61 |
+
# if file.endswith(".txt"):
|
62 |
+
# with open(file, 'r') as f:
|
63 |
+
# content = f.read()
|
64 |
+
# files_names.append(file)
|
65 |
+
# # Get the vector representation of the content
|
66 |
+
# files_vectors.append(nlp(content).vector)
|
67 |
+
|
68 |
+
# # Get the vector representation of the query
|
69 |
+
# query_vector = nlp(query).vector
|
70 |
+
|
71 |
+
# # Compute the cosine similarity between the query and all files
|
72 |
+
# similarity_scores = cosine_similarity([query_vector], files_vectors)
|
73 |
+
|
74 |
+
# # Get the index of the file with the highest similarity score
|
75 |
+
# max_similarity_idx = similarity_scores.argmax()
|
76 |
+
|
77 |
+
# # Return the name of the file with the highest similarity score
|
78 |
+
# return files_names[max_similarity_idx]
|
79 |
def find_closest(query):
|
80 |
+
model = SentenceTransformer('all-MiniLM-L6-v2') # You can choose other models
|
81 |
+
|
82 |
+
files_contents = []
|
|
|
|
|
83 |
files_names = []
|
|
|
84 |
|
85 |
for file in os.listdir():
|
86 |
if file.endswith(".txt"):
|
87 |
with open(file, 'r') as f:
|
88 |
content = f.read()
|
89 |
+
files_contents.append(content)
|
90 |
files_names.append(file)
|
|
|
|
|
91 |
|
92 |
+
# Append query to the end
|
93 |
+
files_contents.append(query)
|
94 |
+
|
95 |
+
# Create sentence embeddings for each text
|
96 |
+
embeddings = model.encode(files_contents)
|
97 |
|
98 |
# Compute the cosine similarity between the query and all files
|
99 |
+
similarity_scores = cosine_similarity([embeddings[-1]], embeddings[:-1])
|
100 |
|
101 |
# Get the index of the file with the highest similarity score
|
102 |
max_similarity_idx = similarity_scores.argmax()
|