Update app.py
app.py CHANGED
@@ -17,8 +17,8 @@ all_questions = questions_english + questions_arabic
 vectorizer = TfidfVectorizer().fit(all_questions)
 questions_tfidf = vectorizer.transform(all_questions)
 
-# Initialize Hugging Face Inference Client
-client = InferenceClient("
+# Initialize Hugging Face Inference Client
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 def find_similar_question(user_message: str, threshold: float = 0.7) -> str:
     """
@@ -27,21 +27,21 @@ def find_similar_question(user_message: str, threshold: float = 0.7) -> str:
     """
     # Transform the user's message using the same vectorizer
     user_tfidf = vectorizer.transform([user_message])
-
+
     # Compute cosine similarity with all dataset questions
     similarities = cosine_similarity(user_tfidf, questions_tfidf).flatten()
-
+
     # Find the index of the most similar question
     max_similarity_index = similarities.argmax()
     max_similarity_score = similarities[max_similarity_index]
-
+
     # If the similarity score exceeds the threshold, return the corresponding answer
     if max_similarity_score >= threshold:
         if max_similarity_index < len(questions_english):
             return df['Answer'].iloc[max_similarity_index]
         else:
             return df['إجابة'].iloc[max_similarity_index - len(questions_english)]
-
+
     # Return None if no similar question is found
     return None
 
@@ -62,17 +62,25 @@ def respond(
         return
 
     # If no similar question is found, use the Hugging Face model
-
+    messages = [{"role": "system", "content": system_message}]
+
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+
+    messages.append({"role": "user", "content": message})
 
     response = ""
-    for msg in client.
-
-
+    for msg in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
-        token = msg.
+        token = msg.choices[0].delta.content
         response += token
         yield response
 
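For readers skimming the diff, the unchanged retrieval path is worth seeing in one runnable piece. The sketch below reproduces the TF-IDF matching logic on a toy bilingual table: the 'Answer' and 'إجابة' ("answer") column names and the 0.7 threshold come from the diff, while the DataFrame contents, the 'Question'/'سؤال' ("question") column names, and the demo call are illustrative assumptions, not the Space's actual data.

# Minimal sketch of the TF-IDF lookup above, on toy data (not the Space's dataset)
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Hypothetical bilingual QA table using the column names seen in the diff
df = pd.DataFrame({
    'Question': ['What are your opening hours?', 'Where are you located?'],
    'Answer': ['We are open 9am-5pm.', 'We are in Riyadh.'],
    'سؤال': ['ما هي ساعات العمل؟', 'أين تقعون؟'],
    'إجابة': ['نعمل من 9 صباحًا حتى 5 مساءً.', 'نحن في الرياض.'],
})
questions_english = df['Question'].tolist()
questions_arabic = df['سؤال'].tolist()
all_questions = questions_english + questions_arabic

# Fit once on the combined English+Arabic corpus, exactly as in app.py
vectorizer = TfidfVectorizer().fit(all_questions)
questions_tfidf = vectorizer.transform(all_questions)

def find_similar_question(user_message: str, threshold: float = 0.7):
    # Vectorize the query with the same fitted vocabulary
    user_tfidf = vectorizer.transform([user_message])
    similarities = cosine_similarity(user_tfidf, questions_tfidf).flatten()
    idx = similarities.argmax()
    if similarities[idx] >= threshold:
        # Indices past the English block map into the Arabic block
        if idx < len(questions_english):
            return df['Answer'].iloc[idx]
        return df['إجابة'].iloc[idx - len(questions_english)]
    return None  # below threshold: caller falls back to the model

print(find_similar_question('What are your opening hours?'))  # -> "We are open 9am-5pm."

Because the vectorizer is fitted on the English and Arabic questions together, a single cosine-similarity pass scores both languages, and the index offset against len(questions_english) determines which half of the corpus, and hence which answer column, matched.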
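The added lines complete the model fallback: rebuild the chat history as role-tagged messages, then stream tokens from the Inference API. Below is a self-contained sketch of that call path using huggingface_hub's InferenceClient.chat_completion; the model id matches the diff, while the system prompt, user message, and sampling values are placeholders standing in for respond()'s parameters. One defensive tweak relative to the diff: the sketch skips empty deltas, which the final streamed chunk can carry.

# Sketch of the streaming fallback in respond() (placeholder prompt and params)
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")  # same model id as the diff

messages = [
    {"role": "system", "content": "You are a helpful bilingual assistant."},  # placeholder
    {"role": "user", "content": "What are your opening hours?"},              # placeholder
]

response = ""
for msg in client.chat_completion(
    messages,
    max_tokens=256,    # placeholder; app.py passes these in as respond() parameters
    stream=True,
    temperature=0.7,
    top_p=0.95,
):
    token = msg.choices[0].delta.content
    if token:          # guard: the final chunk's delta may carry no content
        response += token
        print(token, end="", flush=True)

In app.py the loop instead yields the growing response string on every chunk, so whatever UI drives respond() can render the reply incrementally as it streams.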