Spaces:
Runtime error
Runtime error
Commit
·
c6c79f7
1
Parent(s):
43e19e9
Create app.py
Browse files
initial commit
app.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import openai
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
+
import re
|
6 |
+
import pdftotext
|
7 |
+
import gradio as gr
|
8 |
+
import json
|
9 |
+
|
10 |
+
# Calculate the cosine similarity
|
11 |
+
def cos_sim(vector1, vector2):
    """Return the cosine similarity of two 1-D vectors.

    Args:
        vector1: First vector (any sequence ``np.dot`` accepts).
        vector2: Second vector, same length as ``vector1``.

    Returns:
        The dot product of the vectors divided by the product of their
        Euclidean norms.  If either vector has zero norm the similarity is
        undefined; ``0.0`` is returned instead of dividing by zero (which
        would produce ``nan`` and a NumPy runtime warning).
    """
    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        # A zero vector has no direction, so treat it as "not similar".
        return 0.0
    return np.dot(vector1, vector2) / norm_product
|
14 |
+
|
15 |
+
def sim_search(df, query, n=3, dot=False):
    """Return the ``n`` rows of ``df`` that best match ``query``.

    The query is embedded with the module-level ``model`` and scored against
    every entry of ``df.embeddings``.

    Args:
        df: DataFrame with an ``embeddings`` column holding one vector per row.
        query: Text to embed and search for.
        n: Number of best-matching rows to return.
        dot: If true, rank by raw dot product instead of cosine similarity.

    Returns:
        A new DataFrame containing the top ``n`` rows sorted by descending
        score, with the score stored in a ``similarities`` column.  The
        ``df`` passed in is left unmodified.
    """
    embedding = model.encode(query)
    if dot:
        scores = df.embeddings.apply(lambda x: x @ embedding)
        print("using dot product")
    else:
        scores = df.embeddings.apply(lambda x: cos_sim(x, embedding))
        print("using cosine similarity")
    # Attach the scores to a copy rather than writing a column into the
    # caller's frame: the frame is shared module state, and a search should
    # not mutate its input as a side effect.
    ranked = df.assign(similarities=scores)
    return ranked.sort_values('similarities', ascending=False).head(n)
|
25 |
+
|
26 |
+
def create_prompt(context, question):
    """Build the user prompt that wraps the retrieved context and the query.

    Args:
        context: Text placed between the two dashed separator lines.
        question: Query text placed after ``Query: ``.

    Returns:
        The prompt string.  It starts with a newline and ends with
        ``"Answer: "`` (no trailing newline).
    """
    separator = "---------------------"
    prompt_lines = [
        "",  # the prompt opens with an empty line
        "Context information is below.",
        separator,
        f"{context}",
        separator,
        "Given the context information and not prior knowledge, answer the query.",
        f"Query: {question}",
        "Answer: ",
    ]
    return "\n".join(prompt_lines)
|
36 |
+
|
37 |
+
|
38 |
+
def answer_question(question, model="gpt-3.5-turbo", n=3):
    """Answer ``question`` using the most similar stored text chunks.

    The ``n`` best-matching rows of the module-level ``df`` are looked up
    with ``sim_search``, their ``chunks`` are joined into one context string,
    and the resulting prompt is sent to the OpenAI chat completion endpoint.

    Args:
        question: The user's question.
        model: Name of the OpenAI chat model to query.  Inside this function
            the name shadows the module-level embedding ``model``; the
            embedding model is only used within ``sim_search``.
        n: Number of context chunks to retrieve.

    Returns:
        The text content of the first completion choice.
    """
    r = sim_search(df, question, n=n)
    context = "\n\n".join(r.chunks)
    prompt = create_prompt(context, question)
    response = openai.ChatCompletion.create(
        # Honor the caller's choice: the model name used to be hard-coded
        # here, so the ``model`` argument was silently ignored.
        model=model,
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant answering questions in german. "
                    "You answer only in german. "
                    "If you do not know an answer you say it. "
                    "You do not fabricate answers."
                ),
            },
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content
|
50 |
+
|
51 |
+
# Load the stored chunk table. The "embeddings" column arrives from the CSV
# as text and is decoded with json.loads so each cell holds the parsed value.
df = pd.read_csv("stroke_embeddings_sentence_transformers.csv")
df["embeddings"] = df["embeddings"].map(json.loads)

# Embedding model that sim_search() uses to encode incoming queries.
model = SentenceTransformer('thenlper/gte-base')
|
54 |
+
|
55 |
+
def gradio_answer(input):
    """Gradio callback: forward the submitted text to ``answer_question``.

    Args:
        input: The question text received from the interface.

    Returns:
        The value ``answer_question`` returns for that question, using its
        default chat model and default number of context chunks.
    """
    answer = answer_question(input)
    return answer
|
57 |
+
|
58 |
+
# Single-question UI: one textbox in, one textbox out, wired to gradio_answer.
demo = gr.Interface(
    fn=gradio_answer,
    inputs=gr.Textbox(lines=1, placeholder="Frage hier...", label="Frage"),
    outputs=gr.Textbox(lines=4, placeholder="Antwort hier...", label="Antwort"),
    title="S3 Leitlinie Carotis Stenose",
    # Sample questions passed to the interface as `examples`.
    examples=[
        "In welchen Intervallen ist eine Nachuntersuchung nach CAS angezeigt?",
        "Ist eine ambulante Therapie der Carotisstenose mittels CEA oder CAS möglich und sinnvoll?",
        "Was sollte man als Bradykardie-Therapie bei Nachdilatation eines Stents einsetzen?",
    ],
)

# Start the app as soon as this module is executed.
demo.launch()
|