rasmus1610 committed
Commit c6c79f7
1 Parent(s): 43e19e9

Create app.py


initial commit

Files changed (1)
  app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
+ import openai
+ import numpy as np
+ import pandas as pd
+ from sentence_transformers import SentenceTransformer
+ import re
+ import pdftotext
+ import gradio as gr
+ import json
+
+ # Calculate the cosine similarity between two vectors
+ def cos_sim(vector1, vector2):
+     cosine_similarity = np.dot(vector1, vector2) / (np.linalg.norm(vector1) * np.linalg.norm(vector2))
+     return cosine_similarity
+
+ # Embed the query and return the n most similar chunks from the dataframe
+ def sim_search(df, query, n=3, dot=False):
+     embedding = model.encode(query)
+     if dot:
+         df['similarities'] = df.embeddings.apply(lambda x: x @ embedding)
+         print("using dot product")
+     else:
+         df['similarities'] = df.embeddings.apply(lambda x: cos_sim(x, embedding))
+         print("using cosine similarity")
+     res = df.sort_values('similarities', ascending=False).head(n)
+     return res
+
+ # Build a prompt that grounds the model in the retrieved context
+ def create_prompt(context, question):
+     return f"""
+ Context information is below.
+ ---------------------
+ {context}
+ ---------------------
+ Given the context information and not prior knowledge, answer the query.
+ Query: {question}
+ Answer: \
+ """
+
+
+ # Retrieve the most relevant chunks, build the prompt and query the chat model
+ def answer_question(question, model="gpt-3.5-turbo", n=3):
+     r = sim_search(df, question, n=n)
+     context = "\n\n".join(r.chunks)
+     prompt = create_prompt(context, question)
+     response = openai.ChatCompletion.create(
+         model=model,
+         messages=[
+             {"role": "system", "content": "You are a helpful assistant answering questions in German. You answer only in German. If you do not know an answer, you say so. You do not fabricate answers."},
+             {"role": "user", "content": prompt},
+         ]
+     )
+     return response.choices[0].message.content
+
+ # Load the pre-computed chunk embeddings and the embedding model
+ df = pd.read_csv("stroke_embeddings_sentence_transformers.csv")
+ df["embeddings"] = df.embeddings.apply(json.loads)
+ model = SentenceTransformer('thenlper/gte-base')
+
+ def gradio_answer(input):
+     return answer_question(input)
+
+ demo = gr.Interface(
+     fn=gradio_answer,
+     inputs=gr.Textbox(lines=1, placeholder="Frage hier...", label="Frage"),
+     outputs=gr.Textbox(lines=4, placeholder="Antwort hier...", label="Antwort"),
+     title="S3 Leitlinie Carotis Stenose",
+     examples=[
+         "In welchen Intervallen ist eine Nachuntersuchung nach CAS angezeigt?",
+         "Ist eine ambulante Therapie der Carotisstenose mittels CEA oder CAS möglich und sinnvoll?",
+         "Was sollte man als Bradykardie-Therapie bei Nachdilatation eines Stents einsetzen?",
+     ],
+ )
+
+ demo.launch()
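
Not included in this commit is the file app.py reads at startup: stroke_embeddings_sentence_transformers.csv, which must provide a "chunks" column of guideline text passages and an "embeddings" column of JSON-encoded gte-base vectors. The sketch below shows one way such a file might be produced with the libraries app.py already imports (pdftotext, re, sentence_transformers); the PDF filename and the blank-line chunking are assumptions for illustration, not the author's actual pipeline.

# Hypothetical preprocessing sketch (assumption, not part of this commit):
# builds the CSV that app.py expects, with a "chunks" column and a
# JSON-encoded "embeddings" column.
import json
import re

import pandas as pd
import pdftotext
from sentence_transformers import SentenceTransformer

# Assumed source document; the actual guideline PDF is not in the repo.
with open("leitlinie_carotisstenose.pdf", "rb") as f:
    pages = pdftotext.PDF(f)

text = "\n\n".join(pages)

# Naive chunking: split on blank lines and keep reasonably long passages (assumption).
chunks = [c.strip() for c in re.split(r"\n\s*\n", text) if len(c.strip()) > 100]

# Embed the chunks with the same model app.py uses for queries.
model = SentenceTransformer("thenlper/gte-base")
embeddings = model.encode(chunks)

# Store embeddings as JSON strings so app.py can reload them with json.loads.
df = pd.DataFrame({
    "chunks": chunks,
    "embeddings": [json.dumps(e.tolist()) for e in embeddings],
})
df.to_csv("stroke_embeddings_sentence_transformers.csv", index=False)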