Update backend.py
Browse files — backend.py: +27 −22
backend.py
CHANGED
@@ -1,42 +1,47 @@
|
|
1 |
from transformers import pipeline
|
2 |
from retrieval import get_relevant_pubmed_docs
|
3 |
-
from mini_ladder import generate_sub_questions, self_critique_and_refine
|
4 |
|
5 |
-
# Use Microsoft BioGPT-Large-PubMedQA for generation
|
6 |
MODEL_NAME = "microsoft/BioGPT-Large-PubMedQA"
|
7 |
qa_pipeline = pipeline("text-generation", model=MODEL_NAME)
|
8 |
|
9 |
-
# In-memory cache for documents (used
|
10 |
docs_cache = {}
|
11 |
|
12 |
def process_medical_query(query: str):
|
13 |
"""
|
14 |
-
Processes
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
"""
|
20 |
-
#
|
21 |
-
sub_questions = generate_sub_questions(query)
|
22 |
-
|
23 |
-
# Step 2: Retrieve relevant documents via PubMed and Chroma
|
24 |
relevant_docs = get_relevant_pubmed_docs(query)
|
25 |
docs_cache[query] = relevant_docs
|
26 |
|
27 |
if not relevant_docs:
|
28 |
-
return ("No documents found for this query.",
|
29 |
|
30 |
-
#
|
31 |
context_text = "\n\n".join(relevant_docs)
|
32 |
prompt = f"Question: {query}\nContext: {context_text}\nAnswer:"
|
33 |
-
initial_gen = qa_pipeline(prompt, max_new_tokens=100, truncation=True)
|
34 |
-
if initial_gen and isinstance(initial_gen, list):
|
35 |
-
initial_answer = initial_gen[0]["generated_text"]
|
36 |
-
else:
|
37 |
-
initial_answer = "No answer found."
|
38 |
|
39 |
-
#
|
40 |
-
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
|
|
|
|
|
|
|
|
from transformers import pipeline
from retrieval import get_relevant_pubmed_docs

# Generation backend: Microsoft's BioGPT-Large checkpoint fine-tuned on
# PubMedQA.
MODEL_NAME = "microsoft/BioGPT-Large-PubMedQA"

# Build the text-generation pipeline once at import time so every query
# reuses the same loaded model.
qa_pipeline = pipeline("text-generation", model=MODEL_NAME)

# Module-level cache mapping each query string to the documents retrieved
# for it (consumed later by the knowledge-graph step).
docs_cache = {}
def process_medical_query(query: str):
    """
    Answer a clinical question with retrieval-augmented generation.

    Steps:
      1. Retrieve relevant PubMed abstracts via the retrieval pipeline
         (PubMed + Chroma).
      2. Generate an answer with BioGPT, using the retrieved abstracts
         as context.

    Args:
        query: The clinical question to answer.

    Returns:
        A 4-tuple of:
          - the final answer (str);
          - sub-questions (list; empty — omitted for speed);
          - the initial answer (same as the final answer in this
            simplified pipeline);
          - a critique (str; empty — omitted).
    """
    # Step 1: retrieve relevant documents via PubMed and Chroma.
    relevant_docs = get_relevant_pubmed_docs(query)
    docs_cache[query] = relevant_docs  # kept for the knowledge-graph step

    if not relevant_docs:
        return ("No documents found for this query.", [], "", "")

    # Combine the retrieved abstracts into a single context block.
    context_text = "\n\n".join(relevant_docs)
    prompt = f"Question: {query}\nContext: {context_text}\nAnswer:"

    # Step 2: generate an answer with BioGPT.
    generation = qa_pipeline(prompt, max_new_tokens=100, truncation=True)
    if generation and isinstance(generation, list):
        answer = generation[0]["generated_text"]
        # FIX: HF text-generation pipelines return the prompt followed by the
        # continuation in "generated_text", so without stripping, the caller
        # receives the echoed question + full PubMed context as the "answer".
        # removeprefix is a no-op if the prompt was already excluded; fall
        # back when the model produced nothing beyond the prompt.
        answer = answer.removeprefix(prompt).strip() or "No answer found."
    else:
        answer = "No answer found."

    # Streamlined pipeline: no sub-questions or self-critique are produced,
    # so the same answer fills both the initial and final answer slots.
    sub_questions = []  # No sub-questions generated.
    critique = ""       # No self-critique performed.
    return answer, sub_questions, answer, critique
|