|
from transformers import pipeline |
|
from retrieval import get_relevant_pubmed_docs |
|
from mini_ladder import generate_sub_questions, self_critique_and_refine |
|
|
|
|
|
# Hugging Face model used for answer generation (BioGPT fine-tuned on PubMedQA).
MODEL_NAME = "microsoft/BioGPT-Large-PubMedQA"

# NOTE: built at import time — loads (and on first run downloads) the model,
# so importing this module is slow and has side effects.
qa_pipeline = pipeline("text-generation", model=MODEL_NAME)


# Maps each query string to the list of PubMed documents retrieved for it.
# NOTE(review): grows without bound across queries — confirm this is intended.
docs_cache = {}
|
|
|
def process_medical_query(query: str):
    """
    Process a medical query in four steps.

    1. Generate sub-questions for the query.
    2. Retrieve relevant PubMed documents.
    3. Generate an initial answer from the retrieved context.
    4. Self-critique and refine the answer.

    Args:
        query: The user's medical question.

    Returns:
        A 4-tuple ``(final_answer, sub_questions, initial_answer, critique)``.
        When no documents are found, the answer/critique fields are
        placeholder strings and ``sub_questions`` is still populated.
    """
    sub_questions = generate_sub_questions(query)

    relevant_docs = get_relevant_pubmed_docs(query)
    # Cache the retrieved docs so other parts of the app can reuse them.
    docs_cache[query] = relevant_docs

    if not relevant_docs:
        return ("No documents found for this query.", sub_questions, "", "")

    context_text = "\n\n".join(relevant_docs)
    prompt = f"Question: {query}\nContext: {context_text}\nAnswer:"
    initial_gen = qa_pipeline(prompt, max_new_tokens=100, truncation=True)

    if initial_gen and isinstance(initial_gen, list):
        # BUG FIX: text-generation pipelines echo the prompt at the start of
        # "generated_text". Previously the full prompt (question + all
        # retrieved context) was kept as the "answer" and passed to the
        # critique step; strip it so only the model's continuation remains.
        generated = initial_gen[0]["generated_text"]
        if generated.startswith(prompt):
            initial_answer = generated[len(prompt):].strip()
        else:
            initial_answer = generated
    else:
        initial_answer = "No answer found."

    final_answer, critique = self_critique_and_refine(query, initial_answer, relevant_docs)

    return (final_answer, sub_questions, initial_answer, critique)
|
|