|
from transformers import pipeline |
|
from retrieval import get_relevant_pubmed_docs |
|
|
|
|
|
# Hugging Face model identifier for the biomedical QA generator.
MODEL_NAME = "microsoft/BioGPT-Large-PubMedQA"

# Text-generation pipeline created once at import time: model loading is
# expensive, so the pipeline is shared by every call to process_medical_query.
qa_pipeline = pipeline("text-generation", model=MODEL_NAME)


# Maps query string -> list of retrieved PubMed abstracts for that query.
# NOTE(review): unbounded — grows for the lifetime of the process; nothing in
# this file reads it back, so it appears to exist for external inspection.
docs_cache = {}
|
|
|
def process_medical_query(query: str):
    """
    Process a clinical query in two steps.

    1. Retrieve relevant PubMed abstracts using the retrieval pipeline.
    2. Generate an answer with BioGPT, using the retrieved abstracts as
       context for the prompt.

    Parameters
    ----------
    query : str
        The clinical question to answer.

    Returns
    -------
    tuple
        (final_answer, sub_questions, initial_answer, critique) where
        ``sub_questions`` and ``critique`` are intentionally empty in this
        simplified pipeline and ``initial_answer`` equals ``final_answer``.

    Designed for clinical use with clear, concise responses.
    """
    relevant_docs = get_relevant_pubmed_docs(query)
    # Cache the retrieved abstracts keyed by query (side effect; the cache is
    # not read anywhere in this module).
    docs_cache[query] = relevant_docs

    if not relevant_docs:
        return ("No documents found for this query.", [], "", "")

    context_text = "\n\n".join(relevant_docs)
    prompt = f"Question: {query}\nContext: {context_text}\nAnswer:"

    generation = qa_pipeline(prompt, max_new_tokens=100, truncation=True)
    if generation and isinstance(generation, list):
        full_text = generation[0].get("generated_text", "")
        # BUG FIX: the text-generation pipeline echoes the prompt by default,
        # so "generated_text" starts with the question and the entire context.
        # Strip the prompt prefix so callers receive only the model's answer.
        if full_text.startswith(prompt):
            answer = full_text[len(prompt):].strip()
        else:
            answer = full_text
        if not answer:
            answer = "No answer found."
    else:
        answer = "No answer found."

    sub_questions = []
    critique = ""
    return answer, sub_questions, answer, critique
|
|