from transformers import pipeline

from retrieval import get_relevant_pubmed_docs
from mini_ladder import generate_sub_questions, self_critique_and_refine

# Use Microsoft BioGPT-Large-PubMedQA for generation.
MODEL_NAME = "microsoft/BioGPT-Large-PubMedQA"
qa_pipeline = pipeline("text-generation", model=MODEL_NAME)

# In-memory cache of retrieved documents per query (used for graph generation).
# NOTE(review): unbounded — entries are never evicted; acceptable for a demo,
# but consider an LRU bound for long-running processes.
docs_cache: dict[str, list[str]] = {}


def process_medical_query(query: str) -> tuple[str, list[str], str, str]:
    """Answer a medical question with retrieval-augmented generation.

    Processes the query in four steps:
      1. Generate sub-questions (via ``generate_sub_questions``).
      2. Retrieve relevant PubMed documents (via ``get_relevant_pubmed_docs``).
      3. Generate an initial answer with BioGPT over the retrieved context.
      4. Self-critique and refine the answer (via ``self_critique_and_refine``).

    Args:
        query: The user's medical question.

    Returns:
        A 4-tuple ``(final_answer, sub_questions, initial_answer, critique)``.
        When no documents are retrieved, returns a placeholder message with
        empty strings for the initial answer and critique.
    """
    # Step 1: Generate sub-questions (naively).
    sub_questions = generate_sub_questions(query)

    # Step 2: Retrieve relevant documents via PubMed and Chroma.
    relevant_docs = get_relevant_pubmed_docs(query)
    # Cached even when empty so graph generation sees every attempted query.
    docs_cache[query] = relevant_docs

    if not relevant_docs:
        return ("No documents found for this query.", sub_questions, "", "")

    # Step 3: Generate an initial answer from the concatenated context.
    context_text = "\n\n".join(relevant_docs)
    prompt = f"Question: {query}\nContext: {context_text}\nAnswer:"
    initial_gen = qa_pipeline(prompt, max_new_tokens=100, truncation=True)

    # The pipeline normally returns a non-empty list of dicts; fall back to the
    # placeholder on any unexpected payload shape instead of raising.
    if initial_gen and isinstance(initial_gen, list):
        initial_answer = initial_gen[0].get("generated_text", "No answer found.")
    else:
        initial_answer = "No answer found."

    # Step 4: Self-critique and refine the answer.
    final_answer, critique = self_critique_and_refine(query, initial_answer, relevant_docs)

    return (final_answer, sub_questions, initial_answer, critique)