Spaces:

mgbam
/

Medic

Running

App Files Community

mgbam committed 5 days ago

Commit

76cdef3

verified ·

1 Parent(s): 70284a9

Update mini_ladder.py

Browse files

Files changed (1) hide show

mini_ladder.py +16 -16

mini_ladder.py CHANGED Viewed

@@ -3,11 +3,13 @@ from transformers import pipeline, AutoTokenizer
 # ------------------------------
 # 1) CRITIQUE MODEL & TOKENIZER
 # ------------------------------
-CRITIQUE_MODEL_NAME = "gpt2"
 critique_pipeline = pipeline("text-generation", model=CRITIQUE_MODEL_NAME)
 critique_tokenizer = AutoTokenizer.from_pretrained(CRITIQUE_MODEL_NAME)
-# GPT-2 has a maximum context length of 1024 tokens.
 GPT2_MAX_CONTEXT = 1024
 # ------------------------------
@@ -28,8 +30,8 @@ def generate_sub_questions(main_query: str):
 # ------------------------------
 def self_critique_and_refine(query: str, initial_answer: str, docs: list):
     """
-    1) Uses GPT-2 to critique the initial answer.
-    2) If the critique indicates missing or incomplete details, refines the answer using BioGPT.
     """
     # A) Construct the critique prompt.
     critique_prompt = (
@@ -40,14 +42,13 @@ def self_critique_and_refine(query: str, initial_answer: str, docs: list):
         "CRITIQUE:"
     )
-    # B) Truncate the prompt so that prompt tokens + new tokens <= GPT2_MAX_CONTEXT.
-    # Reserve a buffer for new tokens (default 80 tokens).
-    truncated_prompt = _truncate_prompt_for_gpt2(critique_prompt, buffer=80)
-    # C) Generate the critique using the truncated prompt.
     critique_gen = critique_pipeline(
         truncated_prompt,
-        max_new_tokens=20,   # tokens to generate for critique
         truncation=True
     )
     if critique_gen and isinstance(critique_gen, list):
@@ -55,7 +56,7 @@ def self_critique_and_refine(query: str, initial_answer: str, docs: list):
     else:
         critique_text = "No critique generated."
-    # D) If the critique flags issues, refine the answer using BioGPT.
     if any(word in critique_text.lower() for word in ["missing", "incomplete", "incorrect", "lacks"]):
         refine_prompt = (
             f"Question: {query}\n"
@@ -66,8 +67,8 @@ def self_critique_and_refine(query: str, initial_answer: str, docs: list):
             + "\n\n".join(docs)
             + "\nREFINED ANSWER:"
         )
-        # Optionally, if BioGPT also has context limits, apply a similar truncation method.
-        from backend import qa_pipeline  # Import here to avoid circular imports.
         refined_gen = qa_pipeline(refine_prompt, max_new_tokens=120, truncation=True)
         if refined_gen and isinstance(refined_gen, list):
             refined_answer = refined_gen[0]["generated_text"]
@@ -81,13 +82,12 @@ def self_critique_and_refine(query: str, initial_answer: str, docs: list):
 # ------------------------------
 # 4) HELPER: GPT-2 PROMPT TRUNCATION
 # ------------------------------
-def _truncate_prompt_for_gpt2(prompt_text: str, buffer: int = 80) -> str:
     """
-    Truncates the input prompt so that its token count plus a reserved buffer
-    (for new tokens) does not exceed GPT-2's maximum context length.
     """
     tokens = critique_tokenizer.encode(prompt_text, add_special_tokens=False)
-    # Ensure we leave room for 'buffer' tokens for generation.
     max_allowed = GPT2_MAX_CONTEXT - buffer
     if len(tokens) > max_allowed:
         tokens = tokens[:max_allowed]

 # ------------------------------
 # 1) CRITIQUE MODEL & TOKENIZER
 # ------------------------------
+# Use DistilGPT-2 (a smaller, distilled version of GPT-2) for faster inference on CPU.
+CRITIQUE_MODEL_NAME = "distilgpt2"
 critique_pipeline = pipeline("text-generation", model=CRITIQUE_MODEL_NAME)
 critique_tokenizer = AutoTokenizer.from_pretrained(CRITIQUE_MODEL_NAME)
+# DistilGPT-2 has the same maximum context length as GPT-2 (1024 tokens).
+# We reserve a smaller generation buffer because we now generate fewer tokens.
 GPT2_MAX_CONTEXT = 1024
 # ------------------------------
 # ------------------------------
 def self_critique_and_refine(query: str, initial_answer: str, docs: list):
     """
+    Uses a smaller model (DistilGPT-2) for self-critique, with a reduced max_new_tokens.
+    If the critique indicates issues, refines the answer using BioGPT.
     """
     # A) Construct the critique prompt.
     critique_prompt = (
         "CRITIQUE:"
     )
+    # B) Truncate the prompt so that prompt tokens + new tokens (20) <= GPT2_MAX_CONTEXT.
+    truncated_prompt = _truncate_prompt_for_gpt2(critique_prompt, buffer=20)
+    # C) Generate the critique using DistilGPT-2.
     critique_gen = critique_pipeline(
         truncated_prompt,
+        max_new_tokens=20,   # Reduced new tokens for speed.
         truncation=True
     )
     if critique_gen and isinstance(critique_gen, list):
     else:
         critique_text = "No critique generated."
+    # D) If the critique flags issues, refine using BioGPT.
     if any(word in critique_text.lower() for word in ["missing", "incomplete", "incorrect", "lacks"]):
         refine_prompt = (
             f"Question: {query}\n"
             + "\n\n".join(docs)
             + "\nREFINED ANSWER:"
         )
+        # Optionally, you might also truncate the refine_prompt if needed.
+        from backend import qa_pipeline  # Import here to avoid circular dependencies.
         refined_gen = qa_pipeline(refine_prompt, max_new_tokens=120, truncation=True)
         if refined_gen and isinstance(refined_gen, list):
             refined_answer = refined_gen[0]["generated_text"]
 # ------------------------------
 # 4) HELPER: GPT-2 PROMPT TRUNCATION
 # ------------------------------
+def _truncate_prompt_for_gpt2(prompt_text: str, buffer: int = 20) -> str:
     """
+    Truncates the input prompt so that its token count plus a reserved buffer for new tokens
+    does not exceed GPT-2's (or DistilGPT-2's) maximum context length.
     """
     tokens = critique_tokenizer.encode(prompt_text, add_special_tokens=False)
     max_allowed = GPT2_MAX_CONTEXT - buffer
     if len(tokens) > max_allowed:
         tokens = tokens[:max_allowed]