Spaces:

mgbam
/

Medic

Running

App Files Files Community

mgbam committed 4 days ago

Commit

a23c4d4

verified ·

1 Parent(s): 1375d63

Update mini_ladder.py

Browse files

Files changed (1) hide show

mini_ladder.py +26 -28

mini_ladder.py CHANGED Viewed

@@ -3,12 +3,11 @@ from transformers import pipeline, AutoTokenizer
 # ------------------------------
 # 1) CRITIQUE MODEL & TOKENIZER
 # ------------------------------
-# Using GPT-2 for self-critique
 CRITIQUE_MODEL_NAME = "gpt2"
 critique_pipeline = pipeline("text-generation", model=CRITIQUE_MODEL_NAME)
 critique_tokenizer = AutoTokenizer.from_pretrained(CRITIQUE_MODEL_NAME)
-# GPT-2 typically has a max context length of 1024 tokens
 GPT2_MAX_CONTEXT = 1024
 # ------------------------------
@@ -16,7 +15,7 @@ GPT2_MAX_CONTEXT = 1024
 # ------------------------------
 def generate_sub_questions(main_query: str):
     """
-    Naive approach to generating sub-questions.
     """
     return [
         f"1) What are common causes of {main_query}?",
@@ -29,10 +28,10 @@ def generate_sub_questions(main_query: str):
 # ------------------------------
 def self_critique_and_refine(query: str, initial_answer: str, docs: list):
     """
-    1) Critique the initial answer (GPT-2).
-    2) If needed, refine using the original BioGPT pipeline.
     """
-    # A) Construct the critique prompt
     critique_prompt = (
         f"The following is an answer to the question '{query}'. "
         "Evaluate its correctness, clarity, and completeness. "
@@ -40,24 +39,24 @@ def self_critique_and_refine(query: str, initial_answer: str, docs: list):
         f"ANSWER:\n{initial_answer}\n\n"
         "CRITIQUE:"
     )
-    # B) Truncate the critique prompt to fit GPT-2’s max context
-    truncated_critique_prompt = _truncate_prompt_for_gpt2(critique_prompt)
-    # C) Generate the critique
     critique_gen = critique_pipeline(
-        truncated_critique_prompt,
-        max_new_tokens=80,   # how many tokens to generate for the critique
-        truncation=True      # ensure we don't exceed the final length
     )
     if critique_gen and isinstance(critique_gen, list):
         critique_text = critique_gen[0]["generated_text"]
     else:
         critique_text = "No critique generated."
-    # D) If critique suggests issues, refine using BioGPT
     if any(word in critique_text.lower() for word in ["missing", "incomplete", "incorrect", "lacks"]):
-        # Build a refine prompt that includes docs
         refine_prompt = (
             f"Question: {query}\n"
             f"Current Answer: {initial_answer}\n"
@@ -67,11 +66,8 @@ def self_critique_and_refine(query: str, initial_answer: str, docs: list):
             + "\n\n".join(docs)
             + "\nREFINED ANSWER:"
         )
-        # If BioGPT has similar context limits, you can truncate here too
-        # e.g., refine_prompt = _truncate_prompt_for_biogpt(refine_prompt)
-        from backend import qa_pipeline  # Import to avoid circular references
         refined_gen = qa_pipeline(refine_prompt, max_new_tokens=120, truncation=True)
         if refined_gen and isinstance(refined_gen, list):
             refined_answer = refined_gen[0]["generated_text"]
@@ -83,15 +79,17 @@ def self_critique_and_refine(query: str, initial_answer: str, docs: list):
     return refined_answer, critique_text
 # ------------------------------
-# 4) HELPER: GPT-2 TRUNCATION
 # ------------------------------
-def _truncate_prompt_for_gpt2(prompt_text: str) -> str:
     """
-    Token-level truncation to ensure the prompt doesn't exceed GPT-2’s 1024-token limit.
     """
     tokens = critique_tokenizer.encode(prompt_text, add_special_tokens=False)
-    if len(tokens) > GPT2_MAX_CONTEXT:
-        # Keep the first 1024 tokens
-        tokens = tokens[:GPT2_MAX_CONTEXT]
     truncated_text = critique_tokenizer.decode(tokens, skip_special_tokens=True)
     return truncated_text

 # ------------------------------
 # 1) CRITIQUE MODEL & TOKENIZER
 # ------------------------------
 CRITIQUE_MODEL_NAME = "gpt2"
 critique_pipeline = pipeline("text-generation", model=CRITIQUE_MODEL_NAME)
 critique_tokenizer = AutoTokenizer.from_pretrained(CRITIQUE_MODEL_NAME)
+# GPT-2 has a maximum context length of 1024 tokens.
 GPT2_MAX_CONTEXT = 1024
 # ------------------------------
 # ------------------------------
 def generate_sub_questions(main_query: str):
     """
+    Naively generates sub-questions for the given main query.
     """
     return [
         f"1) What are common causes of {main_query}?",
 # ------------------------------
 def self_critique_and_refine(query: str, initial_answer: str, docs: list):
     """
+    1) Uses GPT-2 to critique the initial answer.
+    2) If the critique indicates missing or incomplete details, refines the answer using BioGPT.
     """
+    # A) Construct the critique prompt.
     critique_prompt = (
         f"The following is an answer to the question '{query}'. "
         "Evaluate its correctness, clarity, and completeness. "
         f"ANSWER:\n{initial_answer}\n\n"
         "CRITIQUE:"
     )
+    # B) Truncate the prompt so that prompt tokens + new tokens <= GPT2_MAX_CONTEXT.
+    # Reserve a buffer for new tokens (default 80 tokens).
+    truncated_prompt = _truncate_prompt_for_gpt2(critique_prompt, buffer=80)
+    # C) Generate the critique using the truncated prompt.
     critique_gen = critique_pipeline(
+        truncated_prompt,
+        max_new_tokens=80,   # tokens to generate for critique
+        truncation=True
     )
     if critique_gen and isinstance(critique_gen, list):
         critique_text = critique_gen[0]["generated_text"]
     else:
         critique_text = "No critique generated."
+    # D) If the critique flags issues, refine the answer using BioGPT.
     if any(word in critique_text.lower() for word in ["missing", "incomplete", "incorrect", "lacks"]):
         refine_prompt = (
             f"Question: {query}\n"
             f"Current Answer: {initial_answer}\n"
             + "\n\n".join(docs)
             + "\nREFINED ANSWER:"
         )
+        # Optionally, if BioGPT also has context limits, apply a similar truncation method.
+        from backend import qa_pipeline  # Import here to avoid circular imports.
         refined_gen = qa_pipeline(refine_prompt, max_new_tokens=120, truncation=True)
         if refined_gen and isinstance(refined_gen, list):
             refined_answer = refined_gen[0]["generated_text"]
     return refined_answer, critique_text
 # ------------------------------
+# 4) HELPER: GPT-2 PROMPT TRUNCATION
 # ------------------------------
+def _truncate_prompt_for_gpt2(prompt_text: str, buffer: int = 80) -> str:
     """
+    Truncate a prompt so that it plus a generation reserve fits GPT-2's context.
+
+    Args:
+        prompt_text: Prompt text to tokenize with the critique tokenizer.
+        buffer: Number of tokens to reserve for generated output. Values
+            outside ``0..GPT2_MAX_CONTEXT`` are clamped into that range.
+
+    Returns:
+        The prompt decoded from at most ``GPT2_MAX_CONTEXT - buffer`` tokens
+        (keeping the leading tokens when truncation is needed).
     """
     tokens = critique_tokenizer.encode(prompt_text, add_special_tokens=False)
+    # Clamp the reserve: a buffer larger than the context would make the
+    # limit negative, and a negative slice bound trims tokens from the end
+    # instead of capping the length; a negative buffer would let the prompt
+    # exceed the context window.
+    reserved = min(max(buffer, 0), GPT2_MAX_CONTEXT)
+    max_allowed = GPT2_MAX_CONTEXT - reserved
+    if len(tokens) > max_allowed:
+        tokens = tokens[:max_allowed]
     truncated_text = critique_tokenizer.decode(tokens, skip_special_tokens=True)
     return truncated_text