rivapereira123 committed
Commit a12c950 · verified · 1 Parent(s): b4c0f0a

Update app.py

Files changed (1)
  1. app.py +43 -31
app.py CHANGED
@@ -480,52 +480,64 @@ Provide clear, actionable advice while emphasizing the need for professional med
     return "\n\n".join(context_parts)

     def _generate_response(self, query: str, context: str) -> str:
-        """Enhanced response generation using model.generate() to avoid DynamicCache errors"""
+        """Generate response using T5-style seq2seq model with Gaza-specific context"""
         if self.llm is None or self.tokenizer is None:
             return self._generate_fallback_response(query, context)
-
-        # Build prompt with Gaza-specific context
-        prompt = f"""{self.system_prompt}
-
+        prompt = f"""{self.system_prompt}
 MEDICAL KNOWLEDGE CONTEXT:
 {context}

 PATIENT QUESTION: {query}

 RESPONSE (provide practical, Gaza-appropriate medical guidance):"""
-
         try:
-            # Tokenize and move to correct device
-            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
-            if hasattr(self.llm, 'device'):
-                inputs = inputs.to(self.llm.device)
-
-            # Generate the response
-            with torch.no_grad():
-                outputs = self.generation_pipeline(prompt, max_new_tokens=300, temperature=0.3, repetition_penalty=1.15, no_repeat_ngram_size=3)
-                response_text = outputs[0]["generated_text"]
-
-            # Decode and clean up
-            response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-            # Extract only the generated part
-            if "RESPONSE (provide practical, Gaza-appropriate medical guidance):" in response_text:
-                response_text = response_text.split("RESPONSE (provide practical, Gaza-appropriate medical guidance):")[1]
-
-            # Clean up the response
-            lines = response_text.split('\n')
-            unique_lines = []
-            unique_lines.append(line)
-            final_response = '\n'.join(unique_lines)
-            logger.info(f"🧪 Final cleaned response:\n{final_response}")
-
-            return final_response
-
-        except Exception as e:
-            logger.error(f"❌ Error in LLM generate(): {e}")
-            return self._generate_fallback_response(query, context)
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors="pt",
+                truncation=True,
+                max_length=512,
+                padding="max_length"
+            )
+            input_ids = inputs["input_ids"]
+            attention_mask = inputs["attention_mask"]
+            device = self.llm.device if hasattr(self.llm, "device") else "cpu"
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+
+            # Generate output
+            with torch.no_grad():
+                outputs = self.llm.generate(
+                    input_ids=input_ids,
+                    attention_mask=attention_mask,
+                    max_new_tokens=256,
+                    temperature=0.3,
+                    pad_token_id=self.tokenizer.eos_token_id,
+                    do_sample=True,
+                    repetition_penalty=1.15,
+                    no_repeat_ngram_size=3
+                )
+
+            # Decode result
+            response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+            # Clean and filter output
+            lines = response_text.split('\n')
+            unique_lines = []
+            for line in lines:
+                line = line.strip()
+                if line and line not in unique_lines and len(line) > 10:
+                    unique_lines.append(line)
+
+            final_response = '\n'.join(unique_lines)
+            logger.info(f"🧪 Final cleaned response:\n{final_response}")
+
+            return final_response
+
+        except Exception as e:
+            logger.error(f"❌ Error in LLM generate(): {e}")
+            return self._generate_fallback_response(query, context)

     def _generate_fallback_response(self, query: str, context: str) -> str:
         """Enhanced fallback response with Gaza-specific guidance"""