rivapereira123 committed
Commit 97b2b3c · verified · 1 parent: a12c950

Update app.py

Files changed (1): app.py (+22, -30)

app.py CHANGED
@@ -498,15 +498,14 @@ RESPONSE (provide practical, Gaza-appropriate medical guidance):"""
             max_length=512,
             padding="max_length"
         )
-        input_ids = inputs["input_ids"]
-        attention_mask = inputs["attention_mask"]
-        device = self.llm.device if hasattr(self.llm, "device") else "cpu"
-        input_ids = input_ids.to(device)
-        attention_mask = attention_mask.to(device)
-
-        # Generate output
-        with torch.no_grad():
-            outputs = self.llm.generate(
+            input_ids = inputs["input_ids"]
+            attention_mask = inputs["attention_mask"]
+            device = self.llm.device if hasattr(self.llm, "device") else "cpu"
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+
+            with torch.no_grad():
+                outputs = self.llm.generate(
                 input_ids=input_ids,
                 attention_mask=attention_mask,
                 max_new_tokens=256,
@@ -515,27 +514,20 @@ RESPONSE (provide practical, Gaza-appropriate medical guidance):"""
                 do_sample=True,
                 repetition_penalty=1.15,
                 no_repeat_ngram_size=3
-        )
-
-        # Decode result
-        response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        # Clean and filter output
-        lines = response_text.split('\n')
-        unique_lines = []
-        for line in lines:
-            line = line.strip()
-            if line and line not in unique_lines and len(line) > 10:
-                unique_lines.append(line)
-
-        final_response = '\n'.join(unique_lines)
-        logger.info(f"🧪 Final cleaned response:\n{final_response}")
-
-        return final_response
-
-    except Exception as e:
-        logger.error(f"❌ Error in LLM generate(): {e}")
-        return self._generate_fallback_response(query, context)
+                )
+            response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            lines = response_text.split('\n')
+            unique_lines = []
+            for line in lines:
+                line = line.strip()
+                if line and line not in unique_lines and len(line) > 10:
+                    unique_lines.append(line)
+            final_response = '\n'.join(unique_lines)
+            logger.info(f"🧪 Final cleaned response:\n{final_response}")
+            return final_response
+        except Exception as e:
+            logger.error(f"❌ Error in LLM generate(): {e}")
+            return self._generate_fallback_response(query, context)
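Net effect of the change: the device transfer, generation, decoding, and line-level de-duplication are re-indented so they execute inside the surrounding try/except (the old lines appear to have sat at a shallower indent, outside the try suite), and the inline comments and blank lines are dropped. For reference, here is a minimal, self-contained sketch of the generation path as it stands after this commit. The MedicalAssistant class name, the distilgpt2 checkpoint, the prompt template, and the fallback message are placeholders for this sketch only; self.llm, self.tokenizer, logger, and _generate_fallback_response mirror names visible in the diff, and the generation kwargs that fall between the two hunks (file lines 512-513) are not shown by the diff, so they are left out here.

import logging

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

logger = logging.getLogger(__name__)


class MedicalAssistant:
    """Placeholder wrapper mirroring the structure implied by the diff."""

    def __init__(self, model_name: str = "distilgpt2"):
        # The real app loads its own checkpoint; distilgpt2 keeps the sketch small.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.llm = AutoModelForCausalLM.from_pretrained(model_name)
        if self.tokenizer.pad_token is None:
            # Needed so padding="max_length" works with GPT-style tokenizers.
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def _generate_fallback_response(self, query: str, context: str) -> str:
        # Hypothetical stand-in for the repo's fallback path.
        return "Unable to generate a response right now; please retry."

    def generate_response(self, query: str, context: str) -> str:
        # Placeholder prompt; the real template ends with
        # 'RESPONSE (provide practical, Gaza-appropriate medical guidance):"""'.
        prompt = f"{context}\n\nQUESTION: {query}\n\nRESPONSE:"
        try:
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=512,
                padding="max_length",
            )
            # After the patch, everything below lives inside this try block.
            input_ids = inputs["input_ids"]
            attention_mask = inputs["attention_mask"]
            device = self.llm.device if hasattr(self.llm, "device") else "cpu"
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)

            with torch.no_grad():
                outputs = self.llm.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    max_new_tokens=256,
                    # (app.py sets further sampling kwargs here, hidden between hunks)
                    do_sample=True,
                    repetition_penalty=1.15,
                    no_repeat_ngram_size=3,
                )
            response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Keep only non-trivial, previously unseen lines to curb repetition.
            lines = response_text.split('\n')
            unique_lines = []
            for line in lines:
                line = line.strip()
                if line and line not in unique_lines and len(line) > 10:
                    unique_lines.append(line)
            final_response = '\n'.join(unique_lines)
            logger.info(f"🧪 Final cleaned response:\n{final_response}")
            return final_response
        except Exception as e:
            logger.error(f"❌ Error in LLM generate(): {e}")
            return self._generate_fallback_response(query, context)

A quick smoke test would be MedicalAssistant().generate_response("How should a minor burn be cleaned?", context="..."). Because the whole pipeline now runs inside the try, a tokenizer, device, or generate() failure falls through to _generate_fallback_response instead of crashing the app.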