pendar02 committed
Commit 2ad2c86 · verified · 1 Parent(s): 65d5daa

Update app.py

Files changed (1)
  1. app.py +10 -7
app.py CHANGED
@@ -30,22 +30,23 @@ def load_model(model_type):
     """Load appropriate model based on type with proper memory management"""
     try:
         # Clear any existing cached data
-        torch.cuda.empty_cache()
         gc.collect()
 
+        device = "cpu"  # Force CPU usage
+
         if model_type == "summarize":
             base_model = AutoModelForSeq2SeqLM.from_pretrained(
                 "facebook/bart-large-cnn",
                 cache_dir="./models",
                 low_cpu_mem_usage=True,
-                torch_dtype=torch.float32
+                device_map={"": device}
             )
             model = PeftModel.from_pretrained(
                 base_model,
                 "pendar02/results",
-                device_map="auto",
+                device_map={"": device},
                 torch_dtype=torch.float32
-            )
+            ).to(device)
             tokenizer = AutoTokenizer.from_pretrained(
                 "facebook/bart-large-cnn",
                 cache_dir="./models"
@@ -55,14 +56,14 @@ def load_model(model_type):
                 "GanjinZero/biobart-base",
                 cache_dir="./models",
                 low_cpu_mem_usage=True,
-                torch_dtype=torch.float32
+                device_map={"": device}
             )
             model = PeftModel.from_pretrained(
                 base_model,
                 "pendar02/biobart-finetune",
-                device_map="auto",
+                device_map={"": device},
                 torch_dtype=torch.float32
-            )
+            ).to(device)
             tokenizer = AutoTokenizer.from_pretrained(
                 "GanjinZero/biobart-base",
                 cache_dir="./models"
@@ -137,6 +138,7 @@ def generate_summary(text, model, tokenizer):
     min_length = min(50, word_count)  # Dynamic min length
 
     inputs = tokenizer(formatted_text, return_tensors="pt", max_length=1024, truncation=True)
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
 
     with torch.no_grad():
         summary_ids = model.generate(
@@ -167,6 +169,7 @@ def generate_focused_summary(question, abstracts, model, tokenizer):
     combined_input = f"Question: {question} Abstracts: " + " [SEP] ".join(formatted_abstracts)
 
     inputs = tokenizer(combined_input, return_tensors="pt", max_length=1024, truncation=True)
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
 
     with torch.no_grad():
         summary_ids = model.generate(
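
The core of this commit is the device handling in load_model: torch.cuda.empty_cache() and device_map="auto" are dropped in favor of pinning everything to the CPU. Passing device_map={"": device} maps the root module (the empty-string key), and therefore every submodule, onto that device, and the trailing .to(device) makes the PEFT wrapper's placement explicit as well. A minimal sketch of the resulting loading path for the "summarize" branch, using the same checkpoints as the diff (the function name load_summarize_model is illustrative, not from app.py):

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel

def load_summarize_model():
    """Sketch of the post-commit loading path: all weights pinned to the CPU."""
    device = "cpu"  # Force CPU usage, as in the diff

    # device_map={"": device} assigns the root module ("") -- and hence
    # every submodule -- to the given device, so no CUDA memory is touched.
    base_model = AutoModelForSeq2SeqLM.from_pretrained(
        "facebook/bart-large-cnn",
        cache_dir="./models",
        low_cpu_mem_usage=True,
        device_map={"": device},
    )
    model = PeftModel.from_pretrained(
        base_model,
        "pendar02/results",
        device_map={"": device},
        torch_dtype=torch.float32,
    ).to(device)
    tokenizer = AutoTokenizer.from_pretrained(
        "facebook/bart-large-cnn",
        cache_dir="./models",
    )
    return model, tokenizer

Unlike device_map="auto", which lets accelerate decide where each layer lands based on available hardware, the explicit {"": "cpu"} dict makes placement deterministic on hosts without a usable GPU.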
 
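
The other half of the change keeps generation inputs on the same device as the model: both generate_summary and generate_focused_summary now move every tokenizer output tensor to model.device before calling generate. A minimal sketch of that pattern (the generate keyword arguments and the final decode are illustrative, since the diff is truncated before them):

import torch

def summarize(formatted_text, model, tokenizer):
    """Sketch of the post-commit generation path: inputs follow model.device."""
    inputs = tokenizer(formatted_text, return_tensors="pt", max_length=1024, truncation=True)
    # Move input_ids, attention_mask, etc. to wherever the model lives;
    # this is a no-op when both already sit on the CPU.
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        summary_ids = model.generate(
            **inputs,
            max_length=150,  # illustrative; app.py computes these dynamically
            min_length=50,
        )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

Routing through model.device rather than hard-coding "cpu" means the same helpers keep working if the loader is ever switched back to GPU placement.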