xiddiqui committed
Commit 74ecf43 · 1 Parent(s): 78f89b2

updated the app.py file; removed the model error caused by seq2seq loading

Files changed (2):
  1. app.py +17 -3
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+from peft import PeftModel
 
 # Set the model name and parameters
 model_name = "xiddiqui/News_Summarizer"
@@ -8,9 +9,22 @@ max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
 dtype = None # None for auto detection
 load_in_4bit = False # Use False if we aren't using 4bit quantization
 
+# Check device availability (GPU or CPU)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
 # Load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=dtype)
+
+# 1. Load the base model (unsloth/meta-llama-3.1-8b-bnb-4bit)
+base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
+tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+model = AutoModelForCausalLM.from_pretrained(base_model_name)
+
+# 2. Load your fine-tuned model with the LoRA adapter
+adapter_model_name = "xiddiqui/News_Summarizer"  # Your model path on Hugging Face
+model = PeftModel.from_pretrained(model, adapter_model_name)
+
+# Move model to the appropriate device (GPU or CPU)
+model.to(device)
 
 # Define the summarization function
 def generate_summary(input_text):
@@ -31,7 +45,7 @@ def generate_summary(input_text):
         return_tensors="pt",
         truncation=True,
         max_length=max_seq_length
-    ).to("cpu")  # Ensure computations are done on CPU (change to "cuda" if using GPU)
+    ).to(device)  # Ensure computations are done on the same device as the model (CPU or GPU)
 
     # Generate summary
     summary_ids = model.generate(
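
Read as a whole, the app.py change swaps direct loading of xiddiqui/News_Summarizer for a two-step load: the unsloth base model first, then the LoRA adapter applied on top via peft, with a single device variable shared by the model and its inputs. A minimal runnable consolidation of the new loading path (all names are taken from the diff above):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Pick the device once; the model and all input tensors reuse it
device = "cuda" if torch.cuda.is_available() else "cpu"

# 1. Load the base model the adapter was fine-tuned from
base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
model = AutoModelForCausalLM.from_pretrained(base_model_name)

# 2. Apply the fine-tuned LoRA adapter published on the Hub
model = PeftModel.from_pretrained(model, "xiddiqui/News_Summarizer")
model.to(device)

One design note: the base checkpoint is a bnb-4bit export, yet load_in_4bit stays False and is never passed to from_pretrained, so on a CPU-only host this load may still require bitsandbytes or fall back to a higher-precision path.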
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 torch
 transformers
-gradio
+gradio
+peft
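
For completeness: only two fragments of generate_summary appear in the app.py hunks above (the tail of the tokenizer call and the opening of model.generate). A hypothetical sketch of how the full function could look, reusing tokenizer, model, device, and max_seq_length from the loading code; the decode step and the max_new_tokens value are assumptions, not part of the diff:

def generate_summary(input_text):
    # Tokenize and move inputs to the model's device (as in the hunk above)
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    ).to(device)

    # Generate the summary; max_new_tokens=128 is an illustrative choice
    summary_ids = model.generate(**inputs, max_new_tokens=128)

    # Decode step assumed; the diff does not show it
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)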