xiddiqui committed
Commit 74ecf43 · 1 Parent(s): 78f89b2

updated the app.py file; removed the model error caused by seq2seq loading

Files changed (2):
  1. app.py +17 -3
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+from peft import PeftModel
 
 # Set the model name and parameters
 model_name = "xiddiqui/News_Summarizer"
@@ -8,9 +9,22 @@ max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
 dtype = None # None for auto detection
 load_in_4bit = False # Use False if we aren't using 4bit quantization
 
+# Check device availability (GPU or CPU)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
 # Load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=dtype)
+
+# 1. Load the base model (unsloth/meta-llama-3.1-8b-bnb-4bit)
+base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
+tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+model = AutoModelForCausalLM.from_pretrained(base_model_name)
+
+# 2. Load your fine-tuned model with the LoRA adapter
+adapter_model_name = "xiddiqui/News_Summarizer"  # Your model path on Hugging Face
+model = PeftModel.from_pretrained(model, adapter_model_name)
+
+# Move model to the appropriate device (GPU or CPU)
+model.to(device)
 
 # Define the summarization function
 def generate_summary(input_text):
@@ -31,7 +45,7 @@ def generate_summary(input_text):
         return_tensors="pt",
         truncation=True,
         max_length=max_seq_length
-    ).to("cpu")  # Ensure computations are done on CPU (change to "cuda" if using GPU)
+    ).to(device)  # Ensure computations are done on the same device as the model (CPU or GPU)
 
     # Generate summary
     summary_ids = model.generate(
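
Read as a whole, the app.py change swaps direct loading of xiddiqui/News_Summarizer for a two-step load: the unsloth base model first, then the LoRA adapter applied on top via peft, with a single device variable shared by the model and its inputs. A minimal runnable consolidation of the new loading path (all names are taken from the diff above):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Pick the device once; the model and all input tensors reuse it
device = "cuda" if torch.cuda.is_available() else "cpu"

# 1. Load the base model the adapter was fine-tuned from
base_model_name = "unsloth/meta-llama-3.1-8b-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
model = AutoModelForCausalLM.from_pretrained(base_model_name)

# 2. Apply the fine-tuned LoRA adapter published on the Hub
model = PeftModel.from_pretrained(model, "xiddiqui/News_Summarizer")
model.to(device)

One design note: the base checkpoint is a bnb-4bit export, yet load_in_4bit stays False and is never passed to from_pretrained, so on a CPU-only host this load may still require bitsandbytes or fall back to a higher-precision path.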
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 torch
 transformers
-gradio
+gradio
+peft
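
For completeness: only two fragments of generate_summary appear in the app.py hunks above (the tail of the tokenizer call and the opening of model.generate). A hypothetical sketch of how the full function could look, reusing tokenizer, model, device, and max_seq_length from the loading code; the decode step and the max_new_tokens value are assumptions, not part of the diff:

def generate_summary(input_text):
    # Tokenize and move inputs to the model's device (as in the hunk above)
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    ).to(device)

    # Generate the summary; max_new_tokens=128 is an illustrative choice
    summary_ids = model.generate(**inputs, max_new_tokens=128)

    # Decode step assumed; the diff does not show it
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)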