xiddiqui committed
Commit 1671d1f · Parent: 5c0e9f4

Updated app.py because of a GPU error

Files changed (1):
  app.py +4 −5
app.py CHANGED

@@ -1,6 +1,5 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
 from peft import PeftModel
 
 # Set the model name and parameters
@@ -9,8 +8,8 @@ max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
 dtype = None # None for auto detection
 load_in_4bit = False # Use False if we aren't using 4bit quantization
 
-# Check device availability (GPU or CPU)
-device = "cuda" if torch.cuda.is_available() else "cpu"
+# Set the device to CPU explicitly
+device = "cpu"
 
 # Load model and tokenizer
 
@@ -23,7 +22,7 @@ model = AutoModelForCausalLM.from_pretrained(base_model_name)
 adapter_model_name = "xiddiqui/News_Summarizer" # Your model path on Hugging Face
 model = PeftModel.from_pretrained(model, adapter_model_name)
 
-# Move model to the appropriate device (GPU or CPU)
+# Move model to CPU (no need for GPU)
 model.to(device)
 
 # Define the summarization function
@@ -45,7 +44,7 @@ def generate_summary(input_text):
         return_tensors="pt",
         truncation=True,
         max_length=max_seq_length
-    ).to(device) # Ensure computations are done on the same device as the model (CPU or GPU)
+    ).to(device) # Ensure computations are done on the CPU
 
     # Generate summary
     summary_ids = model.generate(
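For orientation, the snippet below reassembles how the touched parts of app.py fit together after this commit. It is a minimal sketch built only from the hunks above, not the full file: the value of base_model_name is elided in the diff, and the tokenizer load call, the inputs variable name, and the generate() arguments (max_new_tokens) are illustrative assumptions rather than code confirmed by this commit.

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

max_seq_length = 2048  # from the hunk context above

# The commit's change: pin everything to CPU and drop the torch import,
# so no CUDA initialization is ever attempted.
device = "cpu"

base_model_name = "..."  # value elided in the diff; only the identifier is shown
tokenizer = AutoTokenizer.from_pretrained(base_model_name)  # assumed load call
model = AutoModelForCausalLM.from_pretrained(base_model_name)

# Attach the PEFT adapter and keep the merged model on CPU
adapter_model_name = "xiddiqui/News_Summarizer"
model = PeftModel.from_pretrained(model, adapter_model_name)
model.to(device)

def generate_summary(input_text):
    # Tokenize onto the same (CPU) device the model lives on
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    ).to(device)
    # Generation arguments here are illustrative assumptions
    summary_ids = model.generate(**inputs, max_new_tokens=128)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

With device hard-coded to "cpu" and the torch import removed, torch.cuda is never touched, which matches the commit's stated goal of avoiding the GPU error on what is presumably CPU-only hardware.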