xiddiqui committed on
Commit 135d921 · Parent: 043c5fa

updated app.py: switched to CPU-based inference and removed the unsloth dependency

Files changed (1)
  1. app.py +13 -17
app.py CHANGED
@@ -1,19 +1,15 @@
 import gradio as gr
 
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-from unsloth import FastLanguageModel
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextStreamer
 
-max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
-dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
-load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
-
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = "xiddiqui/News_Summarizer",
-    max_seq_length = max_seq_length,
-    dtype = dtype,
-    load_in_4bit = load_in_4bit,
-    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
-)
+# Set up parameters
+max_seq_length = 2048 # Max sequence length
+dtype = None # Automatically detect dtype; if GPU available, use float16, else use CPU
+load_in_4bit = True # Use 4-bit quantization for reduced memory usage
+
+# Load the model and tokenizer using Hugging Face's AutoModel and AutoTokenizer
+model_name = "xiddiqui/News_Summarizer"
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=dtype, revision="4bit" if load_in_4bit else "main")
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 # Define the summarization function
 def generate_summary(input_text):
@@ -27,17 +23,18 @@ def generate_summary(input_text):
 
     ### Response:
     """
-
     # Tokenize and prepare the input text
     inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length
-    ).to("gpu") # Ensure computations are done on CPU
+    )
+
+    # Ensure that the model runs on CPU
+    inputs = {key: value.to("cpu") for key, value in inputs.items()}
 
     # Set up TextStreamer for efficient text generation
-    from transformers import TextStreamer
     text_streamer = TextStreamer(tokenizer)
 
     # Generate summary
@@ -63,4 +60,3 @@ iface = gr.Interface(
 
 # Launch Gradio app
 iface.launch()
-
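
A side note on the committed loading code: in transformers, the `revision` argument of `from_pretrained` selects a git branch or tag of the model repo, so `revision="4bit"` only resolves if the xiddiqui/News_Summarizer repo actually has a branch by that name, and bitsandbytes 4-bit quantization is unavailable on CPU in any case. Below is a minimal CPU-only sketch of the same load-and-generate flow, assuming the checkpoint really is a seq2seq model (as the commit's use of AutoModelForSeq2SeqLM implies); the prompt template and the max_new_tokens value are illustrative placeholders, not taken from the commit.

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextStreamer

model_name = "xiddiqui/News_Summarizer"  # model repo named in the diff
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Plain full-precision load: no "4bit" branch is assumed to exist,
# and bitsandbytes 4-bit requires a GPU backend anyway.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float32)
model.to("cpu")
model.eval()

def generate_summary(input_text):
    # Hypothetical template; the real alpaca_prompt body sits outside the diff's context lines.
    alpaca_prompt = "Summarize the following article:\n\n{}\n\n### Response:\n"
    inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=2048,
    )
    # Same CPU move as the commit, written as a dict comprehension
    inputs = {key: value.to("cpu") for key, value in inputs.items()}
    text_streamer = TextStreamer(tokenizer)  # prints tokens to stdout as they are generated
    with torch.no_grad():  # inference only, no gradients needed
        output_ids = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

The dict comprehension the commit uses for the CPU move works, though the BatchEncoding returned by the tokenizer also supports inputs.to("cpu") directly, which does the same thing in one call.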