xiddiqui committed on
Commit 135d921 · Parent: 043c5fa

updated app.py: switched to CPU-based inference and removed the unsloth dependency

Files changed (1)
  1. app.py +13 -17
app.py CHANGED
@@ -1,19 +1,15 @@
 import gradio as gr
 
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-from unsloth import FastLanguageModel
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextStreamer
 
-max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
-dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
-load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
-
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = "xiddiqui/News_Summarizer",
-    max_seq_length = max_seq_length,
-    dtype = dtype,
-    load_in_4bit = load_in_4bit,
-    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
-)
+# Set up parameters
+max_seq_length = 2048 # Max sequence length
+dtype = None # Automatically detect dtype; if GPU available, use float16, else use CPU
+load_in_4bit = True # Use 4-bit quantization for reduced memory usage
+
+# Load the model and tokenizer using Hugging Face's AutoModel and AutoTokenizer
+model_name = "xiddiqui/News_Summarizer"
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=dtype, revision="4bit" if load_in_4bit else "main")
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 # Define the summarization function
 def generate_summary(input_text):
@@ -27,17 +23,18 @@ def generate_summary(input_text):
 
     ### Response:
     """
-
     # Tokenize and prepare the input text
     inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length
-    ).to("gpu") # Ensure computations are done on CPU
+    )
+
+    # Ensure that the model runs on CPU
+    inputs = {key: value.to("cpu") for key, value in inputs.items()}
 
     # Set up TextStreamer for efficient text generation
-    from transformers import TextStreamer
     text_streamer = TextStreamer(tokenizer)
 
     # Generate summary
@@ -63,4 +60,3 @@ iface = gr.Interface(
 
 # Launch Gradio app
 iface.launch()
-
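
A side note on the committed loading code: in transformers, the `revision` argument of `from_pretrained` selects a git branch or tag of the model repo, so `revision="4bit"` only resolves if the xiddiqui/News_Summarizer repo actually has a branch by that name, and bitsandbytes 4-bit quantization is unavailable on CPU in any case. Below is a minimal CPU-only sketch of the same load-and-generate flow, assuming the checkpoint really is a seq2seq model (as the commit's use of AutoModelForSeq2SeqLM implies); the prompt template and the max_new_tokens value are illustrative placeholders, not taken from the commit.

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextStreamer

model_name = "xiddiqui/News_Summarizer"  # model repo named in the diff
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Plain full-precision load: no "4bit" branch is assumed to exist,
# and bitsandbytes 4-bit requires a GPU backend anyway.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float32)
model.to("cpu")
model.eval()

def generate_summary(input_text):
    # Hypothetical template; the real alpaca_prompt body sits outside the diff's context lines.
    alpaca_prompt = "Summarize the following article:\n\n{}\n\n### Response:\n"
    inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=2048,
    )
    # Same CPU move as the commit, written as a dict comprehension
    inputs = {key: value.to("cpu") for key, value in inputs.items()}
    text_streamer = TextStreamer(tokenizer)  # prints tokens to stdout as they are generated
    with torch.no_grad():  # inference only, no gradients needed
        output_ids = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

The dict comprehension the commit uses for the CPU move works, though the BatchEncoding returned by the tokenizer also supports inputs.to("cpu") directly, which does the same thing in one call.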