Spaces:
Runtime error
Updated app.py: changed the code to run on CPU and removed the unsloth dependency
app.py CHANGED
@@ -1,19 +1,15 @@
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextStreamer
 
-
-
+# Set up parameters
+max_seq_length = 2048  # Max sequence length
+dtype = None  # Automatically detect dtype; if GPU available, use float16, else use CPU
+load_in_4bit = True  # Use 4-bit quantization for reduced memory usage
 
-
-
-
-
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = "xiddiqui/News_Summarizer",
-    max_seq_length = max_seq_length,
-    dtype = dtype,
-    load_in_4bit = load_in_4bit,
-    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
-)
+# Load the model and tokenizer using Hugging Face's AutoModel and AutoTokenizer
+model_name = "xiddiqui/News_Summarizer"
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=dtype, revision="4bit" if load_in_4bit else "main")
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 # Define the summarization function
 def generate_summary(input_text):
@@ -27,17 +23,18 @@ def generate_summary(input_text):
 
     ### Response:
     """
-
     # Tokenize and prepare the input text
     inputs = tokenizer(
         [alpaca_prompt.format(input_text)],
         return_tensors="pt",
         truncation=True,
         max_length=max_seq_length
-    )
+    )
+
+    # Ensure that the model runs on CPU
+    inputs = {key: value.to("cpu") for key, value in inputs.items()}
 
     # Set up TextStreamer for efficient text generation
-    from transformers import TextStreamer
     text_streamer = TextStreamer(tokenizer)
 
     # Generate summary
@@ -63,4 +60,3 @@ iface = gr.Interface(
 
 # Launch Gradio app
 iface.launch()
-
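For reference, below is a minimal CPU-only sketch of how the updated app.py could fit together. Only the fragments visible in the diff come from the commit itself; the prompt template text, the generation arguments, and the gr.Interface configuration are assumptions filled in for illustration. The sketch also drops the load_in_4bit / revision="4bit" arguments: revision selects a repository branch or tag rather than a quantization mode, and 4-bit quantized loading is GPU-oriented, so plain float32 weights are used on CPU.

```python
# Hypothetical CPU-only app.py assembled from the fragments in the diff above.
# The prompt template, generation settings, and Interface layout are assumptions.
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextStreamer

max_seq_length = 2048  # maximum number of input tokens

model_name = "xiddiqui/News_Summarizer"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Plain float32 load on CPU; no 4-bit quantization and no custom revision
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float32)
model.to("cpu")
model.eval()

# Assumed Alpaca-style template; the real one is not visible in the diff
alpaca_prompt = """Below is a news article. Write a concise summary.

### Input:
{}

### Response:
"""

def generate_summary(input_text):
    # Tokenize and prepare the input text
    inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    )
    # Keep every tensor on the CPU, as the commit does
    inputs = {key: value.to("cpu") for key, value in inputs.items()}

    # TextStreamer prints tokens to the log as they are generated
    text_streamer = TextStreamer(tokenizer)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            streamer=text_streamer,
            max_new_tokens=128,  # assumed summary length budget
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Assumed Gradio wiring; the diff only shows that gr.Interface(...) and
# iface.launch() are present
iface = gr.Interface(
    fn=generate_summary,
    inputs=gr.Textbox(lines=12, label="Article"),
    outputs=gr.Textbox(label="Summary"),
    title="News Summarizer (CPU)",
)

# Launch Gradio app
iface.launch()
```

One caution: the removed code loaded this checkpoint through unsloth's FastLanguageModel, which targets Llama-style causal language models. If xiddiqui/News_Summarizer is such a fine-tune rather than an encoder-decoder model, AutoModelForSeq2SeqLM will fail to load it and AutoModelForCausalLM would likely be the appropriate class, which may be related to the Space's runtime error.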