import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
# Generation / tokenization settings
max_seq_length = 2048  # maximum number of prompt tokens passed to the tokenizer

# Run inference on CPU explicitly
device = "cpu"
# Load model and tokenizer
# 1. Load the base model (unsloth/Meta-Llama-3.1-8B)
base_model_name = "unsloth/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load in full precision on CPU; this avoids bitsandbytes, whose 4-bit/8-bit
# quantization requires a GPU
model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32)
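# Note on memory: an 8B-parameter model in float32 needs roughly 32 GB of RAM
# (8e9 params x 4 bytes) before activations, which can exceed basic CPU
# hardware. If memory is tight, loading in bfloat16 halves the footprint
# (a sketch, assuming the host CPU handles bfloat16 efficiently):
# model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.bfloat16)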
# 2. Apply the fine-tuned LoRA adapter on top of the base model
adapter_model_name = "xiddiqui/News_Summarizer"  # adapter repo on Hugging Face
model = PeftModel.from_pretrained(model, adapter_model_name)

# Move the model to CPU and switch to inference mode
model.to(device)
model.eval()
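# Optional: merging the LoRA weights into the base model removes the adapter's
# extra matrix multiplications at inference time, which can speed up CPU
# generation. A sketch using PEFT's merge API (assumes a standard LoRA adapter):
# model = model.merge_and_unload()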
# Define the summarization function
def generate_summary(input_text):
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Summarize the following:

### Input:
{}

### Response:
"""
    # Tokenize the prompt and move the tensors to the target device
    inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    ).to(device)
    # Generate the summary; no_grad avoids storing activations during inference.
    # Note: max_new_tokens caps only the generated text; the original max_length
    # would have capped prompt + output combined and truncated long articles.
    with torch.no_grad():
        summary_ids = model.generate(
            **inputs,
            max_new_tokens=128,      # limit the length of the generated summary
            num_beams=4,             # beam search for higher-quality output
            no_repeat_ngram_size=2,  # avoid repeating n-grams in the summary
            early_stopping=True,
        )
    # Decode only the newly generated tokens; for causal LMs, generate()
    # returns the prompt followed by the completion
    prompt_length = inputs["input_ids"].shape[1]
    summary = tokenizer.decode(summary_ids[0][prompt_length:], skip_special_tokens=True)
    return summary
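# Quick local smoke test before wiring up the UI (the article text below is a
# made-up placeholder, not from the original repo):
# print(generate_summary("The city council approved the new transit budget on Tuesday..."))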
# Set up the Gradio interface
iface = gr.Interface(
    fn=generate_summary,  # function called on user input
    inputs="text",        # single text box for user input
    outputs="text",       # output rendered as text
    live=False,           # re-running an 8B model on every keystroke is far too slow on CPU
    title="News Summarizer",
    description="Enter a news article, and get a concise summary of the content.",
)
# Launch the Gradio app
iface.launch()
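# Dependencies: on a Hugging Face Space these go in requirements.txt. The list
# below simply reflects the imports above; version pins are left to the
# deployer, since the original repo's pins are not shown here:
#   gradio
#   transformers
#   peft
#   torch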