# News_Summarizer / app.py
# Committed by xiddiqui (adfe61d): updated app.py because of a GPU error
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Inference is pinned to the CPU; the model and the tokenized inputs are both
# moved to this device.
device = "cpu"

# Upper bound (in tokens) applied when the prompt is tokenized.
max_seq_length = 2048

# Hugging Face repo ids: the fine-tuned LoRA adapter and the base checkpoint
# it is applied on top of.
model_name = "xiddiqui/News_Summarizer"
base_model_name = "unsloth/Meta-Llama-3.1-8B"
adapter_model_name = model_name

# Kept as module-level settings; the loading calls below do not read them
# (the weights are loaded in float32 with no 4-bit quantization).
dtype = None
load_in_4bit = False

# ---------------------------------------------------------------------------
# Model and tokenizer loading
# ---------------------------------------------------------------------------
# The tokenizer comes from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Step 1: load the base model in full float32 precision, which avoids any
# bitsandbytes / GPU quantization path.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float32,
)

# Step 2: wrap the base model with the fine-tuned LoRA adapter.
model = PeftModel.from_pretrained(model, adapter_model_name)

# Step 3: place the combined model on the configured device.
model = model.to(device)
# Define the summarization function
def generate_summary(input_text):
    """Summarize a news article with the fine-tuned causal language model.

    The article is inserted into an Alpaca-style instruction prompt,
    tokenized (truncated to ``max_seq_length`` tokens), and passed to
    ``model.generate`` with beam search. Only the tokens produced after the
    prompt are decoded, so the returned text does not echo the prompt.

    Args:
        input_text: The article text to summarize. ``None``, an empty string,
            or whitespace-only text returns ``""`` without running the model.

    Returns:
        The generated summary as a string, with surrounding whitespace
        stripped.
    """
    # Nothing to summarize: skip tokenization and the costly generation step.
    if not input_text or not input_text.strip():
        return ""

    alpaca_prompt = (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. Write a response that appropriately "
        "completes the request.\n"
        "### Instruction:\n"
        "Summarize the following:\n"
        "### Input:\n"
        "{}\n"
        "### Response:\n"
    )

    # Tokenize the filled-in prompt and keep the tensors on the target device.
    # NOTE(review): truncation cuts from the end of the prompt by default, so
    # a very long article may lose the trailing "### Response:" marker —
    # confirm whether the article should be shortened before formatting.
    inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    ).to(device)
    prompt_length = inputs["input_ids"].shape[1]

    # `max_new_tokens` bounds only the generated continuation. The previous
    # `max_length=128` counted the prompt tokens too, which leaves little or
    # no room for a summary once an article is included in the prompt.
    summary_ids = model.generate(
        **inputs,
        max_new_tokens=128,
        num_beams=4,  # beam search
        no_repeat_ngram_size=2,  # forbid repeated bigrams
        early_stopping=True,
    )

    # A causal LM returns prompt + continuation; drop the prompt tokens so
    # only the summary itself is decoded and shown to the user.
    generated_ids = summary_ids[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
# Set up the Gradio interface: one text box in, one text box out, wired to
# generate_summary.
iface = gr.Interface(
    fn=generate_summary,
    inputs="text",
    outputs="text",
    # Run only when the user submits. Live mode re-invokes the function on
    # every input change, which would start a new beam-search generation on
    # the CPU-hosted model for each edit of the text box.
    live=False,
    title="News Summarizer",
    description="Enter a news article, and get a concise summary of the content.",
)

# Launch the Gradio app.
iface.launch()