import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
# Set the model name and parameters
model_name = "xiddiqui/News_Summarizer"
max_seq_length = 2048  # Maximum number of input tokens passed to the tokenizer
dtype = None  # None for auto detection (not referenced below in this CPU-only setup)
load_in_4bit = False  # 4-bit quantization is disabled; it would require bitsandbytes and a GPU
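# A hedged sketch of how 4-bit loading could look on a GPU (not used here,
# since this Space runs on CPU):
# from transformers import BitsAndBytesConfig
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )
# ...and pass quantization_config=bnb_config to from_pretrained() when loading
# the base model below.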
# Set the device to CPU explicitly
device = "cpu"
# Load model and tokenizer
# 1. Load the base model (unsloth/meta-llama-3.1-8b)
base_model_name = "unsloth/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
# Load in full precision (float32) so that bitsandbytes / 4-bit quantization is not required
model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32)
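# Note: an 8B-parameter model in float32 needs roughly 32 GB of RAM just for the
# weights. If loading memory is a concern, low_cpu_mem_usage=True is a hedged
# option (not part of the original app) that lowers peak memory while the
# weights are materialized:
# model = AutoModelForCausalLM.from_pretrained(
#     base_model_name, torch_dtype=torch.float32, low_cpu_mem_usage=True
# )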
# 2. Load your fine-tuned model with the LoRA adapter
adapter_model_name = "xiddiqui/News_Summarizer" # Your model path on Hugging Face
model = PeftModel.from_pretrained(model, adapter_model_name)
# Move model to CPU (no need for GPU)
model.to(device)
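# Put the model in evaluation mode (disables dropout) before generation
model.eval()

# Optional, hedged suggestion (not part of the original app): merging the LoRA
# weights into the base model removes the adapter indirection at inference time,
# which can speed up CPU generation:
# model = model.merge_and_unload()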
# Define the summarization function
def generate_summary(input_text):
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Summarize the following:
### Input:
{}
### Response:
"""
    # Tokenize and prepare the input text
    inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    ).to(device)  # Ensure computations are done on the CPU

    # Generate the summary
    summary_ids = model.generate(
        **inputs,
        max_new_tokens=128,      # Limit the length of the generated summary
        num_beams=4,             # Beam search for higher-quality output
        no_repeat_ngram_size=2,  # Avoid repeating n-grams in the summary
        early_stopping=True,
    )

    # Decode only the newly generated tokens, skipping the prompt portion
    prompt_length = inputs["input_ids"].shape[1]
    summary = tokenizer.decode(summary_ids[0][prompt_length:], skip_special_tokens=True)
    return summary
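# Quick local sanity check with a hypothetical article (uncomment to run the
# function outside of Gradio):
# sample_article = "The city council approved a new transit budget on Monday..."
# print(generate_summary(sample_article))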
# Set up Gradio interface
iface = gr.Interface(
    fn=generate_summary,  # Function to be called on user input
    inputs="text",        # Single text box for user input
    outputs="text",       # Output as text
    live=True,            # Optional: updates summary as user types
    title="News Summarizer",  # Title of the app
    description="Enter a news article, and get a concise summary of the content.",
)

# Launch Gradio app
iface.launch()
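# Hedged deployment notes (standard Gradio launch options, not used above):
# iface.launch(share=True)  # create a temporary public share link
# iface.launch(server_name="0.0.0.0", server_port=7860)  # bind explicitly for container hosting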