import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
# Generation / tokenization settings
max_seq_length = 2048  # maximum number of prompt tokens passed to the tokenizer

# Run inference on CPU explicitly
device = "cpu"
# Load model and tokenizer
# 1. Load the base model (unsloth/Meta-Llama-3.1-8B)
base_model_name = "unsloth/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load in full precision on CPU; this avoids bitsandbytes, whose 4-bit/8-bit
# quantization requires a GPU
model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32)
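# Note on memory: an 8B-parameter model in float32 needs roughly 32 GB of RAM
# (8e9 params x 4 bytes) before activations, which can exceed basic CPU
# hardware. If memory is tight, loading in bfloat16 halves the footprint
# (a sketch, assuming the host CPU handles bfloat16 efficiently):
# model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.bfloat16)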
# 2. Apply the fine-tuned LoRA adapter on top of the base model
adapter_model_name = "xiddiqui/News_Summarizer"  # adapter repo on Hugging Face
model = PeftModel.from_pretrained(model, adapter_model_name)

# Move the model to CPU and switch to inference mode
model.to(device)
model.eval()
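# Optional: merging the LoRA weights into the base model removes the adapter's
# extra matrix multiplications at inference time, which can speed up CPU
# generation. A sketch using PEFT's merge API (assumes a standard LoRA adapter):
# model = model.merge_and_unload()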
# Define the summarization function
def generate_summary(input_text):
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Summarize the following:

### Input:
{}

### Response:
"""
    # Tokenize the prompt and move the tensors to the target device
    inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    ).to(device)
    # Generate the summary; no_grad avoids storing activations during inference.
    # Note: max_new_tokens caps only the generated text; the original max_length
    # would have capped prompt + output combined and truncated long articles.
    with torch.no_grad():
        summary_ids = model.generate(
            **inputs,
            max_new_tokens=128,      # limit the length of the generated summary
            num_beams=4,             # beam search for higher-quality output
            no_repeat_ngram_size=2,  # avoid repeating n-grams in the summary
            early_stopping=True,
        )
    # Decode only the newly generated tokens; for causal LMs, generate()
    # returns the prompt followed by the completion
    prompt_length = inputs["input_ids"].shape[1]
    summary = tokenizer.decode(summary_ids[0][prompt_length:], skip_special_tokens=True)
    return summary
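# Quick local smoke test before wiring up the UI (the article text below is a
# made-up placeholder, not from the original repo):
# print(generate_summary("The city council approved the new transit budget on Tuesday..."))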
# Set up the Gradio interface
iface = gr.Interface(
    fn=generate_summary,  # function called on user input
    inputs="text",        # single text box for user input
    outputs="text",       # output rendered as text
    live=False,           # re-running an 8B model on every keystroke is far too slow on CPU
    title="News Summarizer",
    description="Enter a news article, and get a concise summary of the content.",
)
# Launch the Gradio app
iface.launch()
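# Dependencies: on a Hugging Face Space these go in requirements.txt. The list
# below simply reflects the imports above; version pins are left to the
# deployer, since the original repo's pins are not shown here:
#   gradio
#   transformers
#   peft
#   torch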