import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Model name and generation parameters
model_name = "xiddiqui/News_Summarizer"
max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
dtype = None           # None for auto detection
load_in_4bit = False   # False since we aren't using 4-bit quantization

# Run everything on the CPU explicitly
device = "cpu"

# Load model and tokenizer
# 1. Load the base model (unsloth/Meta-Llama-3.1-8B)
base_model_name = "unsloth/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load in full precision on CPU; skip 4-bit loading to avoid a bitsandbytes dependency
model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32)

# 2. Load the fine-tuned LoRA adapter on top of the base model
adapter_model_name = "xiddiqui/News_Summarizer"  # Adapter path on Hugging Face
model = PeftModel.from_pretrained(model, adapter_model_name)

# Move the model to CPU (no GPU needed)
model.to(device)


# Define the summarization function
def generate_summary(input_text):
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Summarize the following:

### Input:
{}

### Response:
"""

    # Tokenize and prepare the input text
    inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    ).to(device)  # Ensure computations are done on the CPU

    # Generate the summary
    summary_ids = model.generate(
        **inputs,
        max_new_tokens=128,      # Limit the length of the generated summary (not the prompt)
        num_beams=4,             # Beam search for higher-quality output
        no_repeat_ngram_size=2,  # Avoid repeating n-grams in the summary
        early_stopping=True,
    )

    # Decode only the newly generated tokens, dropping the prompt from the output
    summary = tokenizer.decode(
        summary_ids[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True,
    )
    return summary


# Set up the Gradio interface
iface = gr.Interface(
    fn=generate_summary,  # Function called on user input
    inputs="text",        # Single text box for user input
    outputs="text",       # Output as text
    live=True,            # Optional: updates the summary as the user types
    title="News Summarizer",  # Title of the app
    description="Enter a news article, and get a concise summary of the content.",
)

# Launch the Gradio app
iface.launch()