import gradio as gr
from transformers import TextStreamer
from unsloth import FastLanguageModel

max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "xiddiqui/News_Summarizer",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)
FastLanguageModel.for_inference(model)  # Enable Unsloth's optimized inference mode

# Define the summarization function
def generate_summary(input_text):
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Summarize the following:

### Input:
{}

### Response:
"""

    # Tokenize the prompt and keep the tensors on the same device as the model
    # (a 4-bit model lives on the GPU, so inputs must not be forced onto the CPU)
    inputs = tokenizer(
        [alpaca_prompt.format(input_text)],
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,
    ).to(model.device)

    # Set up a TextStreamer so tokens are printed to the console as they are generated;
    # skip_prompt=True keeps the echoed prompt out of the stream
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Generate the summary
    summary_ids = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=64,  # Limit the length of the output
    )

    # Decode only the newly generated tokens, slicing off the prompt
    generated_tokens = summary_ids[0][inputs["input_ids"].shape[1]:]
    summary = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return summary

# Set up Gradio interface
iface = gr.Interface(
    fn=generate_summary,  # Function to be called on user input
    inputs="text",        # Single text box for user input
    outputs="text",       # Output as text
    live=False,           # Regenerating on every keystroke would be far too costly for an LLM
    title="News Summarizer",  # Title of the app
    description="Enter a news article, and get a concise summary of the content.",
)

# Launch Gradio app
iface.launch()
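
# Optional sanity check: a minimal sketch of calling generate_summary() directly,
# e.g. from a REPL or before iface.launch() (launch() blocks, so code placed after
# it only runs once the server stops). The sample text is purely illustrative.
#
# sample_article = (
#     "The city council voted on Tuesday to approve a new transit budget, "
#     "expanding bus service to three suburban districts starting next year."
# )
# print(generate_summary(sample_article))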