Spaces:

hhelesto
/

gradio-comment-generator

Sleeping

File size: 1,899 Bytes

64eb70f
ebfe80b
64eb70f
 
ebfe80b
64eb70f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebfe80b
64eb70f
ebfe80b
64eb70f
 
ebfe80b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64eb70f
 
 
ebfe80b
64eb70f
 
45289ed

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
from peft import PeftModel
import gradio as gr
import threading

# --- Load Model & Tokenizer ---
# Everything in this section runs once, at import time, and leaves the
# module-level names `model` and `tokenizer` ready for chat_fn to use.

base_model_name = "unsloth/llama-3.2-3b-bnb-4bit"
adapter_model_name = "aismaanly/ai_synthetic"

# 4-bit NF4 quantization with double quantization; compute in bfloat16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

print("Loading base model...")
model = AutoModelForCausalLM.from_pretrained(
    base_model_name, device_map="auto", quantization_config=bnb_config
)

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

print("Loading PEFT adapter...")
# Wrap the base model with the adapter, then merge the adapter into it and
# drop the PEFT wrapper so `model` is a plain causal-LM again.
model = PeftModel.from_pretrained(model, adapter_model_name).merge_and_unload()
print("Model ready!")

# --- Gradio Streaming Function ---

def chat_fn(message, history, max_tokens):
    """Stream a model completion for ``message`` to the chat UI.

    The prompt is the raw ``message`` only; ``history`` is accepted because
    the chat interface passes it, but it is not fed to the model.

    Args:
        message: Text of the latest user turn, used verbatim as the prompt.
        history: Previous turns supplied by the chat interface (unused).
        max_tokens: Upper bound on newly generated tokens. Anything ``int()``
            accepts, e.g. the string value selected in the dropdown.

    Yields:
        The reply generated so far; each yielded string extends the previous
        one, so the UI can re-render the partial answer.

    Raises:
        RuntimeError: If ``model.generate`` fails in the worker thread. The
            original exception is attached as ``__cause__``.
    """
    inputs = tokenizer(message, return_tensors="pt").to(model.device)

    # skip_prompt=True: otherwise the first streamed chunk is the decoded
    # prompt and the reply starts by echoing the user's own message.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=int(max_tokens),
        # temperature only has an effect when sampling; enable it explicitly
        # instead of relying on the checkpoint's generation config.
        do_sample=True,
        temperature=0.7,
    )

    failures = []

    def _generate():
        # Runs in the worker thread. If generate() raises, the streamer would
        # never receive its end-of-stream signal and the consumer loop below
        # would block forever, so record the error and close the stream.
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:
            failures.append(exc)
            streamer.end()

    worker = threading.Thread(target=_generate)
    worker.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

    # The stream is exhausted, so the worker is done or about to be.
    worker.join()
    if failures:
        raise RuntimeError("Text generation failed") from failures[0]

# --- Create Dropdown Component for max tokens ---

# The choices are strings; chat_fn converts the selected value with int().
dropdown = gr.Dropdown(
    label="Max New Tokens",
    value="100",
    choices=["100", "200", "300"],
)

# --- Launch Gradio Chat Interface ---

# The dropdown is registered as an additional input, so its current value is
# handed to chat_fn along with the message and history.
demo = gr.ChatInterface(
    chat_fn,
    title="LLM Finetuned Comment Generator",
    description="Chat with the model.",
    additional_inputs=[dropdown],
)
demo.launch(share=False)