# nightscape's picture
# Prepare for using a GPU
# 1ceb162
import torch
import spaces
import gradio as gr
from transformers import pipeline
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from peft import PeftModel
# --- Model setup -----------------------------------------------------------
# Everything in this section runs once, at import time: it loads the base
# model and tokenizer, attaches the LoRA adapter, and builds the module-level
# streamer and text-generation pipeline.

# Target device for the explicit `.to(device)` calls below.
device = torch.device("cuda")
# Load the base model
# (the checkpoint name indicates a pre-quantized bitsandbytes 4-bit build).
base_model = "unsloth/deepseek-r1-distill-qwen-14b-unsloth-bnb-4bit"
model = AutoModelForCausalLM.from_pretrained(
base_model,
device_map="auto",
torch_dtype=torch.float16
)
# NOTE(review): the model was already placed via device_map="auto" above, so
# this explicit move looks redundant; depending on the installed
# transformers/bitsandbytes versions, `.to()` on a bitsandbytes-quantized
# model may also be rejected -- confirm against the deployment environment.
model = model.to(device)
# The tokenizer comes from the same repository as the base model.
tokenizer = AutoTokenizer.from_pretrained(base_model)
# Load your LORA adapter
# (wraps the base model loaded above in a PeftModel).
lora_model = "nightscape/liberation-unleashed-DeepSeek-R1-Distill-Qwen-14B"
model = PeftModel.from_pretrained(model, lora_model)
# NOTE(review): same caveat as the earlier `.to(device)` call -- confirm it is
# needed after the adapter has been attached.
model = model.to(device)
# Optionally, merge LORA weights with the base model
# model = model.merge_and_unload()
# Module-level streamer, created once at import time. skip_prompt /
# skip_special_tokens ask it to leave the prompt and special tokens out of the
# streamed text; timeout=60. is the streamer's queue timeout (in seconds, per
# the transformers documentation).
streamer = TextIteratorStreamer(tokenizer, timeout=60., skip_prompt=True, skip_special_tokens=True)
# Text-generation pipeline around the adapter-wrapped model and its tokenizer.
generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to ``message`` for the Gradio chat UI.

    Builds a chat-format message list (system prompt, prior turns, new user
    message), runs the text-generation pipeline in a background thread, and
    yields the reply accumulated so far each time the streamer delivers a new
    chunk of text.

    Args:
        message: The user's new message.
        history: Previous turns as ``(user_text, assistant_text)`` pairs;
            empty entries within a pair are skipped.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to generation as ``max_new_tokens``.
        temperature: Sampling temperature forwarded to generation.
        top_p: Nucleus-sampling threshold forwarded to generation.

    Yields:
        The reply text accumulated so far (a growing string).
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    # Use a fresh streamer per request so chunks left over from an earlier
    # (failed or abandoned) generation can never leak into this reply.
    reply_streamer = TextIteratorStreamer(
        tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        streamer=reply_streamer,
        max_new_tokens=max_tokens,
        num_return_sequences=1,
        temperature=temperature,
        top_p=top_p,
    )

    # The conversation is the pipeline's positional input; it must not be
    # folded into the kwargs dict. The worker thread has to be started
    # explicitly, otherwise nothing ever feeds the streamer and the loop below
    # only waits for the streamer timeout.
    thread = Thread(target=generator, args=(messages,), kwargs=generation_kwargs)
    thread.start()

    response = ""
    for chunk in reply_streamer:
        print(chunk)
        response += chunk
        yield response

    # The streamer has signalled end-of-stream; wait for the worker to finish.
    thread.join()
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls rendered with the chat box. Their values are handed to
# `respond` after (message, history), in this order:
# system_message, max_tokens, temperature, top_p.
_additional_inputs = [
    gr.Textbox(
        label="System message",
        value="You are a guiding assistant on the Liberation Unleashed forum. You help users realize the truth of 'no separate self' by asking direct, experiential questions. Analyze the seeker's statements for signs of resistance or fear. If the seeker shows fear or unrealistic expectations, note that in the reasoning and plan a gentle approach​. You do not lecture or use spiritual jargon, you keep the user focused on immediate experience. Ensure the reasoning concludes with a strategy that addresses the seeker's needs directly.",
    ),
    gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        minimum=0.1,
        maximum=1.0,
        step=0.05,
        value=0.95,
    ),
]

# Chat UI wired to the streaming `respond` callback.
demo = gr.ChatInterface(respond, additional_inputs=_additional_inputs)
if __name__ == "__main__":
demo.launch()