# Source: Hugging Face Spaces app (Space status when captured: Sleeping)
import torch | |
import spaces | |
import gradio as gr | |
from transformers import pipeline | |
from threading import Thread | |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer | |
from peft import PeftModel | |
# Kept as a module-level name for backward compatibility; the model itself is
# placed by `device_map="auto"` below rather than by an explicit `.to(device)`.
device = torch.device("cuda")

# Load the base model.
# NOTE(review): the checkpoint name suggests a pre-quantized bitsandbytes 4-bit
# model -- confirm against the model card.
base_model = "unsloth/deepseek-r1-distill-qwen-14b-unsloth-bnb-4bit"
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    torch_dtype=torch.float16,
)
# Deliberately no `model.to(device)`: with `device_map="auto"` the weights are
# already dispatched to the available device(s). Moving a dispatched model again
# is redundant, and `.to()` on quantized models is rejected by some
# transformers/bitsandbytes versions.
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Load the LoRA adapter on top of the base model. The adapter is attached to the
# already-placed base model, so no further device move is needed here either.
lora_model = "nightscape/liberation-unleashed-DeepSeek-R1-Distill-Qwen-14B"
model = PeftModel.from_pretrained(model, lora_model)

# Optionally, merge LoRA weights with the base model
# model = model.merge_and_unload()

# Module-level streamer: yields decoded text chunks, omits the prompt and special
# tokens, and waits at most 60 seconds for the next chunk.
streamer = TextIteratorStreamer(tokenizer, timeout=60., skip_prompt=True, skip_special_tokens=True)

# Text-generation pipeline wrapping the adapter-augmented model and its tokenizer.
generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply for the Gradio ChatInterface.

    Builds a chat-format message list from the system prompt, the previous
    turns and the new user message, runs the module-level ``generator``
    pipeline on a worker thread, and yields the reply as it grows.

    Args:
        message: The new user message.
        history: Previous turns as ``(user_text, assistant_text)`` pairs;
            empty/falsy entries in a pair are skipped.
        system_message: Text sent as the ``system`` role message.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature forwarded to generation.
        top_p: Nucleus-sampling threshold forwarded to generation.

    Yields:
        The cumulative reply text generated so far (not just the newest chunk).
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # One streamer per request: a streamer shared between overlapping requests
    # would interleave their tokens. Same settings as the module-level one.
    reply_streamer = TextIteratorStreamer(
        tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True
    )

    # Only keyword options go here. The chat messages are passed positionally
    # below: `dict(messages, ...)` would treat each two-key message dict as a
    # key/value pair and collapse the history to {"role": "content"}.
    generation_kwargs = dict(
        streamer=reply_streamer,
        max_new_tokens=int(max_tokens),  # sliders may deliver floats
        num_return_sequences=1,
        do_sample=True,  # temperature/top_p only take effect when sampling
        temperature=temperature,
        top_p=top_p,
    )

    # Generation blocks until finished, so it runs on a worker thread while
    # this generator drains the streamer. The thread must actually be started,
    # otherwise the loop below only waits for the streamer timeout.
    thread = Thread(target=generator, args=(messages,), kwargs=generation_kwargs)
    thread.start()

    response = ""
    for new_text in reply_streamer:
        print(new_text)  # server-side log of each streamed chunk
        response += new_text
        yield response

    # The streamer is exhausted, so generation is done; reap the worker.
    thread.join()
# For information on how to customize the ChatInterface, peruse the gradio docs:
# https://www.gradio.app/docs/chatinterface

# Extra controls shown with the chat box. Their values are passed to `respond`
# after (message, history), in this order:
# system_message, max_tokens, temperature, top_p.
_system_message_box = gr.Textbox(
    value="You are a guiding assistant on the Liberation Unleashed forum. You help users realize the truth of 'no separate self' by asking direct, experiential questions. Analyze the seeker's statements for signs of resistance or fear. If the seeker shows fear or unrealistic expectations, note that in the reasoning and plan a gentle approach. You do not lecture or use spiritual jargon, you keep the user focused on immediate experience. Ensure the reasoning concludes with a strategy that addresses the seeker's needs directly.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    label="Max new tokens", minimum=1, maximum=2048, value=512, step=1
)
_temperature_slider = gr.Slider(
    label="Temperature", minimum=0.1, maximum=4.0, value=0.7, step=0.1
)
_top_p_slider = gr.Slider(
    label="Top-p (nucleus sampling)",
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
)

# Chat UI driven by the streaming `respond` generator.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
)

# Start the web app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()