Spaces:

nightscape
/

liberation-unleashed

Sleeping

App Files Files Community

liberation-unleashed / app.py

nightscape

Prepare for using a GPU

1ceb162 17 days ago

raw

history blame contribute delete

3.02 kB

	import torch
	import spaces

	import gradio as gr
	from transformers import pipeline
	from threading import Thread

	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
	from peft import PeftModel

	device = torch.device("cuda")

	# Load the base model
	base_model = "unsloth/deepseek-r1-distill-qwen-14b-unsloth-bnb-4bit"

	# device_map="auto" asks from_pretrained to place the weights itself, so the
	# base model is not moved again with .to(device) at this point: the single
	# move after the adapter is attached covers the wrapped base model too.
	# NOTE(review): transformers is known to reject .to() on bitsandbytes-quantized
	# models for some transformers/bitsandbytes version combinations - confirm
	# against the versions pinned for this Space.
	model = AutoModelForCausalLM.from_pretrained(
	    base_model,
	    device_map="auto",
	    torch_dtype=torch.float16,
	)
	tokenizer = AutoTokenizer.from_pretrained(base_model)

	# Load your LORA adapter
	lora_model = "nightscape/liberation-unleashed-DeepSeek-R1-Distill-Qwen-14B"
	model = PeftModel.from_pretrained(model, lora_model)

	# One device move for the adapter-wrapped model (base + LoRA weights).
	model = model.to(device)

	# Optionally, merge LORA weights with the base model
	# model = model.merge_and_unload()

	# Module-level streamer and text-generation pipeline built on the
	# adapter-wrapped model and its tokenizer.
	streamer = TextIteratorStreamer(tokenizer, timeout=60., skip_prompt=True, skip_special_tokens=True)
	generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

	@spaces.GPU
	def respond(
	    message,
	    history: list[tuple[str, str]],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	):
	    """Stream the assistant's reply to ``message``, yielding the text so far.

	    Args:
	        message: Latest user message.
	        history: Earlier (user, assistant) exchanges; empty entries are skipped.
	        system_message: Content of the leading system turn.
	        max_tokens: Upper bound passed to generation as ``max_new_tokens``.
	        temperature: Sampling temperature forwarded to generation.
	        top_p: Nucleus-sampling threshold forwarded to generation.

	    Yields:
	        The accumulated response text after each streamed chunk.
	    """
	    messages = [{"role": "system", "content": system_message}]
	    for user_turn, assistant_turn in history:
	        if user_turn:
	            messages.append({"role": "user", "content": user_turn})
	        if assistant_turn:
	            messages.append({"role": "assistant", "content": assistant_turn})
	    messages.append({"role": "user", "content": message})

	    # A fresh streamer per request: a shared one could still hold tokens from
	    # an earlier generation that was abandoned before it was fully consumed.
	    token_stream = TextIteratorStreamer(
	        tokenizer, timeout=60., skip_prompt=True, skip_special_tokens=True
	    )
	    generation_kwargs = dict(
	        streamer=token_stream,
	        max_new_tokens=int(max_tokens),
	        num_return_sequences=1,
	        temperature=temperature,
	        top_p=top_p,
	    )

	    # The conversation is the pipeline's positional input. (Passing it to
	    # dict() would fold each two-key message into a bogus "role" entry.)
	    # Generation runs in a background thread so this generator can consume
	    # the streamer while tokens are still being produced.
	    worker = Thread(target=generator, args=(messages,), kwargs=generation_kwargs)
	    worker.start()

	    response = ""
	    for chunk in token_stream:
	        response += chunk
	        yield response

	    worker.join()


	"""
	For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
	"""
	# Chat UI around respond(). The extra controls are listed in the same order
	# as respond()'s parameters after (message, history): system_message,
	# max_tokens, temperature, top_p.
	demo = gr.ChatInterface(
	    fn=respond,
	    additional_inputs=[
	        gr.Textbox(
	            label="System message",
	            value="You are a guiding assistant on the Liberation Unleashed forum. You help users realize the truth of 'no separate self' by asking direct, experiential questions. Analyze the seeker's statements for signs of resistance or fear. If the seeker shows fear or unrealistic expectations, note that in the reasoning and plan a gentle approach. You do not lecture or use spiritual jargon, you keep the user focused on immediate experience. Ensure the reasoning concludes with a strategy that addresses the seeker's needs directly.",
	        ),
	        gr.Slider(
	            label="Max new tokens",
	            minimum=1,
	            maximum=2048,
	            step=1,
	            value=512,
	        ),
	        gr.Slider(
	            label="Temperature",
	            minimum=0.1,
	            maximum=4.0,
	            step=0.1,
	            value=0.7,
	        ),
	        gr.Slider(
	            label="Top-p (nucleus sampling)",
	            minimum=0.1,
	            maximum=1.0,
	            step=0.05,
	            value=0.95,
	        ),
	    ],
	)


	# Launch the chat interface only when this file is run as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	demo.launch()