Spaces:

tchung1970
/

openai-gpt-oss-20b-ko

Sleeping

App Files Files

openai-gpt-oss-20b-ko / app.py

tchung1970

Update app.py

1aec078 verified 26 days ago

raw

history blame

3.98 kB

	from transformers import pipeline, TextIteratorStreamer
	import torch
	from threading import Thread
	import gradio as gr
	import spaces
	import re

	# Model identifier handed to the transformers pipeline below.
	model_id = "openai/gpt-oss-20b"

	# The pipeline is created once at module import; generate_response() reuses
	# this single instance (and its tokenizer) for every request.
	pipe = pipeline(task="text-generation", model=model_id, torch_dtype="auto", device_map="auto")
	def format_conversation_history(chat_history):
	    """Normalize chat history entries into plain role/content messages.

	    Args:
	        chat_history: Iterable of dicts, each providing a ``"role"`` and a
	            ``"content"`` key. ``content`` may be any value; lists get
	            special handling (see Returns).

	    Returns:
	        A new list of ``{"role": ..., "content": ...}`` dicts in the same
	        order. When ``content`` is a list whose first element is a dict with
	        a ``"text"`` key, that text is used. Any other list (empty, or with
	        a non-dict / text-less first element) is converted with ``str()``.
	        Non-list content is passed through unchanged.

	    Raises:
	        KeyError: If an entry lacks ``"role"`` or ``"content"``.
	    """
	    messages = []
	    for item in chat_history:
	        role = item["role"]
	        content = item["content"]
	        if isinstance(content, list):
	            first_part = content[0] if content else None
	            # Only look up "text" on dict parts. For a str part,
	            # ``"text" in part`` is a substring test and ``part["text"]``
	            # would raise TypeError.
	            if isinstance(first_part, dict) and "text" in first_part:
	                content = first_part["text"]
	            else:
	                content = str(content)
	        messages.append({"role": role, "content": content})
	    return messages

	# Text that separates the reasoning part of the decoded stream from the
	# answer part. It is matched case-insensitively.
	_FINAL_MARKER = "assistantfinal"
	_FINAL_MARKER_RE = re.compile(re.escape(_FINAL_MARKER), re.IGNORECASE)


	@spaces.GPU()
	def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
	    """Stream a formatted reply for ``input_data`` given the prior chat.

	    The prompt is the optional system message, the normalized history and
	    the new user message. Generation runs on a background thread and its
	    text is consumed through a ``TextIteratorStreamer``.

	    Args:
	        input_data: Content of the new user message.
	        chat_history: Previous messages, normalized with
	            ``format_conversation_history``.
	        max_new_tokens: Forwarded to the pipeline as ``max_new_tokens``.
	        system_prompt: If truthy, prepended as a ``system`` message.
	        temperature: Forwarded to the pipeline as ``temperature``.
	        top_p: Forwarded to the pipeline as ``top_p``.
	        top_k: Forwarded to the pipeline as ``top_k``.
	        repetition_penalty: Forwarded to the pipeline as
	            ``repetition_penalty``.

	    Yields:
	        After every streamed chunk, the full text so far: a ``<details>``
	        section with the reasoning (everything before the first
	        ``assistantfinal`` marker, minus a leading ``analysis``) followed by
	        the answer (everything after the marker).
	    """
	    new_message = {"role": "user", "content": input_data}
	    system_message = [{"role": "system", "content": system_prompt}] if system_prompt else []
	    processed_history = format_conversation_history(chat_history)
	    messages = system_message + processed_history + [new_message]
	    streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
	    generation_kwargs = {
	        "max_new_tokens": max_new_tokens,
	        "do_sample": True,
	        "temperature": temperature,
	        "top_p": top_p,
	        "top_k": top_k,
	        "repetition_penalty": repetition_penalty,
	        "streamer": streamer,
	    }
	    # Run generation in the background so this generator can relay chunks
	    # as soon as the streamer produces them.
	    thread = Thread(target=pipe, args=(messages,), kwargs=generation_kwargs)
	    thread.start()

	    # Simple formatting without harmony: no tool usage is needed, and
	    # harmony caused problems on HF Spaces.
	    thinking = ""
	    final = ""
	    started_final = False
	    for chunk in streamer:
	        if started_final:
	            final += chunk
	        else:
	            # Search the accumulated text, not just the chunk, so a marker
	            # split across two chunks is still found. Only the tail that
	            # could hold a new match is rescanned.
	            search_from = max(0, len(thinking) - (len(_FINAL_MARKER) - 1))
	            thinking += chunk
	            marker = _FINAL_MARKER_RE.search(thinking, search_from)
	            if marker is not None:
	                # Slice on the match position: detection and splitting now
	                # use the same case-insensitive rule, so this cannot fail
	                # on a differently-cased marker.
	                final = thinking[marker.end():]
	                thinking = thinking[:marker.start()]
	                started_final = True
	        clean_thinking = re.sub(r'^analysis\s*', '', thinking).strip()
	        clean_final = final.strip()
	        formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
	        yield formatted

	# Chat UI wired to generate_response. The additional inputs are listed in
	# the order of generate_response's trailing parameters: max_new_tokens,
	# system_prompt, temperature, top_p, top_k, repetition_penalty.
	demo = gr.ChatInterface(
	    fn=generate_response,
	    additional_inputs=[
	        gr.Slider(minimum=64, maximum=4096, value=2048, step=1, label="Max new tokens"),
	        gr.Textbox(
	            value="You are a helpful assistant. Reasoning: medium",
	            label="System Prompt",
	            placeholder="Change system prompt",
	            lines=4,
	        ),
	        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
	        gr.Slider(minimum=0.05, maximum=1.0, value=0.9, step=0.05, label="Top-p"),
	        gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-k"),
	        gr.Slider(minimum=1.0, maximum=2.0, value=1.0, step=0.05, label="Repetition Penalty"),
	    ],
	    # Each example row is a one-element list holding a {"text": ...} dict.
	    examples=[
	        [{"text": question}]
	        for question in (
	            "화성은 지구에서 얼마나 떨어져 있으며, 오늘날의 로켓으로는 그곳까지 가는 데 얼마나 걸리나요?",
	            "우주에서는 왜 우주인들이 떠 있는 것처럼 보이나요?",
	            "왜 우리는 지구에서 항상 달의 같은 면만 볼 수 있나요?",
	        )
	    ],
	    type="messages",
	    multimodal=False,
	    cache_examples=False,
	    fill_height=True,
	    stop_btn="Stop Generation",
	    description="""# 오픈에이아이 GPT-OSS-20B
	아래 입력란에 질문을 입력하고 리턴 키를 누르거나, 3개의 예시 질문 중 하나를 클릭하세요.""",
	    textbox=gr.Textbox(
	        placeholder="질문을 입력하고 리턴 키를 누르세요",
	        label="Query Input",
	    ),
	    theme=gr.themes.Soft(),
	)

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch(share=True)