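"""Gradio chat demo for deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B using a
transformers text-generation pipeline."""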
import torch
from transformers import pipeline
import gradio as gr
# ๋ชจ๋ธ ๋กœ๋“œ
def load_model(model_name="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"):
pipe = pipeline(
"text-generation",
model=model_name,
device_map="sequential",
torch_dtype=torch.float16,
trust_remote_code=True,
truncation=True,
max_new_tokens=2048,
model_kwargs={
"low_cpu_mem_usage": True,
"offload_folder": "offload"
}
)
return pipe
# Load the pipeline once at startup instead of on every chat turn
pipe = load_model()

# Generate a chatbot response
def chat(message, history):
    # history is supplied by gr.ChatInterface but is not folded into the
    # prompt here, so each turn is answered without conversation memory
    prompt = f"Human: {message}\n\nAssistant:"
# ๋ชจ๋ธ ์ƒ์„ฑ
response = pipe(
prompt,
max_new_tokens=2048,
temperature=0.7,
do_sample=True,
truncation=True,
pad_token_id=50256
)
    # Extract and clean up the response
    try:
        bot_text = response[0]["generated_text"]
        bot_text = bot_text.split("Assistant:")[-1].strip()
        # DeepSeek-R1 emits its reasoning before a closing </think> tag;
        # keep only the final answer that follows it
        if "</think>" in bot_text:
            bot_text = bot_text.split("</think>")[-1].strip()
    except (KeyError, IndexError):
        bot_text = "Sorry, there was a problem generating the response."
    return bot_text
# Set up the Gradio interface
demo = gr.ChatInterface(
    chat,
    chatbot=gr.Chatbot(
        height=600,
        type="messages"  # use the role/content message format
    ),
    textbox=gr.Textbox(placeholder="Enter your message...", container=False, scale=7),
    title="DeepSeek-R1 Chatbot",
    description="A demo for testing conversations with the DeepSeek-R1-Distill-Qwen-1.5B model.",
    examples=["Hello", "Who are you?", "What can you do?"],
    theme=gr.themes.Soft(),
    type="messages"  # set the matching type on the ChatInterface as well
)
# Run the server
if __name__ == "__main__":
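    # Gradio serves on port 7860 by default; binding to 0.0.0.0 makes the app
    # reachable from other machines/containers, not just localhost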
    demo.launch(server_name="0.0.0.0")