import torch
from transformers import pipeline
import gradio as gr


# Load the model once at import time so every chat turn reuses the same
# pipeline instead of re-initializing it per message
def load_model(model_name="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"):
    pipe = pipeline(
        "text-generation",
        model=model_name,
        device_map="sequential",
        torch_dtype=torch.float16,
        trust_remote_code=True,
        truncation=True,
        max_new_tokens=2048,
        model_kwargs={
            "low_cpu_mem_usage": True,
            "offload_folder": "offload",
        },
    )
    return pipe


pipe = load_model()


# Chatbot response generation function
def chat(message, history):
    prompt = f"Human: {message}\n\nAssistant:"

    # Run generation; pad with the model's own EOS token rather than a
    # hard-coded GPT-2 id (50256), which does not apply to Qwen tokenizers
    response = pipe(
        prompt,
        max_new_tokens=2048,
        temperature=0.7,
        do_sample=True,
        truncation=True,
        pad_token_id=pipe.tokenizer.eos_token_id,
    )

    # Extract and post-process the response: keep only the text after the
    # final "Assistant:" marker, then drop the model's <think>...</think>
    # reasoning block if one is present
    try:
        bot_text = response[0]["generated_text"]
        bot_text = bot_text.split("Assistant:")[-1].strip()
        if "</think>" in bot_text:
            bot_text = bot_text.split("</think>")[-1].strip()
    except Exception:
        bot_text = "Sorry, there was a problem generating the response."

    return bot_text


# Set up the Gradio interface
demo = gr.ChatInterface(
    chat,
    chatbot=gr.Chatbot(
        height=600,
        type="messages",  # use the message-dict format
    ),
    textbox=gr.Textbox(placeholder="Enter your message...", container=False, scale=7),
    title="DeepSeek-R1 Chatbot",
    description="A demo for testing conversations with the DeepSeek-R1-Distill-Qwen-1.5B model.",
    examples=["Hello", "Who are you?", "What can you do?"],
    theme=gr.themes.Soft(),
    type="messages",  # the ChatInterface type must match the Chatbot's
)

# Run the server
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")
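

# Optional: a minimal sketch (not part of the original demo) that builds the
# prompt with the tokenizer's chat template instead of the hand-rolled
# "Human: ... Assistant:" string. DeepSeek-R1 distill checkpoints ship a chat
# template, so this usually yields better-formatted generations; the function
# name chat_with_template is illustrative, not from the original script.
def chat_with_template(message, history):
    messages = [{"role": "user", "content": message}]
    prompt = pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    out = pipe(prompt, max_new_tokens=2048, temperature=0.7, do_sample=True)
    # Drop the echoed prompt, then strip any <think>...</think> block
    text = out[0]["generated_text"][len(prompt):]
    return text.split("</think>")[-1].strip()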