import torch
from transformers import pipeline
import gradio as gr


# Load the model once at import time so every chat turn reuses the same
# pipeline instead of re-initializing it per message
def load_model(model_name="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"):
    pipe = pipeline(
        "text-generation",
        model=model_name,
        device_map="sequential",
        torch_dtype=torch.float16,
        trust_remote_code=True,
        truncation=True,
        max_new_tokens=2048,
        model_kwargs={
            "low_cpu_mem_usage": True,
            "offload_folder": "offload",
        },
    )
    return pipe


pipe = load_model()


# Chatbot response generation function
def chat(message, history):
    prompt = f"Human: {message}\n\nAssistant:"

    # Run generation; pad with the model's own EOS token rather than a
    # hard-coded GPT-2 id (50256), which does not apply to Qwen tokenizers
    response = pipe(
        prompt,
        max_new_tokens=2048,
        temperature=0.7,
        do_sample=True,
        truncation=True,
        pad_token_id=pipe.tokenizer.eos_token_id,
    )

    # Extract and post-process the response: keep only the text after the
    # final "Assistant:" marker, then drop the model's <think>...</think>
    # reasoning block if one is present
    try:
        bot_text = response[0]["generated_text"]
        bot_text = bot_text.split("Assistant:")[-1].strip()
        if "</think>" in bot_text:
            bot_text = bot_text.split("</think>")[-1].strip()
    except Exception:
        bot_text = "Sorry, there was a problem generating the response."

    return bot_text


# Set up the Gradio interface
demo = gr.ChatInterface(
    chat,
    chatbot=gr.Chatbot(
        height=600,
        type="messages",  # use the message-dict format
    ),
    textbox=gr.Textbox(placeholder="Enter your message...", container=False, scale=7),
    title="DeepSeek-R1 Chatbot",
    description="A demo for testing conversations with the DeepSeek-R1-Distill-Qwen-1.5B model.",
    examples=["Hello", "Who are you?", "What can you do?"],
    theme=gr.themes.Soft(),
    type="messages",  # the ChatInterface type must match the Chatbot's
)

# Run the server
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")
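

# Optional: a minimal sketch (not part of the original demo) that builds the
# prompt with the tokenizer's chat template instead of the hand-rolled
# "Human: ... Assistant:" string. DeepSeek-R1 distill checkpoints ship a chat
# template, so this usually yields better-formatted generations; the function
# name chat_with_template is illustrative, not from the original script.
def chat_with_template(message, history):
    messages = [{"role": "user", "content": message}]
    prompt = pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    out = pipe(prompt, max_new_tokens=2048, temperature=0.7, do_sample=True)
    # Drop the echoed prompt, then strip any <think>...</think> block
    text = out[0]["generated_text"][len(prompt):]
    return text.split("</think>")[-1].strip()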