import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Pick the compute device: use CUDA when torch reports it as available, else CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the tokenizer and the causal language model, then move the model
# onto the selected device.
model_name = "inu-ai/dolly-japanese-gpt-1b"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device)

# Chatbot callback
def chatbot(input_text, chat_history):
    """Generate a reply to *input_text* and append the exchange to the history.

    Args:
        input_text: The user's message. Blank or ``None`` input is ignored.
        chat_history: List of ``{"role": ..., "content": ...}`` dicts from
            previous turns, or ``None`` on the first call.

    Returns:
        A ``(chat_history, chat_history)`` tuple: the same updated list twice,
        one value per output the function is wired to.
    """
    if chat_history is None:
        chat_history = []

    # Nothing to answer: do not run the model or record an empty turn.
    if not input_text or not input_text.strip():
        return chat_history, chat_history

    # Build the prompt. Only the current message is sent; earlier turns are
    # stored in the history but are not part of the prompt.
    new_input = "ユーザー: " + input_text + " ボット:"
    print(f"Input to the model: {new_input}")  # debug

    # Tokenize
    inputs = tokenizer(new_input, return_tensors="pt", padding=True).to(device)
    print(f"Tokenized input: {inputs}")  # debug: inspect the tokenized input

    prompt_length = inputs.input_ids.shape[-1]

    # Generate the reply. max_new_tokens bounds only the continuation, so a
    # long prompt no longer eats into (or exceeds) the generation budget the
    # way a total max_length did.
    # NOTE(review): prompt + 512 new tokens may exceed the model's context
    # window for very long inputs -- confirm the model's position limit.
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_p=0.95,  # nucleus sampling cutoff
        temperature=0.7,  # sampling temperature
    )

    # Debug only: full sequence (prompt + continuation) with special tokens.
    raw_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
    print(f"Generated response (with special tokens): {raw_text}")

    # For a causal LM, generate() returns the prompt followed by the
    # continuation. Decode only the tokens after the prompt so the reply never
    # depends on finding the "ボット:" marker in the text, and drop special
    # tokens so they are not shown to the user.
    generated_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # The model may keep writing and invent the next user turn; keep only the
    # text before such a turn.
    response = response.split("ユーザー:")[0].strip()

    # Record the exchange in role/content message dict form.
    chat_history.append({"role": "user", "content": input_text})
    chat_history.append({"role": "assistant", "content": response})

    return chat_history, chat_history

# Gradio interface setup.
# Inputs: the user's text box plus a State holding the chat history.
_input_components = [
    gr.Textbox(placeholder="ここに入力してください", label="ユーザー入力"),
    gr.State(),
]
# Outputs: the chat display (messages format) plus the State that carries the
# updated chat history.
_output_components = [
    gr.Chatbot(type="messages", label="ボット応答"),
    gr.State(),
]

interface = gr.Interface(
    fn=chatbot,
    inputs=_input_components,
    outputs=_output_components,
    description="inu-ai/dolly-japanese-gpt-1b を使用した日本語チャットボットです。",
    title="日本語チャットボット",
)

# Start the app.
interface.launch()