import os
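# Install the ZhipuAI SDK at runtime (the `from zhipuai import ZhipuAI` below depends on it).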
os.system('pip install zhipuai')
import gradio as gr
from zhipuai import ZhipuAI
import json

def convert_to_openai_format(nested_chat):
    """Convert Gradio's [[user, assistant], ...] history pairs into an OpenAI-style message list."""
    openai_format = []
    for dialogue in nested_chat:
        user_dialogue = {"role": "user", "content": dialogue[0]}
        assistant_dialogue = {"role": "assistant", "content": dialogue[1]}
        openai_format.extend([user_dialogue, assistant_dialogue])
    return openai_format

def master_llm(user_prompt, history, api_key):
    # Generate the system prompt for the expert LLM,
    # e.g. a simple system prompt derived from the user's question.
    if history != []:
        last_round = history[-1]
        last_record_text = f"'''\n# User:\n{last_round[0]}\n\n\n# AI:\n{last_round[1]}\n\n\n# User:\n{user_prompt}\n'''"
    else:
        last_record_text = f"'''\n# User:\n{user_prompt}\n'''"
    syst_prompt = """Based on the conversation (or question) between the user and the AI, decide which domain expert the upcoming conversation needs, and write the system prompt for an AI expert in that domain.

Return it in the following JSON format, strictly keeping every `{}` and `""` closed (note: all fields are strings):

```
{
    "expert_system_prompt":"You are a ... AI. You have experience in ..., and your way of thinking is .... Your task now is ...",
    "temperature":"0.01",
    "top_p":"0.99"
}
```

Parameter notes:
temperature is the randomness of the AI's replies; the lower the value, the more focused and deterministic the reply. Its range is (0,1), exclusive of 0 and 1.
top_p is the nucleus-sampling range of candidate replies the AI considers, e.g. 0.1 means only the top 10% best candidates are sampled. Its range is (0,1), exclusive of 0 and 1.
Tip: in general, a creative expert should have both parameters set higher;
a strictly obedient expert should have temperature and top_p as low as possible.

Note: do not force an expert into existence. If you cannot tell which domain is needed (e.g. there is no context, or the user is typing random things), reply with this default setting exactly:
```
{
    "expert_system_prompt":"Respond to the user's message appropriately (using the context if there is any).",
    "temperature":"0.5",
    "top_p":"0.5"
}
```
    """
    messages = [
        {"role":"system","content":syst_prompt},
        {"role":"user","content":last_record_text}
    ]

    client = ZhipuAI(api_key=api_key)
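    # Near-zero temperature and top_p keep the meta bot's reply (the JSON expert spec) essentially deterministic.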
    response = client.chat.completions.create(
        model = "glm-4",
        messages = messages,
        temperature = 0.01,
        top_p = 0.01,
        do_sample = True
    )

    response_text = response.choices[0].message.content
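    # Slice out the JSON object between the first '{' and the last '}' in case the model adds surrounding text.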
    response_json = json.loads(response_text[response_text.find('{'):response_text.rfind('}')+1])
    expert_system_prompt = response_json['expert_system_prompt']
    temperature = response_json['temperature']
    top_p = response_json['top_p']
    print(response_text)
    
    return expert_system_prompt, temperature, top_p

def expert_llm(user_prompt, history, expert_system_prompt, temperature, top_p, api_key):
    """Answer the user as the expert defined by the master LLM's system prompt and sampling parameters."""
    client = ZhipuAI(api_key=api_key)
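    # Replay the full chat history (converted to OpenAI format) between the expert system prompt and the new user message.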
    if history != []:
        prompt_records = convert_to_openai_format(history)
        messages = [{"role":"system","content":expert_system_prompt}] + prompt_records + [{"role":"user","content":user_prompt}]
    else:
        messages = [{"role":"system","content":expert_system_prompt},{"role":"user","content":user_prompt}]
    response = client.chat.completions.create(
        model = "glm-4",
        messages = messages,
        temperature = float(temperature),
        top_p = float(top_p),
        do_sample = True
    )
    return response.choices[0].message.content

def gradio_fn(message, history, api_key):
    """One chat turn: ask the master LLM for an expert setup, then have that expert answer the user."""
    expert_system_prompt, temperature, top_p = master_llm(message, history, api_key)
    expert_response = expert_llm(message, history, expert_system_prompt, temperature, top_p, api_key)
    return expert_response

with gr.Blocks() as demo:
    gr.Markdown(
        """
        ---
        title: Advanced Dual Prompting
        emoji: 🏆
        colorFrom: green
        colorTo: purple
        sdk: gradio
        sdk_version: 4.16.0
        app_file: app.py
        pinned: false
        ---

        # Simple Dual LLM Chatbot
        
        This is a playground for testing out Stanford's 'Meta-Prompting' logic ([paper link](https://arxiv.org/abs/2401.12954)), in which every user request is first passed to a 'meta' bot, which then generates the system prompt of a field-related 'expert' bot that answers the request.  
        That is, for each round, the meta LLM assigns the most suitable expert for the user's specific request.  
        Stanford claimed this simple setup results in 60%+ better accuracy compared to a standard 'syst_prompt + chat_history' approach.  
        Naturally it is worth checking out, so here is a simple implementation for everybody to play around with.  

        Something to keep in mind:
        1. Currently it requires an API key from ChatGLM (get one here if you don't have one: [link](https://open.bigmodel.cn/usercenter/apikeys))
        2. To balance contextual understanding against token usage, the meta bot only sees the last round of chat plus the current user request when 'generating' an expert.
        """
    )
    api_key = gr.Textbox(label="api_key", placeholder='Enter your ChatGLM API key here...')
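    # The api_key textbox is passed to gradio_fn as an extra argument on every message.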
    main_interface = gr.ChatInterface(fn=gradio_fn, additional_inputs=api_key)

if __name__ == "__main__":
    demo.launch(show_error=True)