import os

import gradio as gr
import torch
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face access token, read from the environment (needed for gated
# or private model repos).
HF_TOKEN = os.getenv("HF_TOKEN")

# Repo IDs mapped to short internal names.
model_names = {
    "mistralai/Mistral-7B-Instruct-v0.3": "mistral-7b-instruct",
    "BICORP/Lake-1-Advanced": "lake-1-advanced"
}


def download_model(repo_id):
    # snapshot_download fetches the whole repository (weights, config, and
    # tokenizer files) and returns the local directory that from_pretrained
    # expects; hf_hub_download would only fetch a single file.
    model_path = snapshot_download(repo_id=repo_id, token=HF_TOKEN)
    return model_path
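

# Alternative sketch (assumes a recent transformers release, where
# from_pretrained accepts token=): skip the explicit download and call
# AutoModelForCausalLM.from_pretrained(repo_id, token=HF_TOKEN) directly.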


models = {}
tokenizers = {}

# Download and load every model and its tokenizer up front.
for name in model_names:
    model_path = download_model(name)
    models[name] = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizers[name] = AutoTokenizer.from_pretrained(model_path)
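
# Note: both checkpoints stay resident at once; with a 7B-parameter model in
# the mix this assumes a host with enough RAM or VRAM for both.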


# Per-model generation presets; max_tokens caps the number of newly
# generated tokens.
presets = {
    "mistralai/Mistral-7B-Instruct-v0.3": {
        "Fast": {"max_tokens": 256, "temperature": 1.0, "top_p": 0.8},
        "Normal": {"max_tokens": 512, "temperature": 0.6, "top_p": 0.75},
        "Quality": {"max_tokens": 1024, "temperature": 0.45, "top_p": 0.60},
        "Unreal Performance": {"max_tokens": 1048, "temperature": 0.5, "top_p": 0.7},
    },
    "BICORP/Lake-1-Advanced": {
        "Fast": {"max_tokens": 800, "temperature": 1.0, "top_p": 0.9},
        "Normal": {"max_tokens": 4000, "temperature": 0.7, "top_p": 0.95},
        "Quality": {"max_tokens": 32000, "temperature": 0.5, "top_p": 0.90},
        "Unreal Performance": {"max_tokens": 128000, "temperature": 0.6, "top_p": 0.75},
    }
}

system_messages = {
    "mistralai/Mistral-7B-Instruct-v0.3": "Your name is Lake 1 Base but mine is User",
    "BICORP/Lake-1-Advanced": "Your name is Lake 1 Advanced [Alpha] but mine is User or what I will type as my name"
}

# (repo ID, display name) pairs; the UI shows only the display names.
model_choices = [
    ("mistralai/Mistral-7B-Instruct-v0.3", "Lake 1 Base"),
    ("BICORP/Lake-1-Advanced", "Lake 1 Advanced [Alpha]")
]

pseudonyms = [pseudonym for _, pseudonym in model_choices]
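
# Reverse lookup from display name to the repo ID that keys models,
# tokenizers, presets, and system_messages.
pseudonym_to_repo = {pseudonym: repo_id for repo_id, pseudonym in model_choices}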
|


def respond(message, history: list, model_name, preset_name):
    model = models[model_name]
    tokenizer = tokenizers[model_name]
    system_message = system_messages[model_name]

    history = history or []

    # Flatten the system message and prior turns into a plain-text prompt.
    input_text = (
        system_message + "\n"
        + "\n".join(f"{turn['role']}: {turn['content']}" for turn in history)
        + f"\nuser: {message}\n"
    )

    inputs = tokenizer.encode(input_text, return_tensors="pt")

    preset = presets[model_name][preset_name]

    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=preset["max_tokens"],
            temperature=preset["temperature"],
            top_p=preset["top_p"],
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens so the echoed prompt is not
    # mistaken for part of the reply.
    assistant_response = tokenizer.decode(
        outputs[0][inputs.shape[1]:], skip_special_tokens=True
    ).strip()

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": assistant_response})

    return history


def launch_interface():
    with gr.Blocks() as demo:
        gr.Markdown("## Chat with Lake 1 Models")

        model_selector = gr.Dropdown(choices=pseudonyms, label="Select Model")
        preset_selector = gr.Dropdown(
            choices=["Fast", "Normal", "Quality", "Unreal Performance"],
            label="Select Preset",
        )
        message_input = gr.Textbox(label="Your Message")
        # gr.Chatbot (not "Chatbox"); type="messages" (Gradio 4.44+) matches
        # the {"role": ..., "content": ...} dicts that respond() builds.
        chat_history = gr.Chatbot(label="Chat History", type="messages")

        def submit_message(message, history, pseudonym, preset_name):
            # The dropdown yields a display name; map it back to the repo ID.
            return respond(message, history, pseudonym_to_repo[pseudonym], preset_name)

        submit_button = gr.Button("Send")
        submit_button.click(
            submit_message,
            inputs=[message_input, chat_history, model_selector, preset_selector],
            outputs=chat_history,
        )

    demo.launch()


if __name__ == "__main__":
    launch_interface()
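
# Dependency sketch inferred from the imports above (no versions pinned here):
#   pip install gradio transformers torch huggingface_hub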