Update app.py
app.py CHANGED
@@ -1,41 +1,45 @@
-import gradio as gr
 import os
+import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+from huggingface_hub import snapshot_download
-# …
-…
-# Load models and tokenizers locally (or download if not available)
-model_paths = {
-    "mistralai/Mistral-7B-Instruct-v0.3": os.path.join(cache_dir, "mistral-7b-instruct"),
-    "BICORP/Lake-1-Advanced": os.path.join(cache_dir, "lake-1-advanced")
+
+# Set your Hugging Face token
+HF_TOKEN = os.getenv("HF_TOKEN")
+
+# Define model names and their local paths
+model_names = {
+    "mistralai/Mistral-7B-Instruct-v0.3": "mistral-7b-instruct",
+    "BICORP/Lake-1-Advanced": "lake-1-advanced"
 }
 
+# Download models and tokenizers using the Hugging Face Hub
+def download_model(repo_id):
+    # snapshot_download fetches the whole repo (hf_hub_download would need a single filename)
+    model_path = snapshot_download(repo_id=repo_id, token=HF_TOKEN)
+    return model_path
+
+# Load models and tokenizers
 models = {}
 tokenizers = {}
 
-…
-models[…
-tokenizers[…
+for name in model_names.keys():
+    model_path = download_model(name)
+    models[name] = AutoModelForCausalLM.from_pretrained(model_path)
+    tokenizers[name] = AutoTokenizer.from_pretrained(model_path)
 
 # Define presets for each model
 presets = {
     "mistralai/Mistral-7B-Instruct-v0.3": {
-        "Fast": {"…
-        "Normal": {"…
-        "Quality": {"…
-        "Unreal Performance": {"…
+        "Fast": {"max_tokens": 256, "temperature": 1.0, "top_p": 0.8},
+        "Normal": {"max_tokens": 512, "temperature": 0.6, "top_p": 0.75},
+        "Quality": {"max_tokens": 1024, "temperature": 0.45, "top_p": 0.60},
+        "Unreal Performance": {"max_tokens": 1048, "temperature": 0.5, "top_p": 0.7},
     },
     "BICORP/Lake-1-Advanced": {
-        "Fast": {"…
-        "Normal": {"…
-        "Quality": {"…
-        "Unreal Performance": {"…
+        "Fast": {"max_tokens": 800, "temperature": 1.0, "top_p": 0.9},
+        "Normal": {"max_tokens": 4000, "temperature": 0.7, "top_p": 0.95},
+        "Quality": {"max_tokens": 32000, "temperature": 0.5, "top_p": 0.90},
+        "Unreal Performance": {"max_tokens": 128000, "temperature": 0.6, "top_p": 0.75},
     }
 }
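Note: hf_hub_download requires a filename argument, which is why the loader above uses snapshot_download to pull the whole repository. A minimal alternative sketch, assuming a recent transformers release (the token, torch_dtype, and device_map arguments are assumptions, not part of this commit), lets from_pretrained resolve and cache the repo directly:

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

HF_TOKEN = os.getenv("HF_TOKEN")

def load_model(repo_id):
    # from_pretrained downloads and caches the repo itself; token covers gated models
    tokenizer = AutoTokenizer.from_pretrained(repo_id, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        token=HF_TOKEN,
        torch_dtype=torch.float16,  # halves memory for 7B-class weights
        device_map="auto",          # requires the accelerate package
    )
    return model, tokenizer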
@@ -51,67 +55,76 @@ model_choices = [
     ("BICORP/Lake-1-Advanced", "Lake 1 Advanced [Alpha]")
 ]
 
-# …
+# Convert pseudonyms to model names for the dropdown
 pseudonyms = [model[1] for model in model_choices]
 
-def respond(
-    …
+def respond(
+    message,
+    history: list,
+    model_name,
+    preset_name
+):
+    # Get the correct model and tokenizer
     model = models[model_name]
     tokenizer = tokenizers[model_name]
+
+    # Get the system message for the model
     system_message = system_messages[model_name]
 
-    …
-    messages.append({"role": val['role'], "content": val['content']})
-    messages.append({"role": "user", "content": message})
-
-    # Prepare the input for the model
-    inputs = tokenizer([messages], return_tensors="pt", padding=True, truncation=True)
+    # Prepare the input for the model
+    input_text = system_message + "\n" + "\n".join([f"{val['role']}: {val['content']}" for val in history]) + f"\nuser: {message}\n"
+
+    # Tokenize the input
+    inputs = tokenizer.encode(input_text, return_tensors='pt')
 
     # Get the preset settings
     preset = presets[model_name][preset_name]
+    max_tokens = preset["max_tokens"]
     temperature = preset["temperature"]
     top_p = preset["top_p"]
 
-    # Generate…
-    …
-    # Decode the generated response
-    final_response = tokenizer.decode(response[0], skip_special_tokens=True)
-
-    …
-    """
-    …
-    gr.…
-    gr.…
-    …
-    )
+    # Generate response
+    with torch.no_grad():
+        outputs = model.generate(
+            inputs,
+            max_new_tokens=max_tokens,  # max_length would also count the prompt tokens
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=True
+        )
+
+    # Decode the response
+    final_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Extract the assistant's response
+    assistant_response = final_response.split("user: ")[-1].strip()
+
+    # Append the user message and assistant response to the history
+    history.append({"role": "user", "content": message})
+    history.append({"role": "assistant", "content": assistant_response})
+
+    # Return the updated history for the Chatbot component
+    return history
+
+# Gradio interface
+def launch_interface():
+    with gr.Blocks() as demo:
+        gr.Markdown("## Chat with Lake 1 Models")
+
+        model_selector = gr.Dropdown(choices=pseudonyms, label="Select Model")
+        preset_selector = gr.Dropdown(choices=["Fast", "Normal", "Quality", "Unreal Performance"], label="Select Preset")
+        message_input = gr.Textbox(label="Your Message")
+        chat_history = gr.Chatbot(label="Chat History", type="messages")  # role/content dicts need the messages format (recent Gradio)
+
+        def update_model(selected_pseudonym):
+            # Map the displayed pseudonym back to its repo id
+            return model_choices[pseudonyms.index(selected_pseudonym)][0]
+
+        def submit_message(message, history, model_name, preset_name):
+            # The dropdown holds a pseudonym, so convert it before the model lookup
+            return respond(message, history or [], update_model(model_name), preset_name)
+
+        submit_button = gr.Button("Send")
+        submit_button.click(submit_message, inputs=[message_input, chat_history, model_selector, preset_selector], outputs=chat_history)
+
+    demo.launch()
 
 if __name__ == "__main__":
-    …
+    launch_interface()
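Note: respond() builds the prompt by plain string concatenation, so the model never sees the instruction markers Mistral-Instruct was trained on. A more robust sketch, assuming the same list-of-dicts history (some Mistral chat templates reject a "system" role, so it is omitted here):

def build_inputs(tokenizer, history, message):
    # history: [{"role": "user" or "assistant", "content": ...}, ...]
    messages = list(history)
    messages.append({"role": "user", "content": message})
    # apply_chat_template formats the turns with the template shipped in the tokenizer
    return tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # end with the assistant header
        return_tensors="pt"
    )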
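Note: splitting the decoded text on "user: " is fragile, since the model can emit that marker itself. Slicing the generated ids at the prompt length, as in this sketch, keeps only the newly generated tokens:

prompt_len = inputs.shape[-1]
assistant_response = tokenizer.decode(
    outputs[0][prompt_len:],  # drop the prompt tokens, keep only the reply
    skip_special_tokens=True
).strip()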