BICORP committed
Commit 30b93f3 · verified · 1 Parent(s): fd01a8f

Update app.py

Files changed (1): app.py +84 -71
app.py CHANGED
@@ -1,41 +1,45 @@
- import gradio as gr
  import os
  from transformers import AutoModelForCausalLM, AutoTokenizer

- # Get your Hugging Face token from the environment variable
- hf_token = os.getenv("HF_TOKEN")  # Ensure that this environment variable is set

- # Set paths for local model storage
- cache_dir = "./cache"  # Specify your cache directory within the Space
- os.makedirs(cache_dir, exist_ok=True)  # Create cache directory if it doesn't exist
-
- # Load models and tokenizers locally (or download if not available)
- model_paths = {
-     "mistralai/Mistral-7B-Instruct-v0.3": os.path.join(cache_dir, "mistral-7b-instruct"),
-     "BICORP/Lake-1-Advanced": os.path.join(cache_dir, "lake-1-advanced")
  }

  models = {}
  tokenizers = {}

- # Load models and tokenizers from specified local paths or download
- for model_name, path in model_paths.items():
-     models[model_name] = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=path, token=hf_token)
-     tokenizers[model_name] = AutoTokenizer.from_pretrained(model_name, cache_dir=path, token=hf_token)

  # Define presets for each model
  presets = {
      "mistralai/Mistral-7B-Instruct-v0.3": {
-         "Fast": {"max_new_tokens": 256, "temperature": 1.0, "top_p": 0.8},
-         "Normal": {"max_new_tokens": 512, "temperature": 0.6, "top_p": 0.75},
-         "Quality": {"max_new_tokens": 1024, "temperature": 0.45, "top_p": 0.60},
-         "Unreal Performance": {"max_new_tokens": 1048, "temperature": 0.5, "top_p": 0.7},
      },
      "BICORP/Lake-1-Advanced": {
-         "Fast": {"max_new_tokens": 800, "temperature": 1.0, "top_p": 0.9},
-         "Normal": {"max_new_tokens": 4000, "temperature": 0.7, "top_p": 0.95},
-         "Quality": {"max_new_tokens": 32000, "temperature": 0.5, "top_p": 0.90},
-         "Unreal Performance": {"max_new_tokens": 128000, "temperature": 0.6, "top_p": 0.75},
      }
  }
 
@@ -51,67 +55,76 @@ model_choices = [
      ("BICORP/Lake-1-Advanced", "Lake 1 Advanced [Alpha]")
  ]

- # Extract pseudonyms for the dropdown
  pseudonyms = [model[1] for model in model_choices]

- def respond(message, history: list, model_name, preset_name):
-     """
-     Generate a response from the selected model based on the user's message and chat history.
-     """
      model = models[model_name]
      tokenizer = tokenizers[model_name]
      system_message = system_messages[model_name]

-     messages = [{"role": "system", "content": system_message}]

-     for val in history:
-         if isinstance(val, dict) and 'role' in val and 'content' in val:
-             messages.append({"role": val['role'], "content": val['content']})

-     messages.append({"role": "user", "content": message})
-
-     # Prepare the input for the model
-     inputs = tokenizer([messages], return_tensors="pt", padding=True, truncation=True)
-
      # Get the preset settings
      preset = presets[model_name][preset_name]
-     max_new_tokens = preset["max_new_tokens"]
      temperature = preset["temperature"]
      top_p = preset["top_p"]

-     # Generate the response from the model
-     response = model.generate(
-         input_ids=inputs['input_ids'],
-         attention_mask=inputs['attention_mask'],
-         max_new_tokens=max_new_tokens,
-         temperature=temperature,
-         top_p=top_p,
-     )
-
-     # Decode the generated response
-     final_response = tokenizer.decode(response[0], skip_special_tokens=True)

-     return final_response
-
- def respond_with_pseudonym(message, history: list, selected_model, selected_preset):
-     """
-     Handle the user's message and determine which model to use based on the selected pseudonym.
-     """
-     try:
-         model_name = next(model[0] for model in model_choices if model[1] == selected_model)
-     except StopIteration:
-         return f"Error: The selected model '{selected_model}' is not valid. Please select a valid model."
-
-     return respond(message, history, model_name, selected_preset)
-
- # Gradio Chat Interface
- demo = gr.ChatInterface(
-     fn=respond_with_pseudonym,
-     additional_inputs=[
-         gr.Dropdown(choices=pseudonyms, label="Select Model", value=pseudonyms[0]),
-         gr.Dropdown(choices=list(presets[model_choices[0][0]].keys()), label="Select Preset", value="Fast")
-     ],
- )

  if __name__ == "__main__":
-     demo.launch()
 
 
  import os
+ import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ from huggingface_hub import snapshot_download

+ # Set your Hugging Face token
+ HF_TOKEN = os.getenv("HF_TOKEN")

+ # Define model names and their local cache directory names
+ model_names = {
+     "mistralai/Mistral-7B-Instruct-v0.3": "mistral-7b-instruct",
+     "BICORP/Lake-1-Advanced": "lake-1-advanced"
  }

+ # Download full model repos from the Hugging Face Hub; snapshot_download
+ # fetches every file in the repo and returns the local directory
+ def download_model(repo_id):
+     model_path = snapshot_download(repo_id=repo_id, token=HF_TOKEN)
+     return model_path
+
+ # Load models and tokenizers
  models = {}
  tokenizers = {}

+ for name in model_names.keys():
+     model_path = download_model(name)
+     models[name] = AutoModelForCausalLM.from_pretrained(model_path)
+     tokenizers[name] = AutoTokenizer.from_pretrained(model_path)

  # Define presets for each model
  presets = {
      "mistralai/Mistral-7B-Instruct-v0.3": {
+         "Fast": {"max_tokens": 256, "temperature": 1.0, "top_p": 0.8},
+         "Normal": {"max_tokens": 512, "temperature": 0.6, "top_p": 0.75},
+         "Quality": {"max_tokens": 1024, "temperature": 0.45, "top_p": 0.60},
+         "Unreal Performance": {"max_tokens": 1048, "temperature": 0.5, "top_p": 0.7},
      },
      "BICORP/Lake-1-Advanced": {
+         "Fast": {"max_tokens": 800, "temperature": 1.0, "top_p": 0.9},
+         "Normal": {"max_tokens": 4000, "temperature": 0.7, "top_p": 0.95},
+         "Quality": {"max_tokens": 32000, "temperature": 0.5, "top_p": 0.90},
+         "Unreal Performance": {"max_tokens": 128000, "temperature": 0.6, "top_p": 0.75},
      }
  }

      ("BICORP/Lake-1-Advanced", "Lake 1 Advanced [Alpha]")
  ]

+ # Pseudonyms shown in the dropdown
  pseudonyms = [model[1] for model in model_choices]

+ def respond(message, history: list, model_name, preset_name):
+     # Get the correct model and tokenizer
      model = models[model_name]
      tokenizer = tokenizers[model_name]
+
+     # Get the system message for the model
      system_message = system_messages[model_name]

+     # Prepare the input for the model from the running history
+     history = history or []
+     input_text = system_message + "\n" + "\n".join(
+         f"{val['role']}: {val['content']}" for val in history
+     ) + f"\nuser: {message}\n"

+     # Tokenize the input
+     inputs = tokenizer.encode(input_text, return_tensors='pt')

      # Get the preset settings
      preset = presets[model_name][preset_name]
+     max_tokens = preset["max_tokens"]
      temperature = preset["temperature"]
      top_p = preset["top_p"]

+     # Generate response
+     with torch.no_grad():
+         outputs = model.generate(
+             inputs,
+             max_length=max_tokens,
+             temperature=temperature,
+             top_p=top_p,
+             do_sample=True
+         )

+     # Decode only the newly generated tokens, skipping the prompt
+     assistant_response = tokenizer.decode(
+         outputs[0][inputs.shape[-1]:], skip_special_tokens=True
+     ).strip()
+
+     # Append the user message and assistant response to the history
+     history.append({"role": "user", "content": message})
+     history.append({"role": "assistant", "content": assistant_response})
+
+     return history
+
+ # Gradio interface
+ def launch_interface():
+     with gr.Blocks() as demo:
+         gr.Markdown("## Chat with Lake 1 Models")
+
+         model_selector = gr.Dropdown(choices=pseudonyms, label="Select Model")
+         preset_selector = gr.Dropdown(choices=["Fast", "Normal", "Quality", "Unreal Performance"], label="Select Preset")
+         message_input = gr.Textbox(label="Your Message")
+         # gr.Chatbot (there is no gr.Chatbox); type="messages" accepts the
+         # {"role": ..., "content": ...} dicts built in respond()
+         chat_history = gr.Chatbot(label="Chat History", type="messages")
+
+         def submit_message(message, history, selected_pseudonym, preset_name):
+             # Map the pseudonym shown in the dropdown back to its repo id
+             model_name = next(m[0] for m in model_choices if m[1] == selected_pseudonym)
+             return respond(message, history, model_name, preset_name)
+
+         submit_button = gr.Button("Send")
+         submit_button.click(submit_message, inputs=[message_input, chat_history, model_selector, preset_selector], outputs=chat_history)
+
+     demo.launch()

  if __name__ == "__main__":
+     launch_interface()
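
A note on the download step: `hf_hub_download` fetches a single file and requires a `filename` argument, so it cannot stage a whole model repository by itself; `snapshot_download`, from the same `huggingface_hub` package, downloads the full repo and returns the local directory. A minimal sketch of that call in isolation (the repo id is taken from app.py; the token handling is assumed to match the code above):

from huggingface_hub import snapshot_download
import os

HF_TOKEN = os.getenv("HF_TOKEN")

# Fetches (or reuses from the local cache) every file in the repo and
# returns the local directory, which from_pretrained() accepts directly.
local_dir = snapshot_download(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    token=HF_TOKEN,  # required for gated repos such as the Mistral weights
)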
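`respond` builds the prompt by concatenating role-prefixed strings. Instruct checkpoints such as mistralai/Mistral-7B-Instruct-v0.3 also ship a chat template that inserts the role markers and special tokens for you, which tends to match how the model was fine-tuned. A minimal sketch of that alternative, assuming `model` and `tokenizer` are already loaded as in app.py (the conversation content is a placeholder):

# Sketch: build the prompt with the tokenizer's chat template instead of
# manual string concatenation.
chat = [
    {"role": "user", "content": "Hello, who are you?"},
]
input_ids = tokenizer.apply_chat_template(
    chat,
    add_generation_prompt=True,  # append the assistant-turn marker
    return_tensors="pt",
)
outputs = model.generate(input_ids, max_new_tokens=256, do_sample=True,
                         temperature=0.7, top_p=0.9)
# Decode only the newly generated tokens
reply = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)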
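One caveat on the presets: `model.generate(..., max_length=...)` budgets prompt tokens plus generated tokens, so a long chat history can consume the entire "Fast" budget of 256 before any new text is produced. If the presets are meant to bound output length, `max_new_tokens` (the knob the pre-commit code used) counts generated tokens only; a sketch under that assumption:

# Sketch: bound only the generated tokens, independent of prompt length.
outputs = model.generate(
    inputs,
    max_new_tokens=preset["max_tokens"],  # output budget only
    temperature=preset["temperature"],
    top_p=preset["top_p"],
    do_sample=True,
)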