Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -91,36 +91,16 @@ class LocalInferenceClient:
|
|
91 |
|
92 |
# Specify the model paths for gguf models
|
93 |
model_configs = {
|
94 |
-
"
|
95 |
-
"path": r"
|
96 |
"specs": """
|
97 |
## Lake 1 Chat Specifications
|
98 |
-
- **Architecture**:
|
99 |
-
- **Parameters**:
|
100 |
-
- **Capabilities**:
|
101 |
-
- **Intended Use**:
|
102 |
"""
|
103 |
-
}
|
104 |
-
"Lake 1 Mini": {
|
105 |
-
"path": r"C:\Users\BI Corp\Videos\main\Lake-1-mini\Lake-1-Mini.gguf",
|
106 |
-
"specs": """
|
107 |
-
## Lake 1 Mini Specifications
|
108 |
-
- **Architecture**: Lake 1
|
109 |
-
- **Parameters**: 6B
|
110 |
-
- **Capabilities**: Quick responses, compact model
|
111 |
-
- **Intended Use**: Great for fast responses and lightweight use cases.
|
112 |
-
"""
|
113 |
-
},
|
114 |
-
"Lake 1 Base": {
|
115 |
-
"path": r"C:\Users\BI Corp\Videos\main\Lake-1-base\Lake-1-Base.gguf",
|
116 |
-
"specs": """
|
117 |
-
## Lake 1 Base Specifications
|
118 |
-
- **Architecture**: Lake 1
|
119 |
-
- **Parameters**: 12B
|
120 |
-
- **Capabilities**: Balanced performance between speed and accuracy
|
121 |
-
- **Intended Use**: Best for use cases requiring a balance of speed and detail in responses.
|
122 |
-
"""
|
123 |
-
},
|
124 |
}
|
125 |
|
126 |
# Set up a dictionary mapping model names to their clients
|
@@ -128,17 +108,7 @@ clients = {name: LocalInferenceClient(name, config['path']) for name, config in
|
|
128 |
|
129 |
# Presets for performance/quality tradeoffs
|
130 |
presets = {
|
131 |
-
"
|
132 |
-
"Fast": {"max_new_tokens": 100, "temperature": 1.0, "top_p": 0.9},
|
133 |
-
"Normal": {"max_new_tokens": 200, "temperature": 0.7, "top_p": 0.95},
|
134 |
-
"Quality": {"max_new_tokens": 300, "temperature": 0.5, "top_p": 0.90},
|
135 |
-
},
|
136 |
-
"Lake 1 Base": {
|
137 |
-
"Fast": {"max_new_tokens": 100, "temperature": 1.0, "top_p": 0.9},
|
138 |
-
"Normal": {"max_new_tokens": 200, "temperature": 0.7, "top_p": 0.95},
|
139 |
-
"Quality": {"max_new_tokens": 300, "temperature": 0.5, "top_p": 0.90},
|
140 |
-
},
|
141 |
-
"Lake 1 Chat": {
|
142 |
"Fast": {"max_new_tokens": 100, "temperature": 1.0, "top_p": 0.9},
|
143 |
"Normal": {"max_new_tokens": 200, "temperature": 0.7, "top_p": 0.95},
|
144 |
"Quality": {"max_new_tokens": 300, "temperature": 0.5, "top_p": 0.90},
|
@@ -147,10 +117,8 @@ presets = {
|
|
147 |
|
148 |
# A system prompt for the model
|
149 |
system_messages = {
|
150 |
-
"
|
151 |
-
|
152 |
-
"Lake 1 Base": "You are Lake 1 Base, a powerful open-source original model. Think and answer step by step but balance speed and accuracy.",
|
153 |
-
}
|
154 |
|
155 |
def generate_response(message: str, model_name: str, preset: str) -> str:
|
156 |
"""
|
@@ -212,7 +180,7 @@ with gr.Blocks(title="BI CORP AI Assistant", theme="soft") as demo:
|
|
212 |
interactive=True
|
213 |
)
|
214 |
model_info_md = gr.Markdown(
|
215 |
-
value=model_configs["
|
216 |
label="π Model Specifications"
|
217 |
)
|
218 |
|
@@ -220,7 +188,7 @@ with gr.Blocks(title="BI CORP AI Assistant", theme="soft") as demo:
|
|
220 |
chat_interface = gr.ChatInterface(
|
221 |
fn=handle_chat,
|
222 |
additional_inputs=[model_dropdown, preset_dropdown],
|
223 |
-
examples=[["Explain quantum computing", "
|
224 |
chatbot=gr.Chatbot(height=600, label="π¬ Conversation", show_copy_button=True),
|
225 |
textbox=gr.Textbox(placeholder="Type your message...", container=False, scale=7, autofocus=True),
|
226 |
submit_btn=gr.Button("π Send", variant="primary")
|
|
|
91 |
|
92 |
# Specify the model paths for gguf models
|
93 |
model_configs = {
|
94 |
+
"Test": {
|
95 |
+
"path": r"./test-model.gguf",
|
96 |
"specs": """
|
97 |
## Lake 1 Chat Specifications
|
98 |
+
- **Architecture**: Test
|
99 |
+
- **Parameters**: IDK
|
100 |
+
- **Capabilities**: test
|
101 |
+
- **Intended Use**: test
|
102 |
"""
|
103 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
}
|
105 |
|
106 |
# Set up a dictionary mapping model names to their clients
|
|
|
108 |
|
109 |
# Presets for performance/quality tradeoffs
|
110 |
presets = {
|
111 |
+
"Test": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
"Fast": {"max_new_tokens": 100, "temperature": 1.0, "top_p": 0.9},
|
113 |
"Normal": {"max_new_tokens": 200, "temperature": 0.7, "top_p": 0.95},
|
114 |
"Quality": {"max_new_tokens": 300, "temperature": 0.5, "top_p": 0.90},
|
|
|
117 |
|
118 |
# A system prompt for the model
|
119 |
system_messages = {
|
120 |
+
"Test": "You are Lake 1 Chat, a powerful open-source reasoning model. Think carefully and answer step by step.",
|
121 |
+
}
|
|
|
|
|
122 |
|
123 |
def generate_response(message: str, model_name: str, preset: str) -> str:
|
124 |
"""
|
|
|
180 |
interactive=True
|
181 |
)
|
182 |
model_info_md = gr.Markdown(
|
183 |
+
value=model_configs["Test"]["specs"],
|
184 |
label="π Model Specifications"
|
185 |
)
|
186 |
|
|
|
188 |
chat_interface = gr.ChatInterface(
|
189 |
fn=handle_chat,
|
190 |
additional_inputs=[model_dropdown, preset_dropdown],
|
191 |
+
examples=[["Explain quantum computing", "Test", "Normal"]],
|
192 |
chatbot=gr.Chatbot(height=600, label="π¬ Conversation", show_copy_button=True),
|
193 |
textbox=gr.Textbox(placeholder="Type your message...", container=False, scale=7, autofocus=True),
|
194 |
submit_btn=gr.Button("π Send", variant="primary")
|