BICORP committed
Commit 30b93f3 · verified · 1 Parent(s): fd01a8f

Update app.py

Files changed (1): app.py +84 -71
app.py CHANGED
@@ -1,41 +1,45 @@
- import gradio as gr
  import os
  from transformers import AutoModelForCausalLM, AutoTokenizer

- # Get your Hugging Face token from the environment variable
- hf_token = os.getenv("HF_TOKEN")  # Ensure that this environment variable is set

- # Set paths for local model storage
- cache_dir = "./cache"  # Specify your cache directory within the Space
- os.makedirs(cache_dir, exist_ok=True)  # Create cache directory if it doesn't exist
-
- # Load models and tokenizers locally (or download if not available)
- model_paths = {
-     "mistralai/Mistral-7B-Instruct-v0.3": os.path.join(cache_dir, "mistral-7b-instruct"),
-     "BICORP/Lake-1-Advanced": os.path.join(cache_dir, "lake-1-advanced")
  }

  models = {}
  tokenizers = {}

- # Load models and tokenizers from specified local paths or download
- for model_name, path in model_paths.items():
-     models[model_name] = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=path, token=hf_token)
-     tokenizers[model_name] = AutoTokenizer.from_pretrained(model_name, cache_dir=path, token=hf_token)

  # Define presets for each model
  presets = {
      "mistralai/Mistral-7B-Instruct-v0.3": {
-         "Fast": {"max_new_tokens": 256, "temperature": 1.0, "top_p": 0.8},
-         "Normal": {"max_new_tokens": 512, "temperature": 0.6, "top_p": 0.75},
-         "Quality": {"max_new_tokens": 1024, "temperature": 0.45, "top_p": 0.60},
-         "Unreal Performance": {"max_new_tokens": 1048, "temperature": 0.5, "top_p": 0.7},
      },
      "BICORP/Lake-1-Advanced": {
-         "Fast": {"max_new_tokens": 800, "temperature": 1.0, "top_p": 0.9},
-         "Normal": {"max_new_tokens": 4000, "temperature": 0.7, "top_p": 0.95},
-         "Quality": {"max_new_tokens": 32000, "temperature": 0.5, "top_p": 0.90},
-         "Unreal Performance": {"max_new_tokens": 128000, "temperature": 0.6, "top_p": 0.75},
      }
  }
 
@@ -51,67 +55,76 @@ model_choices = [
      ("BICORP/Lake-1-Advanced", "Lake 1 Advanced [Alpha]")
  ]

- # Extract pseudonyms for the dropdown
  pseudonyms = [model[1] for model in model_choices]

- def respond(message, history: list, model_name, preset_name):
-     """
-     Generate a response from the selected model based on the user's message and chat history.
-     """
      model = models[model_name]
      tokenizer = tokenizers[model_name]
      system_message = system_messages[model_name]

-     messages = [{"role": "system", "content": system_message}]

-     for val in history:
-         if isinstance(val, dict) and 'role' in val and 'content' in val:
-             messages.append({"role": val['role'], "content": val['content']})

-     messages.append({"role": "user", "content": message})
-
-     # Prepare the input for the model
-     inputs = tokenizer([messages], return_tensors="pt", padding=True, truncation=True)
-
      # Get the preset settings
      preset = presets[model_name][preset_name]
-     max_new_tokens = preset["max_new_tokens"]
      temperature = preset["temperature"]
      top_p = preset["top_p"]

-     # Generate the response from the model
-     response = model.generate(
-         input_ids=inputs['input_ids'],
-         attention_mask=inputs['attention_mask'],
-         max_new_tokens=max_new_tokens,
-         temperature=temperature,
-         top_p=top_p,
-     )
-
-     # Decode the generated response
-     final_response = tokenizer.decode(response[0], skip_special_tokens=True)

-     return final_response
-
- def respond_with_pseudonym(message, history: list, selected_model, selected_preset):
-     """
-     Handle the user's message and determine which model to use based on the selected pseudonym.
-     """
-     try:
-         model_name = next(model[0] for model in model_choices if model[1] == selected_model)
-     except StopIteration:
-         return f"Error: The selected model '{selected_model}' is not valid. Please select a valid model."
-
-     return respond(message, history, model_name, selected_preset)
-
- # Gradio Chat Interface
- demo = gr.ChatInterface(
-     fn=respond_with_pseudonym,
-     additional_inputs=[
-         gr.Dropdown(choices=pseudonyms, label="Select Model", value=pseudonyms[0]),
-         gr.Dropdown(choices=list(presets[model_choices[0][0]].keys()), label="Select Preset", value="Fast")
-     ],
- )

  if __name__ == "__main__":
-     demo.launch()
 
 
  import os
+ import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ from huggingface_hub import snapshot_download

+ # Set your Hugging Face token
+ HF_TOKEN = os.getenv("HF_TOKEN")

+ # Define model names and their local cache directory names
+ model_names = {
+     "mistralai/Mistral-7B-Instruct-v0.3": "mistral-7b-instruct",
+     "BICORP/Lake-1-Advanced": "lake-1-advanced"
  }

+ # Download full model repos from the Hugging Face Hub; snapshot_download
+ # fetches every file in the repo and returns the local directory
+ def download_model(repo_id):
+     model_path = snapshot_download(repo_id=repo_id, token=HF_TOKEN)
+     return model_path
+
+ # Load models and tokenizers
  models = {}
  tokenizers = {}

+ for name in model_names.keys():
+     model_path = download_model(name)
+     models[name] = AutoModelForCausalLM.from_pretrained(model_path)
+     tokenizers[name] = AutoTokenizer.from_pretrained(model_path)

  # Define presets for each model
  presets = {
      "mistralai/Mistral-7B-Instruct-v0.3": {
+         "Fast": {"max_tokens": 256, "temperature": 1.0, "top_p": 0.8},
+         "Normal": {"max_tokens": 512, "temperature": 0.6, "top_p": 0.75},
+         "Quality": {"max_tokens": 1024, "temperature": 0.45, "top_p": 0.60},
+         "Unreal Performance": {"max_tokens": 1048, "temperature": 0.5, "top_p": 0.7},
      },
      "BICORP/Lake-1-Advanced": {
+         "Fast": {"max_tokens": 800, "temperature": 1.0, "top_p": 0.9},
+         "Normal": {"max_tokens": 4000, "temperature": 0.7, "top_p": 0.95},
+         "Quality": {"max_tokens": 32000, "temperature": 0.5, "top_p": 0.90},
+         "Unreal Performance": {"max_tokens": 128000, "temperature": 0.6, "top_p": 0.75},
      }
  }

      ("BICORP/Lake-1-Advanced", "Lake 1 Advanced [Alpha]")
  ]

+ # Pseudonyms shown in the dropdown
  pseudonyms = [model[1] for model in model_choices]

+ def respond(message, history: list, model_name, preset_name):
+     # Get the correct model and tokenizer
      model = models[model_name]
      tokenizer = tokenizers[model_name]
+
+     # Get the system message for the model
      system_message = system_messages[model_name]

+     # Prepare the input for the model from the running history
+     history = history or []
+     input_text = system_message + "\n" + "\n".join(
+         f"{val['role']}: {val['content']}" for val in history
+     ) + f"\nuser: {message}\n"

+     # Tokenize the input
+     inputs = tokenizer.encode(input_text, return_tensors='pt')

      # Get the preset settings
      preset = presets[model_name][preset_name]
+     max_tokens = preset["max_tokens"]
      temperature = preset["temperature"]
      top_p = preset["top_p"]

+     # Generate response
+     with torch.no_grad():
+         outputs = model.generate(
+             inputs,
+             max_length=max_tokens,
+             temperature=temperature,
+             top_p=top_p,
+             do_sample=True
+         )

+     # Decode only the newly generated tokens, skipping the prompt
+     assistant_response = tokenizer.decode(
+         outputs[0][inputs.shape[-1]:], skip_special_tokens=True
+     ).strip()
+
+     # Append the user message and assistant response to the history
+     history.append({"role": "user", "content": message})
+     history.append({"role": "assistant", "content": assistant_response})
+
+     return history
+
+ # Gradio interface
+ def launch_interface():
+     with gr.Blocks() as demo:
+         gr.Markdown("## Chat with Lake 1 Models")
+
+         model_selector = gr.Dropdown(choices=pseudonyms, label="Select Model")
+         preset_selector = gr.Dropdown(choices=["Fast", "Normal", "Quality", "Unreal Performance"], label="Select Preset")
+         message_input = gr.Textbox(label="Your Message")
+         # gr.Chatbot (there is no gr.Chatbox); type="messages" accepts the
+         # {"role": ..., "content": ...} dicts built in respond()
+         chat_history = gr.Chatbot(label="Chat History", type="messages")
+
+         def submit_message(message, history, selected_pseudonym, preset_name):
+             # Map the pseudonym shown in the dropdown back to its repo id
+             model_name = next(m[0] for m in model_choices if m[1] == selected_pseudonym)
+             return respond(message, history, model_name, preset_name)
+
+         submit_button = gr.Button("Send")
+         submit_button.click(submit_message, inputs=[message_input, chat_history, model_selector, preset_selector], outputs=chat_history)
+
+     demo.launch()

  if __name__ == "__main__":
+     launch_interface()
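
A note on the download step: `hf_hub_download` fetches a single file and requires a `filename` argument, so it cannot stage a whole model repository by itself; `snapshot_download`, from the same `huggingface_hub` package, downloads the full repo and returns the local directory. A minimal sketch of that call in isolation (the repo id is taken from app.py; the token handling is assumed to match the code above):

from huggingface_hub import snapshot_download
import os

HF_TOKEN = os.getenv("HF_TOKEN")

# Fetches (or reuses from the local cache) every file in the repo and
# returns the local directory, which from_pretrained() accepts directly.
local_dir = snapshot_download(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    token=HF_TOKEN,  # required for gated repos such as the Mistral weights
)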
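`respond` builds the prompt by concatenating role-prefixed strings. Instruct checkpoints such as mistralai/Mistral-7B-Instruct-v0.3 also ship a chat template that inserts the role markers and special tokens for you, which tends to match how the model was fine-tuned. A minimal sketch of that alternative, assuming `model` and `tokenizer` are already loaded as in app.py (the conversation content is a placeholder):

# Sketch: build the prompt with the tokenizer's chat template instead of
# manual string concatenation.
chat = [
    {"role": "user", "content": "Hello, who are you?"},
]
input_ids = tokenizer.apply_chat_template(
    chat,
    add_generation_prompt=True,  # append the assistant-turn marker
    return_tensors="pt",
)
outputs = model.generate(input_ids, max_new_tokens=256, do_sample=True,
                         temperature=0.7, top_p=0.9)
# Decode only the newly generated tokens
reply = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)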
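One caveat on the presets: `model.generate(..., max_length=...)` budgets prompt tokens plus generated tokens, so a long chat history can consume the entire "Fast" budget of 256 before any new text is produced. If the presets are meant to bound output length, `max_new_tokens` (the knob the pre-commit code used) counts generated tokens only; a sketch under that assumption:

# Sketch: bound only the generated tokens, independent of prompt length.
outputs = model.generate(
    inputs,
    max_new_tokens=preset["max_tokens"],  # output budget only
    temperature=preset["temperature"],
    top_p=preset["top_p"],
    do_sample=True,
)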