FlameF0X committed
Commit 1467791 · verified · 1 Parent(s): a7311db

Update app.py

Files changed (1):
  app.py  +101 -231
app.py CHANGED
@@ -1,177 +1,85 @@
  import os
  import torch
  import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline
- from safetensors.torch import load_file  # Import safetensors for loading .safetensors models
  import datetime
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline
+ from safetensors.torch import load_file
+
+ # Constants
+ MODEL_CONFIG = {
+     "G0-Release": "FlameF0X/Snowflake-G0-Release",
+     "G0-Release-2": "FlameF0X/Snowflake-G0-Release-2",
+     "G0-Release-2.5": "FlameF0X/Snowflake-G0-Release-2.5"
+ }

- # Model Constants
- MODEL_ID_V1 = "FlameF0X/Snowflake-G0-Release"
- MODEL_ID_V2 = "FlameF0X/Snowflake-G0-Release-2"
- MODEL_ID_V3 = "FlameF0X/Snowflake-G0-Release-2.5"
  MAX_LENGTH = 384
- TEMPERATURE_MIN = 0.1
- TEMPERATURE_MAX = 2.0
  TEMPERATURE_DEFAULT = 0.7
- TOP_P_MIN = 0.1
- TOP_P_MAX = 1.0
  TOP_P_DEFAULT = 0.9
- TOP_K_MIN = 1
- TOP_K_MAX = 100
  TOP_K_DEFAULT = 40
- MAX_NEW_TOKENS_MIN = 16
- MAX_NEW_TOKENS_MAX = 1024
  MAX_NEW_TOKENS_DEFAULT = 256

- # CSS for the app
+ # UI parameter bounds
+ TEMPERATURE_MIN, TEMPERATURE_MAX = 0.1, 2.0
+ TOP_P_MIN, TOP_P_MAX = 0.1, 1.0
+ TOP_K_MIN, TOP_K_MAX = 1, 100
+ MAX_NEW_TOKENS_MIN, MAX_NEW_TOKENS_MAX = 16, 1024
+
+ # Styling
  css = """
- .gradio-container {
-     background-color: #1e1e2f !important;
-     color: #e0e0e0 !important;
- }
- .header {
-     background-color: #2b2b3c;
-     padding: 20px;
-     margin-bottom: 20px;
-     border-radius: 10px;
-     text-align: center;
- }
- .header h1 {
-     color: #66ccff;
-     margin-bottom: 10px;
- }
- .snowflake-icon {
-     font-size: 24px;
-     margin-right: 10px;
- }
- .footer {
-     text-align: center;
-     margin-top: 20px;
-     font-size: 0.9em;
-     color: #999;
- }
- .parameter-section {
-     background-color: #2a2a3a;
-     padding: 15px;
-     border-radius: 8px;
-     margin-bottom: 15px;
- }
- .parameter-section h3 {
-     margin-top: 0;
-     color: #66ccff;
- }
- .example-section {
-     background-color: #223344;
-     padding: 15px;
-     border-radius: 8px;
-     margin-bottom: 15px;
- }
- .example-section h3 {
-     margin-top: 0;
-     color: #66ffaa;
- }
- .model-select {
-     background-color: #2a2a4a;
-     padding: 10px;
-     border-radius: 8px;
-     margin-bottom: 15px;
- }
+ .gradio-container { background-color: #1e1e2f !important; color: #e0e0e0 !important; }
+ .header { background-color: #2b2b3c; padding: 20px; margin-bottom: 20px; border-radius: 10px; text-align: center; }
+ .header h1 { color: #66ccff; margin-bottom: 10px; }
+ .snowflake-icon { font-size: 24px; margin-right: 10px; }
+ .footer { text-align: center; margin-top: 20px; font-size: 0.9em; color: #999; }
+ .parameter-section { background-color: #2a2a3a; padding: 15px; border-radius: 8px; margin-bottom: 15px; }
+ .parameter-section h3 { margin-top: 0; color: #66ccff; }
+ .example-section { background-color: #223344; padding: 15px; border-radius: 8px; margin-bottom: 15px; }
+ .example-section h3 { margin-top: 0; color: #66ffaa; }
+ .model-select { background-color: #2a2a4a; padding: 10px; border-radius: 8px; margin-bottom: 15px; }
  """

- # Global variables for models and tokenizers
- model_v1 = None
- tokenizer_v1 = None
- pipeline_v1 = None
- model_v2 = None
- tokenizer_v2 = None
- pipeline_v2 = None
+ # Model registry
+ model_registry = {}

- # Helper functions to load models
- def load_models_and_tokenizers():
-     global model_v1, tokenizer_v1, pipeline_v1, model_v2, tokenizer_v2, pipeline_v2
-
-     # Load the first model
-     print(f"Loading model from {MODEL_ID_V1}...")
-     tokenizer_v1 = AutoTokenizer.from_pretrained(MODEL_ID_V1)
-     if tokenizer_v1.pad_token is None:
-         tokenizer_v1.pad_token = tokenizer_v1.eos_token
+ def load_all_models():
+     for name, model_id in MODEL_CONFIG.items():
+         print(f"Loading model: {name} from {model_id}")
+         tokenizer = AutoTokenizer.from_pretrained(model_id)
+         if tokenizer.pad_token is None:
+             tokenizer.pad_token = tokenizer.eos_token

-     model_file_path = os.path.join(MODEL_ID_V1, "model.safetensors")
-
-     if os.path.exists(model_file_path):
-         print("Loading model from safetensors file...")
-         model_v1 = load_file(model_file_path)
-     else:
-         print("Loading model from .bin file...")
-         model_v1 = AutoModelForCausalLM.from_pretrained(
-             MODEL_ID_V1,
-             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-             device_map="auto"
-         )
-
-     pipeline_v1 = TextGenerationPipeline(
-         model=model_v1,
-         tokenizer=tokenizer_v1,
-         return_full_text=False,
-         max_length=MAX_LENGTH
-     )
-
-     # Load the second model
-     print(f"Loading model from {MODEL_ID_V2}...")
-     tokenizer_v2 = AutoTokenizer.from_pretrained(MODEL_ID_V2)
-     if tokenizer_v2.pad_token is None:
-         tokenizer_v2.pad_token = tokenizer_v2.eos_token
+         safetensor_path = os.path.join(model_id, "model.safetensors")
+         if os.path.exists(safetensor_path):
+             print("Loading from safetensors...")
+             model = load_file(safetensor_path)
+         else:
+             print("Loading from Hugging Face or .bin...")
+             model = AutoModelForCausalLM.from_pretrained(
+                 model_id,
+                 torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                 device_map="auto"
+             )

-     model_file_path = os.path.join(MODEL_ID_V2, "model.safetensors")
-
-     if os.path.exists(model_file_path):
-         print("Loading model from safetensors file...")
-         model_v2 = load_file(model_file_path)
-     else:
-         print("Loading model from .bin file...")
-         model_v2 = AutoModelForCausalLM.from_pretrained(
-             MODEL_ID_V2,
-             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-             device_map="auto"
+         pipeline = TextGenerationPipeline(
+             model=model,
+             tokenizer=tokenizer,
+             return_full_text=False,
+             max_length=MAX_LENGTH
          )
-
-     pipeline_v2 = TextGenerationPipeline(
-         model=model_v2,
-         tokenizer=tokenizer_v2,
-         return_full_text=False,
-         max_length=MAX_LENGTH
-     )
-
-     return (model_v1, tokenizer_v1, pipeline_v1), (model_v2, tokenizer_v2, pipeline_v2)

- # Helper functions for generation
- def generate_text(
-     prompt,
-     model_version,
-     temperature=TEMPERATURE_DEFAULT,
-     top_p=TOP_P_DEFAULT,
-     top_k=TOP_K_DEFAULT,
-     max_new_tokens=MAX_NEW_TOKENS_DEFAULT,
-     history=None
- ):
+         model_registry[name] = (model, tokenizer, pipeline)
+
+ def generate_text(prompt, model_version, temperature, top_p, top_k, max_new_tokens, history=None):
      if history is None:
          history = []
-
-     # Add current prompt to history
      history.append({"role": "user", "content": prompt})

      try:
-         # Select the appropriate pipeline based on model version
-         if model_version == "G0-Release":
-             pipeline = pipeline_v1
-             tokenizer = tokenizer_v1
-             model_name = "Snowflake-G0-Release"
-         else:  # "G0-Release-2"
-             pipeline = pipeline_v2
-             tokenizer = tokenizer_v2
-             model_name = "Snowflake-G0-Release-2"
-
-         # Generate response
+         if model_version not in model_registry:
+             raise ValueError(f"Model '{model_version}' not found.")
+
+         _, tokenizer, pipeline = model_registry[model_version]
+
          outputs = pipeline(
              prompt,
              do_sample=temperature > 0,
@@ -182,22 +90,15 @@ def generate_text(
              pad_token_id=tokenizer.pad_token_id,
              num_return_sequences=1
          )
-
+
          response = outputs[0]["generated_text"]
-
-         # Add model response to history
-         history.append({"role": "assistant", "content": response, "model": model_name})
-
-         # Format chat history for display
+         history.append({"role": "assistant", "content": response, "model": model_version})
+
          formatted_history = []
          for entry in history:
-             if entry["role"] == "user":
-                 role_prefix = "👤 User: "
-             else:
-                 model_indicator = f"[{entry.get('model', 'Snowflake')}]"
-                 role_prefix = f"❄️ {model_indicator}: "
-             formatted_history.append(f"{role_prefix}{entry['content']}")
-
+             prefix = "👤 User: " if entry["role"] == "user" else f"❄️ [{entry.get('model', 'Model')}]: "
+             formatted_history.append(f"{prefix}{entry['content']}")
+
          return response, history, "\n\n".join(formatted_history)

      except Exception as e:
@@ -208,19 +109,6 @@ def generate_text(
  def clear_conversation():
      return "", [], ""

- def apply_preset_example(example, history):
-     return example, history
-
- # Example prompts
- examples = [
-     "Write a short story about a snowflake that comes to life.",
-     "Explain the concept of artificial neural networks to a 10-year-old.",
-     "What are some interesting applications of natural language processing?",
-     "Write a haiku about programming.",
-     "Create a dialogue between two AI researchers discussing the future of language models."
- ]
-
- # Main function
  def create_demo():
      with gr.Blocks(css=css) as demo:
          # Header
@@ -231,14 +119,12 @@ def create_demo():
          </div>
          """)

-         # Chat interface
          with gr.Column():
-             # Model selection
              with gr.Row(elem_classes="model-select"):
                  model_version = gr.Radio(
-                     ["G0-Release", "G0-Release-2"],
+                     choices=list(MODEL_CONFIG.keys()),
+                     value=list(MODEL_CONFIG.keys())[0],
                      label="Select Model Version",
-                     value="G0-Release-2",
                      info="Choose which Snowflake model to use"
                  )

@@ -250,10 +136,8 @@
                  interactive=False
              )

-             # Invisible state variables
              history_state = gr.State([])

-             # Input and output
              with gr.Row():
                  with gr.Column(scale=4):
                      prompt = gr.Textbox(
@@ -264,7 +148,7 @@
                  with gr.Column(scale=1):
                      submit_btn = gr.Button("Send", variant="primary")
                      clear_btn = gr.Button("Clear Conversation")
-
+
              response_output = gr.Textbox(
                  value="",
                  label="Model Response",
@@ -273,106 +157,92 @@
                  interactive=False
              )

-             # Advanced parameters
+             # Generation Parameters
              with gr.Accordion("Generation Parameters", open=False):
                  with gr.Column(elem_classes="parameter-section"):
                      with gr.Row():
                          with gr.Column():
                              temperature = gr.Slider(
-                                 minimum=TEMPERATURE_MIN,
-                                 maximum=TEMPERATURE_MAX,
-                                 value=TEMPERATURE_DEFAULT,
-                                 step=0.05,
-                                 label="Temperature",
-                                 info="Higher = more creative, Lower = more deterministic"
+                                 minimum=TEMPERATURE_MIN, maximum=TEMPERATURE_MAX,
+                                 value=TEMPERATURE_DEFAULT, step=0.05,
+                                 label="Temperature"
                              )
-
                              top_p = gr.Slider(
-                                 minimum=TOP_P_MIN,
-                                 maximum=TOP_P_MAX,
-                                 value=TOP_P_DEFAULT,
-                                 step=0.05,
-                                 label="Top-p (nucleus sampling)",
-                                 info="Controls diversity via cumulative probability"
+                                 minimum=TOP_P_MIN, maximum=TOP_P_MAX,
+                                 value=TOP_P_DEFAULT, step=0.05,
+                                 label="Top-p"
                              )
-
                          with gr.Column():
                              top_k = gr.Slider(
-                                 minimum=TOP_K_MIN,
-                                 maximum=TOP_K_MAX,
-                                 value=TOP_K_DEFAULT,
-                                 step=1,
-                                 label="Top-k",
-                                 info="Limits word choice to top k options"
+                                 minimum=TOP_K_MIN, maximum=TOP_K_MAX,
+                                 value=TOP_K_DEFAULT, step=1,
+                                 label="Top-k"
                              )
-
                              max_new_tokens = gr.Slider(
-                                 minimum=MAX_NEW_TOKENS_MIN,
-                                 maximum=MAX_NEW_TOKENS_MAX,
-                                 value=MAX_NEW_TOKENS_DEFAULT,
-                                 step=8,
-                                 label="Maximum New Tokens",
-                                 info="Controls the length of generated response"
+                                 minimum=MAX_NEW_TOKENS_MIN, maximum=MAX_NEW_TOKENS_MAX,
+                                 value=MAX_NEW_TOKENS_DEFAULT, step=8,
+                                 label="Maximum New Tokens"
                              )

-             # Examples
+             # Example prompts
+             examples = [
+                 "Write a short story about a snowflake that comes to life.",
+                 "Explain the concept of artificial neural networks to a 10-year-old.",
+                 "What are some interesting applications of natural language processing?",
+                 "Write a haiku about programming.",
+                 "Create a dialogue between two AI researchers discussing the future of language models."
+             ]
+
              with gr.Accordion("Example Prompts", open=True):
                  with gr.Column(elem_classes="example-section"):
-                     example_btn = gr.Examples(
+                     gr.Examples(
                          examples=examples,
                          inputs=prompt,
                          label="Click on an example to try it",
                          examples_per_page=5
                      )

-         # Footer
          gr.HTML(f"""
          <div class="footer">
              <p>Snowflake Models Demo • Created with Gradio • {datetime.datetime.now().year}</p>
          </div>
          """)

-         # Set up interactions
+         # Interactions
          submit_btn.click(
              fn=generate_text,
              inputs=[prompt, model_version, temperature, top_p, top_k, max_new_tokens, history_state],
              outputs=[response_output, history_state, chat_history_display]
          )
-
          prompt.submit(
              fn=generate_text,
              inputs=[prompt, model_version, temperature, top_p, top_k, max_new_tokens, history_state],
              outputs=[response_output, history_state, chat_history_display]
          )
-
          clear_btn.click(
              fn=clear_conversation,
              inputs=[],
              outputs=[prompt, history_state, chat_history_display]
          )
-
+
          return demo

- # Load models and tokenizers
- print("Loading Snowflake models and tokenizers...")
+ # Initialize
+ print("Loading Snowflake models...")
  try:
-     (model_v1, tokenizer_v1, pipeline_v1), (model_v2, tokenizer_v2, pipeline_v2) = load_models_and_tokenizers()
-     print("Models loaded successfully!")
+     load_all_models()
+     print("All models loaded successfully!")
+     demo = create_demo()
  except Exception as e:
-     print(f"Error loading models: {str(e)}")
-     # Create a simple error demo if models fail to load
-     with gr.Blocks(css=css) as error_demo:
+     print(f"Error loading models: {e}")
+     with gr.Blocks(css=css) as demo:
          gr.HTML(f"""
          <div class="header" style="background-color: #ffebee;">
              <h1><span class="snowflake-icon">⚠️</span> Error Loading Models</h1>
              <p>There was a problem loading the Snowflake models: {str(e)}</p>
          </div>
          """)
-         demo = error_demo
-
-     # Create and launch the demo
-     demo = create_demo()

- # Launch the app
+ # Run app
  if __name__ == "__main__":
-     demo.launch()
+     demo.launch()
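
Review note: in load_all_models(), the safetensors branch assigns the raw return value of safetensors.torch.load_file() to model. That function returns a plain state dict (dict[str, torch.Tensor]), not a model object, so any repo that actually hit this branch would hand a dict to TextGenerationPipeline and fail at generation time. In practice the entries in MODEL_CONFIG are Hub repo ids rather than local directories, so os.path.exists(...) is false and the from_pretrained branch runs; from_pretrained already resolves the checkpoint format itself, including model.safetensors when the repo ships one. A minimal single-path loader could look like the sketch below (load_model is an illustrative name, not part of this commit):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline

    def load_model(model_id: str, max_length: int = 384):
        # Causal LMs often ship without a pad token; fall back to EOS.
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # from_pretrained picks the checkpoint format itself (safetensors or .bin),
        # so no manual load_file() branch is needed.
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto",  # requires the accelerate package
        )
        pipeline = TextGenerationPipeline(
            model=model,
            tokenizer=tokenizer,
            return_full_text=False,
            max_length=max_length,
        )
        return model, tokenizer, pipeline

With that, the loop body in load_all_models() reduces to model_registry[name] = load_model(model_id). A smaller observation: since the temperature slider is floored at TEMPERATURE_MIN = 0.1, do_sample=temperature > 0 is always true as wired; the guard only matters if a temperature of 0 is ever allowed.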