Upload 2 files
- app/ui/ui_colab.py +80 -0
- app/ui/ui_spaces.py +69 -0
app/ui/ui_colab.py
ADDED
@@ -0,0 +1,80 @@
+import gradio as gr
+import os
+from app.core.orchestrator import launch_training
+from app.utils.validation import safe_tag
+
+DEFAULTS = {'tokenizer_name': 'google/gemma-3-270m', 'block_size': 512}
+ALGOS_BY_ARCH = {'Official Gemma (Baseline)': ['backprop'], 'Custom Research Transformer (Experimental)': ['feedback_alignment','synthetic_gradients','forward_forward']}
+
+def on_arch_change(arch):
+    return gr.update(choices=ALGOS_BY_ARCH[arch], value=ALGOS_BY_ARCH[arch][0])
+
+def run_job(exp_tag, arch, dataset, tokenizer_name, algorithm, batch_size, epochs, max_batches,
+            grad_accum, lr, weight_decay, main_lr, synth_lr, ff_lr, ff_thresh, block_size, push_github):
+    try:
+        safe_tag(exp_tag)
+    except Exception as e:
+        yield f"❌ Invalid experiment tag: {e}"
+        return
+
+    params = {'epochs': int(epochs), 'grad_accum_steps': int(grad_accum)}
+    if algorithm == 'backprop':
+        params.update({'learning_rate': float(lr), 'weight_decay': float(weight_decay)})
+    elif algorithm == 'synthetic_gradients':
+        params.update({'main_learning_rate': float(main_lr), 'synth_learning_rate': float(synth_lr)})
+    elif algorithm == 'feedback_alignment':
+        params.update({'learning_rate': float(lr)})
+    elif algorithm == 'forward_forward':
+        params.update({'learning_rate': float(ff_lr), 'threshold': float(ff_thresh)})
+
+    results_file = os.path.join('local_results', f"{exp_tag}_results.json")
+    cfg = {
+        'exp_tag': exp_tag,
+        'model_architecture': arch,
+        'algorithm': algorithm,
+        'dataset_name': dataset,
+        'tokenizer_name': tokenizer_name or DEFAULTS['tokenizer_name'],
+        'batch_size': int(batch_size),
+        'results_file': results_file,
+        'params': params,
+        'max_batches_per_epoch': int(max_batches),
+        'block_size': int(block_size or DEFAULTS['block_size']),
+    }
+    if push_github and (not os.getenv('GITHUB_TOKEN') or not os.getenv('GITHUB_RESULTS_REPO')):
+        yield "❌ To push results to GitHub, set GITHUB_TOKEN and GITHUB_RESULTS_REPO environment variables in Colab (do not print them)."
+        return
+
+    yield from launch_training(cfg, timeout_sec=60*60*3)
+
+with gr.Blocks(theme=gr.themes.Soft()) as colab:
+    gr.Markdown("# 🧪 LLM Algorithm Lab — Colab UI (Full)")
+    with gr.Row():
+        with gr.Column():
+            exp_tag = gr.Textbox(label="Experiment Tag", value="exp")
+            arch = gr.Dropdown(choices=list(ALGOS_BY_ARCH.keys()), value="Official Gemma (Baseline)")
+            algo = gr.Dropdown(choices=ALGOS_BY_ARCH["Official Gemma (Baseline)"], value="backprop")
+            arch.change(on_arch_change, inputs=arch, outputs=algo)
+            dataset = gr.Textbox(label="HF Dataset", value="stas/tiny-stories")
+            tokenizer_name = gr.Textbox(label="Tokenizer/Model ID", value="google/gemma-3-270m")
+            block_size = gr.Slider(128, 2048, value=512, step=64, label="Block Size")
+            batch_size = gr.Slider(1, 256, value=16, step=1, label="Batch Size")
+            epochs = gr.Slider(1, 50, value=3, step=1, label="Epochs")
+            max_batches = gr.Slider(0, 50000, value=0, step=50, label="Max Batches / Epoch (0=full)")
+            grad_accum = gr.Slider(1, 64, value=1, step=1, label="Grad Accum (baseline)")
+            with gr.Accordion("Optimizer / LR", open=False):
+                lr = gr.Number(label="LR (BP/FA)", value=5e-5)
+                weight_decay = gr.Number(label="Weight Decay (BP)", value=0.01)
+                main_lr = gr.Number(label="Main LR (SG)", value=1e-5)
+                synth_lr = gr.Number(label="Synth LR (SG)", value=1e-4)
+                ff_lr = gr.Number(label="Layer LR (FF)", value=1e-4)
+                ff_thresh = gr.Number(label="Goodness Threshold (FF)", value=2.0)
+        with gr.Column():
+            push_github = gr.Checkbox(label="Push results to GitHub (requires token in env)", value=False)
+            start = gr.Button("Start Training", variant="primary")
+            logs = gr.Textbox(label="Live Logs", lines=26)
+    start.click(run_job,
+                inputs=[exp_tag, arch, dataset, tokenizer_name, algo, batch_size, epochs, max_batches, grad_accum, lr, weight_decay, main_lr, synth_lr, ff_lr, ff_thresh, block_size, push_github],
+                outputs=logs)
+
+def build():
+    return colab
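Note that `run_job` is a generator wired to a single `Textbox` output, so each yielded string replaces the log box wholesale; the log view only grows if `launch_training` yields cumulative text (the Spaces UI below instead accumulates lines itself). Below is a minimal stand-in for the orchestrator, useful for exercising this UI without a real training run; the helper name `fake_launch_training` and its behavior are assumptions, not part of this commit:

# Hypothetical stand-in for app.core.orchestrator.launch_training (assumed
# signature: launch_training(cfg: dict, timeout_sec: int) -> Iterator[str]).
# Yields cumulative log text so the Textbox keeps its history between yields.
def fake_launch_training(cfg, timeout_sec=60):
    lines = [f"Starting '{cfg['exp_tag']}' with {cfg['algorithm']} ..."]
    yield lines[0]
    for step in range(1, 4):
        lines.append(f"step {step}/3 ...")
        yield "\n".join(lines)  # full accumulated log, not just the new line
    lines.append("Done.")
    yield "\n".join(lines)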
app/ui/ui_spaces.py
ADDED
@@ -0,0 +1,69 @@
+import gradio as gr
+import os
+from app.core.orchestrator import launch_training
+
+DEMO_LIMITS = {'max_batches_per_epoch': 100, 'batch_size': 8, 'epochs': 1}
+
+# Wrapper that streams logs and appends them
+def on_start(exp_tag, model_arch, dataset, batch_size, algorithm, epochs, max_batches):
+    cfg = {
+        'exp_tag': exp_tag or 'demo',
+        'model_architecture': model_arch,
+        'algorithm': algorithm,
+        'dataset_name': dataset,
+        'batch_size': min(int(batch_size), DEMO_LIMITS['batch_size']),
+        'results_file': os.path.join('local_results', f"{exp_tag or 'demo'}_results.json"),
+        'params': {'epochs': min(int(epochs), DEMO_LIMITS['epochs'])},
+        'max_batches_per_epoch': min(int(max_batches), DEMO_LIMITS['max_batches_per_epoch']),
+        'tokenizer_name': 'google/gemma-3-270m',
+        'block_size': 256,
+    }
+
+    logs_accum = ""  # keep full history here
+    for log_line in launch_training(cfg, timeout_sec=20*60):
+        logs_accum += log_line + "\n"  # append new line
+        yield logs_accum  # send full accumulated logs
+
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🧪 LLM Algorithm Lab — Demo (Spaces)")
+    with gr.Row():
+        with gr.Column():
+            exp_tag = gr.Textbox(label="Experiment Tag", value="demo")
+
+            model_arch = gr.Dropdown(
+                choices=["Official Gemma (Baseline)", "Custom Research Transformer (Experimental)"],
+                value="Official Gemma (Baseline)",
+                interactive=True,
+                allow_custom_value=False
+            )
+
+            algorithm = gr.Dropdown(
+                choices=["backprop", "feedback_alignment", "synthetic_gradients", "forward_forward"],
+                value="backprop",
+                interactive=True,
+                allow_custom_value=False
+            )
+
+            dataset = gr.Textbox(
+                label="HF Dataset",
+                value="roneneldan/TinyStories"
+            )
+
+            batch_size = gr.Slider(1, 16, value=8, step=1, label="Batch Size (clamped)")
+            epochs = gr.Slider(1, 3, value=1, step=1, label="Epochs (demo)")
+            max_batches = gr.Slider(10, 500, value=100, step=10, label="Max Batches / Epoch")
+
+        with gr.Column():
+            start = gr.Button("Start Demo Run", variant="primary")
+            logs = gr.Textbox(label="Live Logs", lines=20)
+
+    start.click(
+        on_start,
+        inputs=[exp_tag, model_arch, dataset, batch_size, algorithm, epochs, max_batches],
+        outputs=logs
+    )
+
+def build():
+    return demo
+
+
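Both files expose a `build()` factory, which suggests a shared entry point selects between them. A minimal sketch of such a launcher follows; the `app.py` filename and the `APP_MODE` environment variable are assumptions, not shown in this commit:

# Hypothetical entry point (app.py), not part of this commit.
import os

# Choose the full Colab UI or the clamped Spaces demo via an assumed env var.
if os.getenv("APP_MODE", "spaces") == "colab":
    from app.ui.ui_colab import build
else:
    from app.ui.ui_spaces import build

app = build()
app.queue().launch()  # queue() lets the generator callbacks stream log updates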