	Update app.py
app.py CHANGED
```diff
@@ -1,64 +1,76 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+import torch
+from beeper_model import BeeperRoseGPT, generate  # assumed modular split
+from tokenizers import Tokenizer
+from huggingface_hub import hf_hub_download
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+# ----------------------------
+# 🔧 Load Model and Tokenizer
+# ----------------------------
+config = {
+    "context": 512,
+    "vocab_size": 8192,
+    "dim": 512,
+    "n_heads": 8,
+    "n_layers": 6,
+    "mlp_ratio": 4.0,
+    "temperature": 0.9,
+    "top_k": 40,
+    "top_p": 0.9,
+    "repetition_penalty": 1.1,
+    "presence_penalty": 0.6,
+    "frequency_penalty": 0.0,
+    "tokenizer_path": "beeper.tokenizer.json"
+}
 
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
+# Load weights from Hugging Face repo if not available locally
+repo_id = "AbstractPhil/beeper-rose-tinystories-6l-512d-ctx512"
+model_file = hf_hub_download(repo_id=repo_id, filename="beeper_final.safetensors")
+tokenizer_file = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")
 
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+infer = BeeperRoseGPT(config).to(device)
+infer.load_state_dict(torch.load(model_file, map_location=device))
+infer.eval()
+tok = Tokenizer.from_file(tokenizer_file)
 
-    messages.append({"role": "user", "content": message})
+# ----------------------------
+# 💬 Gradio Chat Wrapper
+# ----------------------------
+def beeper_reply(message, history, temperature, top_k, top_p):
+    prompt = "\n".join([f"User: {h[0]}\nBeeper: {h[1]}" for h in history if h[0] and h[1]])
+    prompt += f"\nUser: {message}\nBeeper:"
 
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
+    out = generate(
+        model=infer,
+        tok=tok,
+        cfg=config,
+        prompt=prompt,
+        max_new_tokens=128,
         temperature=temperature,
+        top_k=top_k,
         top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
+        repetition_penalty=config["repetition_penalty"],
+        presence_penalty=config["presence_penalty"],
+        frequency_penalty=config["frequency_penalty"],
+        device=device,
+        detokenize=True
+    )
+    yield out
 
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
+# ----------------------------
+# 🖼️ Interface
+# ----------------------------
 demo = gr.ChatInterface(
-    respond,
+    beeper_reply,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
+        gr.Slider(0.1, 1.5, value=0.9, step=0.1, label="Temperature"),
+        gr.Slider(1, 100, value=40, step=1, label="Top-k"),
+        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
     ],
+    chatbot=gr.Chatbot(label="Hello I'm Beeper (Rose-based LLM)! Please be friendly I don't know very much yet!")
 )
 
-
 if __name__ == "__main__":
     demo.launch()
```
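One caveat in the committed code: `beeper_final.safetensors` is a safetensors file, but `torch.load` reads only pickle-based checkpoints, so `infer.load_state_dict(torch.load(model_file, map_location=device))` will fail at startup. A minimal sketch of the fix, assuming the file holds a flat state dict for `BeeperRoseGPT`:

```python
# Assumption: beeper_final.safetensors stores a plain state dict.
from safetensors.torch import load_file

state_dict = load_file(model_file, device=str(device))  # safetensors' own loader
infer.load_state_dict(state_dict)
infer.eval()
```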
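On the interface side, Gradio passes each `additional_inputs` value to `beeper_reply` as an extra positional argument after `message` and `history`, in the order the sliders are listed; that is how `temperature`, `top_k`, and `top_p` reach `generate`. For what those knobs (plus the penalty settings in `config`) typically do inside a decoding loop, here is a self-contained sketch of one sampling step; `sample_next`, `logits`, and `prev_ids` are illustrative names, not part of `beeper_model`:

```python
import torch

def sample_next(logits, prev_ids, temperature=0.9, top_k=40, top_p=0.9,
                repetition_penalty=1.1, presence_penalty=0.6, frequency_penalty=0.0):
    """One decoding step over 1-D next-token logits; prev_ids are tokens emitted so far."""
    logits = logits.clone()
    if prev_ids.numel() > 0:
        # Repetition penalty (CTRL-style): damp logits of already-seen tokens.
        seen = prev_ids.unique()
        pos = logits[seen] > 0
        logits[seen] = torch.where(pos, logits[seen] / repetition_penalty,
                                   logits[seen] * repetition_penalty)
        # Presence/frequency penalties (OpenAI-style): flat and count-proportional.
        counts = torch.bincount(prev_ids, minlength=logits.numel()).float()
        logits -= presence_penalty * (counts > 0).float()
        logits -= frequency_penalty * counts
    logits /= max(temperature, 1e-5)
    # Top-k: drop everything below the k-th highest logit.
    if top_k > 0:
        kth = torch.topk(logits, min(top_k, logits.numel())).values[-1]
        logits[logits < kth] = float("-inf")
    # Top-p (nucleus): keep the smallest prefix of tokens whose mass reaches p.
    probs = torch.softmax(logits, dim=-1)
    sorted_probs, order = torch.sort(probs, descending=True)
    keep = torch.cumsum(sorted_probs, dim=-1) - sorted_probs < top_p
    probs[order[~keep]] = 0.0
    return torch.multinomial(probs / probs.sum(), 1).item()

# Example: one step over the 8192-token vocab from `config`.
next_id = sample_next(torch.randn(8192), torch.tensor([5, 17, 17, 42]))
```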
