import gradio as gr
import torch
from model import Transformer
from transformers import AutoTokenizer  # pip install transformers
from utils import (
    DEVICE,
    DROPOUT,
    NUM_EMBED,
    NUM_HEAD,
    NUM_LAYER,
    BLOCK_SIZE,
    encode,
    decode,
)

tokenizer = AutoTokenizer.from_pretrained("Neu256/PromeTokenizer")
vocab_size = tokenizer.vocab_size

# instantiate the model with the same hyperparameters used during training
model = Transformer(
    vocab_size=vocab_size,
    num_embed=NUM_EMBED,
    block_size=BLOCK_SIZE,
    num_heads=NUM_HEAD,
    num_layers=NUM_LAYER,
    dropout=DROPOUT,
)

# move the model to GPU if available
m = model.to(DEVICE)

# load the trained weights and switch to inference mode
m.load_state_dict(torch.load("base_model_1.pth", map_location=torch.device(DEVICE)))
m.eval()

# print the number of parameters in the model
# print(
#     "Model with {:.2f}M parameters".format(sum(p.numel() for p in m.parameters()) / 1e6)
# )


def model_generate(text, number_of_new_tokens, temperature, top_p):
    # encode the prompt, add a batch dimension, and move it to the model's device
    context = encode(str(text), tokenizer).unsqueeze(0).to(DEVICE)
    # sample new tokens and decode the full sequence back to text;
    # cast to int because Gradio sliders return floats
    return decode(
        enc_sec=m.generate(
            idx=context,
            max_new_tokens=int(number_of_new_tokens),
            temperature=temperature,
            top_p=top_p,
        )[0],
        tokenizer=tokenizer,
    )


iface = gr.Interface(
    fn=model_generate,
    inputs=[
        "text",
        gr.Slider(10, 1000, label="Max new tokens"),
        gr.Slider(0, 1, value=0.7, step=0.05, label="Temperature"),
        gr.Slider(0, 1, value=0.95, step=0.05, label="Top-p"),
    ],
    outputs="text",
)
iface.launch()
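
# Optional sanity check without the UI: uncomment and run before iface.launch()
# (which blocks). The prompt and sampling settings below are hypothetical examples.
# print(model_generate("Once upon a time", number_of_new_tokens=50,
#                      temperature=0.7, top_p=0.95))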