PromeMobile / app.py
Neu256's picture
Update app.py
a2d8cae verified
raw
history blame
1.43 kB
import gradio as gr
import torch
import numpy as np
from model import Transformer
from transformers import PreTrainedTokenizerFast
from tokenizers import Tokenizer
from utils import (
DEVICE,
DROPOUT,
NUM_EMBED,
NUM_HEAD,
NUM_LAYER,
BLOCK_SIZE,
encode,
decode
)
# Wrap the serialized BPE tokenizer ("BPE.json", tokenizers format) in the
# HF fast-tokenizer interface so encode/decode helpers can use it uniformly.
tokenizer = PreTrainedTokenizerFast(tokenizer_object=Tokenizer.from_file("BPE.json"))
vocab_size = tokenizer.vocab_size  # model's output dimension must match the tokenizer
# Build the Transformer with the hyperparameters shared via utils; the
# architecture must match the checkpoint loaded below or load_state_dict fails.
model = Transformer(
vocab_size=vocab_size,
num_embed=NUM_EMBED,
block_size=BLOCK_SIZE,
num_heads=NUM_HEAD,
num_layers=NUM_LAYER,
dropout=DROPOUT
)
# load model to GPU if available (DEVICE comes from utils)
m = model.to(DEVICE)
# Load pretrained weights; map_location keeps CPU-only hosts working when the
# checkpoint was saved from a GPU run.
m.load_state_dict(torch.load("base_model.pth", map_location=torch.device(DEVICE)))
m.eval()  # inference mode: disables dropout for deterministic generation
#print(
#    "Model with {:.2f}M parameters".format(sum(p.numel() for p in m.parameters()) / 1e6)
#)
def model_generate(text, number_of_new_token, temperature, top_p):
    """Generate a continuation of *text* with the pretrained model.

    Args:
        text: Prompt string typed into the Gradio textbox.
        number_of_new_token: Maximum number of tokens to generate. Gradio
            sliders deliver floats, so this is coerced to int before use.
        temperature: Softmax temperature for sampling (0-1 slider).
        top_p: Nucleus-sampling probability mass cutoff (0-1 slider).

    Returns:
        The decoded generated text (prompt included, as produced by decode).
    """
    print(text)  # log the incoming prompt for Space debugging
    context = encode(str(text), tokenizer).unsqueeze(0).to(DEVICE)
    # Cast slider value to int: m.generate expects an integer token count.
    gen = decode(
        enc_sec=m.generate(
            idx=context,
            max_new_tokens=int(number_of_new_token),
            temperature=temperature,
            top_p=top_p,
        )[0],
        tokenizer=tokenizer,
    )
    print(gen)  # log the output alongside the prompt
    return gen
# Assemble the demo UI: prompt textbox plus three sliders (token budget,
# temperature, top-p), wired to model_generate, emitting plain text.
_token_slider = gr.Slider(10, 1000)
_temp_slider = gr.Slider(0, 1, value=0.7, step=0.05)
_top_p_slider = gr.Slider(0, 1, value=0.95, step=0.05)
iface = gr.Interface(
    fn=model_generate,
    inputs=["text", _token_slider, _temp_slider, _top_p_slider],
    outputs="text",
)
iface.launch()