# Gradio Space: FP16 transformer text-generation demo.
# (Removed HuggingFace Spaces page status residue: "Spaces: / Sleeping".)
import torch
import torch.nn as nn

import gradio as gr
from transformers import AutoTokenizer

from model import TransformerModel  # project-local model definition
# Load half-precision state_dict | |
checkpoint = torch.load("model_weights_fp16.pt", map_location="cpu") | |
state_dict_fp16 = checkpoint["model_state_dict"] | |
# Create model in FP16 | |
model = TransformerModel( | |
vocab_size=49152, | |
hidden_size=576, | |
num_hidden_layers=30, | |
num_attention_heads=9, | |
intermediate_size=1536, | |
num_key_value_heads=3, | |
max_position_embeddings=2048, | |
rms_norm_eps=1e-5, | |
hidden_act="silu", | |
tie_word_embeddings=True, | |
) | |
# Convert model to half precision | |
model.half() | |
# Load the half-precision weights | |
model.load_state_dict(state_dict_fp16, strict=False) | |
model.eval() | |
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/cosmo2-tokenizer") | |
def generate_text(prompt, max_length=50): | |
input_ids = tokenizer.encode(prompt, return_tensors="pt") | |
with torch.no_grad(): | |
output_ids = model.generate(input_ids, max_length=max_length, do_sample=True) | |
return tokenizer.decode(output_ids[0], skip_special_tokens=True) | |
gr.Interface(fn=generate_text, inputs="text", outputs="text").launch() | |