import torch
from model import TransformerModel  # or however you define your model classes
from transformers import AutoTokenizer
import gradio as gr

# Load the half-precision checkpoint (a dict whose weights are stored under
# the "model_state_dict" key)
checkpoint = torch.load("model_weights_fp16.pt", map_location="cpu")
state_dict_fp16 = checkpoint["model_state_dict"]

# Instantiate the model (created in full precision here; converted to FP16 below)
model = TransformerModel(
    vocab_size=49152,
    hidden_size=576,
    num_hidden_layers=30,
    num_attention_heads=9,
    intermediate_size=1536,
    num_key_value_heads=3,
    max_position_embeddings=2048,
    rms_norm_eps=1e-5,
    hidden_act="silu",
    tie_word_embeddings=True,
)
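
# (These hyperparameters appear to match the SmolLM-135M architecture, which
# pairs with the cosmo2 tokenizer loaded below.)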

# Convert the model's parameters to FP16 so their dtype matches the checkpoint
model.half()

# Load the half-precision weights; strict=False tolerates missing or
# unexpected keys instead of raising
model.load_state_dict(state_dict_fp16, strict=False)
model.eval()

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/cosmo2-tokenizer")

def generate_text(prompt, max_length=50):
    # Assumes TransformerModel implements a HuggingFace-style generate() method
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    with torch.no_grad():
        output_ids = model.generate(input_ids, max_length=max_length, do_sample=True)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
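
# For reference, a checkpoint in the format expected above could have been
# produced like this (a sketch; the training-side model object is assumed):
#
#   torch.save(
#       {"model_state_dict": model.half().state_dict()},
#       "model_weights_fp16.pt",
#   )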

# Expose the generator through a minimal text-in/text-out Gradio UI
gr.Interface(fn=generate_text, inputs="text", outputs="text").launch()
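
# Once launched, Gradio serves the demo at http://127.0.0.1:7860 by default;
# calling launch(share=True) would additionally create a temporary public link.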