"""Convert a Whisper ggml model file (whisper.cpp format) back into a
PyTorch checkpoint.

Usage: convert-ggml-to-pt.py model.bin dir-output
"""
import struct
import sys
from collections import OrderedDict
from pathlib import Path

import numpy as np
import torch

from whisper import ModelDimensions, Whisper

if len(sys.argv) < 3:
    print("Usage: convert-ggml-to-pt.py model.bin dir-output")
    sys.exit(1)

fname_inp = Path(sys.argv[1])
dir_out = Path(sys.argv[2])
fname_out = dir_out / "torch-model.pt"

# Open the ggml file
with open(fname_inp, "rb") as f:
    # Header: magic number followed by eleven int32 hyperparameters (48 bytes)
    (magic_number, n_vocab, n_audio_ctx, n_audio_state, n_audio_head,
     n_audio_layer, n_text_ctx, n_text_state, n_text_head, n_text_layer,
     n_mels, use_f16) = struct.unpack("12i", f.read(48))
    if magic_number != 0x67676D6C:  # "ggml"
        print(f"Invalid magic number {magic_number:#x}; not a ggml file?")
        sys.exit(1)
    print(f"Magic number: {magic_number}")
    print(f"Vocab size: {n_vocab}")
    print(f"Audio context size: {n_audio_ctx}")
    print(f"Audio state size: {n_audio_state}")
    print(f"Audio head size: {n_audio_head}")
    print(f"Audio layer size: {n_audio_layer}")
    print(f"Text context size: {n_text_ctx}")
    print(f"Text state size: {n_text_state}")
    print(f"Text head size: {n_text_head}")
    print(f"Text layer size: {n_text_layer}")
    print(f"Mel size: {n_mels}")
    print(f"Use f16: {use_f16}")

    # Mel filterbank: two int32 dimensions followed by the filter values as f32
    filters_shape_0 = struct.unpack("i", f.read(4))[0]
    print(f"Filters shape 0: {filters_shape_0}")
    filters_shape_1 = struct.unpack("i", f.read(4))[0]
    print(f"Filters shape 1: {filters_shape_1}")

    # Read the mel filter values; the PyTorch checkpoint does not store them,
    # but they must be consumed to advance past this section of the file.
    mel_filters = np.fromfile(
        f, dtype=np.float32, count=filters_shape_0 * filters_shape_1
    ).reshape(filters_shape_0, filters_shape_1)
            
    # Tokenizer vocabulary: a token count, then (length, bytes) pairs. The
    # tokens are likewise not needed for the checkpoint, but reading them
    # moves the file pointer to the tensor data that follows.
    num_tokens = struct.unpack("i", f.read(4))[0]
    tokens = {}
    for i in range(num_tokens):
        token_len = struct.unpack("i", f.read(4))[0]
        token = f.read(token_len)
        tokens[token] = i
    
    # Read model tensors: each record is (n_dims, name_length, ftype),
    # followed by the dimensions, the tensor name, and the raw data.
    model_state_dict = OrderedDict()
    while True:
        try:
            n_dims, name_length, ftype = struct.unpack("iii", f.read(12))
        except struct.error:
            break  # end of file
        dims = [struct.unpack("i", f.read(4))[0] for _ in range(n_dims)]
        # ggml stores dimensions innermost-first; reverse for numpy/torch
        dims = dims[::-1]
        name = f.read(name_length).decode("utf-8")
        if ftype == 1:  # f16
            data = np.fromfile(f, dtype=np.float16, count=np.prod(dims)).reshape(dims)
        else:  # f32
            data = np.fromfile(f, dtype=np.float32, count=np.prod(dims)).reshape(dims)

        # The ggml conversion stores the conv biases as (n, 1); flatten them
        # back to the 1-D shape the PyTorch model expects.
        if name in ["encoder.conv1.bias", "encoder.conv2.bias"]:
            data = data[:, 0]

        model_state_dict[name] = torch.from_numpy(data)
    
# The complete state_dict is now in model_state_dict. Rebuild a Whisper model
# with the same architecture and load the weights into it; this also checks
# that every tensor has the expected name and shape.
dims = ModelDimensions(
    n_mels=n_mels,
    n_audio_ctx=n_audio_ctx,
    n_audio_state=n_audio_state,
    n_audio_head=n_audio_head,
    n_audio_layer=n_audio_layer,
    n_text_ctx=n_text_ctx,
    n_text_state=n_text_state,
    n_text_head=n_text_head,
    n_text_layer=n_text_layer,
    n_vocab=n_vocab,
)
model = Whisper(dims)
model.load_state_dict(model_state_dict)

# Save the model in PyTorch format, using the same checkpoint layout as the
# official Whisper releases ("dims" + "model_state_dict") so the file can be
# re-loaded without knowing the hyperparameters in advance.
torch.save({"dims": vars(dims), "model_state_dict": model.state_dict()}, fname_out)
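
# Optional sanity check: a minimal sketch, assuming the checkpoint layout
# written above. Re-load the file and rebuild the model from it, mirroring
# how whisper's own load_model reads a checkpoint.
checkpoint = torch.load(fname_out, map_location="cpu")
check_model = Whisper(ModelDimensions(**checkpoint["dims"]))
check_model.load_state_dict(checkpoint["model_state_dict"])
print(f"Saved and verified {fname_out}")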