bmarci committed on
Commit
89a49c1
·
1 Parent(s): cdba24f
Files changed (2) hide show
  1. README.md +1 -0
  2. utils/model_utils.py +51 -0
README.md CHANGED
@@ -9,6 +9,7 @@ app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
  short_description: An example for using NextStep-1-Large
 
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
9
  pinned: false
10
  license: apache-2.0
11
  short_description: An example for using NextStep-1-Large
12
+ python_version: "3.11"
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
utils/model_utils.py CHANGED
@@ -1,4 +1,55 @@
1
  import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
 
4
  def expand_t(t, x):
 
1
  import torch
2
+ import numpy as np
3
+
4
+
5
def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False, extra_tokens=0, pe_interpolation=1.0):
    """
    Build a fixed 2D sine/cosine positional-embedding table for a square grid.

    embed_dim: width of each embedding row (forwarded to
        get_2d_sincos_pos_embed_from_grid).
    grid_size: int, number of positions along both the height and width axes.
    cls_token, extra_tokens: when cls_token is truthy AND extra_tokens > 0,
        `extra_tokens` all-zero rows are prepended to the table. With the
        default extra_tokens=0 nothing is prepended, even if cls_token=True.
    pe_interpolation: divisor applied to the integer grid coordinates before
        they are encoded.
    return:
    pos_embed: [grid_size*grid_size, embed_dim], or
        [extra_tokens+grid_size*grid_size, embed_dim] when rows are prepended.
    """
    # Height and width share the same scaled coordinate axis.
    axis_coords = np.arange(grid_size, dtype=np.float32) / pe_interpolation

    # np.meshgrid takes the width coordinates as its first argument, so the
    # width mesh ends up at index 0 of the stacked array.
    mesh = np.stack(np.meshgrid(axis_coords, axis_coords), axis=0)
    mesh = mesh.reshape([2, 1, grid_size, grid_size])

    table = get_2d_sincos_pos_embed_from_grid(embed_dim, mesh)

    if not (cls_token and extra_tokens > 0):
        return table

    # Zero-filled rows reserved for the extra (e.g. class) tokens go first.
    zero_rows = np.zeros([extra_tokens, embed_dim])
    return np.concatenate([zero_rows, table], axis=0)
21
+
22
+
23
def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
    """
    Encode a two-channel coordinate grid into sine/cosine embeddings.

    embed_dim: total output width; must be even (checked with an assert).
    grid: indexable with 0 and 1; each entry is a coordinate array that is
        encoded independently with embed_dim // 2 channels.
    return: the encoding of grid[0] followed by the encoding of grid[1],
        joined along axis 1.
    """
    assert embed_dim % 2 == 0

    # Each of the two coordinate channels gets half of the embedding width.
    half_dim = embed_dim // 2
    first_half = get_1d_sincos_pos_embed_from_grid(half_dim, grid[0])  # (H*W, D/2)
    second_half = get_1d_sincos_pos_embed_from_grid(half_dim, grid[1])  # (H*W, D/2)

    return np.concatenate([first_half, second_half], axis=1)  # (H*W, D)
32
+
33
+
34
def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
    """
    Encode scalar positions as concatenated sine and cosine features.

    embed_dim: output dimension for each position; must be even (checked
        with an assert).
    pos: positions to be encoded. Any array-like is accepted (ndarray, list,
        tuple, scalar) and it is flattened to size (M,) before encoding.
    out: (M, D) array whose first D/2 columns are sin(pos * omega) and whose
        last D/2 columns are cos(pos * omega), where
        omega[i] = 1 / 10000**(i / (D/2)).
    """
    assert embed_dim % 2 == 0

    # Geometric frequency ladder from 1 down towards 1/10000.
    omega = np.arange(embed_dim // 2, dtype=np.float64)
    omega /= embed_dim / 2.0
    omega = 1.0 / 10000**omega  # (D/2,)

    # Coerce first so plain Python sequences work too; the original called
    # pos.reshape directly, which fails on a list despite the documented input.
    pos = np.asarray(pos).reshape(-1)  # (M,)
    out = np.einsum("m,d->md", pos, omega)  # (M, D/2), outer product

    emb_sin = np.sin(out)  # (M, D/2)
    emb_cos = np.cos(out)  # (M, D/2)

    emb = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)
    return emb
53
 
54
 
55
  def expand_t(t, x):