jingyang Ou committed on
Commit
a2c94d3
·
1 Parent(s): 57aea67

model update

Browse files
Files changed (3) hide show
  1. README.md +3 -0
  2. config.json +72 -0
  3. model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Reparameterized Absorbing Discrete Diffusion (RADD) small model trained with the t-DCE loss.
2
+ Code: https://github.com/ML-GSAI/RADD
3
+ Paper: https://arxiv.org/abs/2406.03736
config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ngpus": 32,
3
+ "tokens": 50257,
4
+ "gpt_dir": "assets/gpt2-large",
5
+ "outdir": "../output",
6
+ "training": {
7
+ "batch_size": 512,
8
+ "accum": 1,
9
+ "n_iters": 1000001,
10
+ "snapshot_freq": 50000,
11
+ "log_freq": 50,
12
+ "eval_freq": 100,
13
+ "snapshot_freq_for_preemption": 10000,
14
+ "weight": "standard",
15
+ "snapshot_sampling": false,
16
+ "ema": 0.9999,
17
+ "loss_type": "t_DCE"
18
+ },
19
+ "train_set": {
20
+ "name": "openwebtext",
21
+ "cache_dir": "assets/datasets/openwebtext"
22
+ },
23
+ "valid_set": {
24
+ "name": "wikitext103",
25
+ "cache_dir": "assets/datasets/wikitext"
26
+ },
27
+ "graph": {
28
+ "type": "absorb",
29
+ "file": "data",
30
+ "report_all": false
31
+ },
32
+ "noise": {
33
+ "type": "loglinear",
34
+ "sigma_min": 0.0001,
35
+ "sigma_max": 20
36
+ },
37
+ "sampling": {
38
+ "predictor": "euler",
39
+ "steps": 128,
40
+ "noise_removal": true
41
+ },
42
+ "eval": {
43
+ "batch_size": 512,
44
+ "perplexity": true,
45
+ "perplexity_batch_size": 16
46
+ },
47
+ "optim": {
48
+ "weight_decay": 0.03,
49
+ "optimizer": "AdamW",
50
+ "lr": 0.0003,
51
+ "beta1": 0.9,
52
+ "beta2": 0.999,
53
+ "eps": 1e-08,
54
+ "warmup": 2500,
55
+ "grad_clip": 1.0
56
+ },
57
+ "model": {
58
+ "name": "small_wotsm",
59
+ "type": "ddit_wot",
60
+ "hidden_size": 768,
61
+ "cond_dim": 128,
62
+ "length": 1024,
63
+ "n_blocks": 12,
64
+ "n_heads": 12,
65
+ "scale_by_sigma": true,
66
+ "dropout": 0.02,
67
+ "use_checkpoint": false,
68
+ "remove_time_condition": true,
69
+ "add_softmax": true,
70
+ "dtype": "float16"
71
+ }
72
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0e3d7fbd1284eb9ec9970cdb06b6270bc1cc810d78224e10c67770906021f9c
3
+ size 649074528