KoichiYasuoka committed on
Commit
107a0bc
·
1 Parent(s): ca02957

model improved

Browse files
Files changed (2) hide show
  1. maker.py +14 -1
  2. pytorch_model.bin +1 -1
maker.py CHANGED
@@ -68,7 +68,7 @@ class UDTriangularDataset(object):
68
  ids=[self.tokenizer.cls_token_id]
69
  upos=["SYM|x"]
70
  for i,k in enumerate(v):
71
- if len(v)<128 or x[i]=="o":
72
  ids.append(k)
73
  upos.append(p[i]+"|"+d[i] if h[i]==i+1 else p[i])
74
  for j in range(i+1,len(v)):
@@ -76,6 +76,19 @@ class UDTriangularDataset(object):
76
  upos.append(p[j]+"|"+d[j] if h[j]==i+1 else p[i]+"|"+d[i] if h[i]==j+1 else p[j])
77
  ids.append(self.tokenizer.sep_token_id)
78
  upos.append("SYM|x")
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  return {"input_ids":ids[:8192],"labels":[self.label2id[p] for p in upos[:8192]]}
80
  from transformers import AutoTokenizer,AutoConfig,AutoModelForTokenClassification,DataCollatorForTokenClassification,TrainingArguments,Trainer
81
  tkz=AutoTokenizer.from_pretrained(src)
 
68
  ids=[self.tokenizer.cls_token_id]
69
  upos=["SYM|x"]
70
  for i,k in enumerate(v):
71
+ if len(v)<127 or x[i]=="o":
72
  ids.append(k)
73
  upos.append(p[i]+"|"+d[i] if h[i]==i+1 else p[i])
74
  for j in range(i+1,len(v)):
 
76
  upos.append(p[j]+"|"+d[j] if h[j]==i+1 else p[i]+"|"+d[i] if h[i]==j+1 else p[j])
77
  ids.append(self.tokenizer.sep_token_id)
78
  upos.append("SYM|x")
79
+ i=0
80
+ while len(ids)>8192:
81
+ try:
82
+ i=ids.index(self.tokenizer.sep_token_id,ids.index(self.tokenizer.sep_token_id,i+1)+1)-1
83
+ except:
84
+ break
85
+ while len(ids)>8192 and ids[i]!=self.tokenizer.sep_token_id:
86
+ if upos[i].endswith("|x"):
87
+ ids.pop(i)
88
+ upos.pop(i)
89
+ i-=1
90
+ else:
91
+ break
92
  return {"input_ids":ids[:8192],"labels":[self.label2id[p] for p in upos[:8192]]}
93
  from transformers import AutoTokenizer,AutoConfig,AutoModelForTokenClassification,DataCollatorForTokenClassification,TrainingArguments,Trainer
94
  tkz=AutoTokenizer.from_pretrained(src)
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00b384817d170ba11a26b82d40295e13f6b9b9f417143890da581577cdc41851
3
  size 458220274
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0495bee3fdd2fbff26d2bbb1e6368b405767de06ad4de758a198f2aa8b6da608
3
  size 458220274