Commit
·
107a0bc
1
Parent(s):
ca02957
model improved
Browse files
- maker.py +14 -1
- pytorch_model.bin +1 -1
maker.py
CHANGED
@@ -68,7 +68,7 @@ class UDTriangularDataset(object):
|
|
68 |
ids=[self.tokenizer.cls_token_id]
|
69 |
upos=["SYM|x"]
|
70 |
for i,k in enumerate(v):
|
71 |
-
if len(v)<
|
72 |
ids.append(k)
|
73 |
upos.append(p[i]+"|"+d[i] if h[i]==i+1 else p[i])
|
74 |
for j in range(i+1,len(v)):
|
@@ -76,6 +76,19 @@ class UDTriangularDataset(object):
|
|
76 |
upos.append(p[j]+"|"+d[j] if h[j]==i+1 else p[i]+"|"+d[i] if h[i]==j+1 else p[j])
|
77 |
ids.append(self.tokenizer.sep_token_id)
|
78 |
upos.append("SYM|x")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
return {"input_ids":ids[:8192],"labels":[self.label2id[p] for p in upos[:8192]]}
|
80 |
from transformers import AutoTokenizer,AutoConfig,AutoModelForTokenClassification,DataCollatorForTokenClassification,TrainingArguments,Trainer
|
81 |
tkz=AutoTokenizer.from_pretrained(src)
|
|
|
68 |
ids=[self.tokenizer.cls_token_id]
|
69 |
upos=["SYM|x"]
|
70 |
for i,k in enumerate(v):
|
71 |
+
if len(v)<127 or x[i]=="o":
|
72 |
ids.append(k)
|
73 |
upos.append(p[i]+"|"+d[i] if h[i]==i+1 else p[i])
|
74 |
for j in range(i+1,len(v)):
|
|
|
76 |
upos.append(p[j]+"|"+d[j] if h[j]==i+1 else p[i]+"|"+d[i] if h[i]==j+1 else p[j])
|
77 |
ids.append(self.tokenizer.sep_token_id)
|
78 |
upos.append("SYM|x")
|
79 |
+
i=0
|
80 |
+
while len(ids)>8192:
|
81 |
+
try:
|
82 |
+
i=ids.index(self.tokenizer.sep_token_id,ids.index(self.tokenizer.sep_token_id,i+1)+1)-1
|
83 |
+
except:
|
84 |
+
break
|
85 |
+
while len(ids)>8192 and ids[i]!=self.tokenizer.sep_token_id:
|
86 |
+
if upos[i].endswith("|x"):
|
87 |
+
ids.pop(i)
|
88 |
+
upos.pop(i)
|
89 |
+
i-=1
|
90 |
+
else:
|
91 |
+
break
|
92 |
return {"input_ids":ids[:8192],"labels":[self.label2id[p] for p in upos[:8192]]}
|
93 |
from transformers import AutoTokenizer,AutoConfig,AutoModelForTokenClassification,DataCollatorForTokenClassification,TrainingArguments,Trainer
|
94 |
tkz=AutoTokenizer.from_pretrained(src)
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 458220274
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0495bee3fdd2fbff26d2bbb1e6368b405767de06ad4de758a198f2aa8b6da608
|
3 |
size 458220274
|