imvladikon
committed on
Commit
·
8016cd6
1
Parent(s):
44bfa66
Update README.md
Browse files
README.md
CHANGED
@@ -41,6 +41,21 @@ for token in top_5_tokens:
|
|
41 |
# אילת היא מטרופולין המהווה את מרכז הכלכלה
|
42 |
```
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
When using AlephBertGimmel, please reference:
|
46 |
|
|
|
41 |
# אילת היא מטרופולין המהווה את מרכז הכלכלה
|
42 |
```
|
43 |
|
44 |
+
```
|
45 |
+
def ppl_naive(text, model, tokenizer):
|
46 |
+
input = tokenizer.encode(text, return_tensors="pt")
|
47 |
+
loss = model(input, labels=input)[0]
|
48 |
+
return torch.exp(loss).item()
|
49 |
+
|
50 |
+
text = """{} היא עיר הבירה של מדינת ישראל, והעיר הגדולה ביותר בישראל בגודל האוכלוסייה"""
|
51 |
+
|
52 |
+
for word in ["חיפה", "ירושלים", "תל אביב"]:
|
53 |
+
print(ppl_naive(text.format(word), model, tokenizer))
|
54 |
+
|
55 |
+
# 10.181422233581543
|
56 |
+
# 9.743313789367676
|
57 |
+
# 10.171016693115234
|
58 |
+
```
|
59 |
|
60 |
When using AlephBertGimmel, please reference:
|
61 |
|