Spaces:
Sleeping
Sleeping
File size: 1,259 Bytes
2c2081e dbfaf91 2c2081e 66fcc65 2c2081e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
"""Test gen_cmat.
king
In [21]: len(de)
Out[21]: 51
In [22]: len(en)
Out[22]: 51
In [23]: len(" ".join(en))
Out[23]: 11208
In [24]: len(" ".join(de))
Out[24]: 13532
In [25]: %time en_vec = model_s.encode(en)
CPU times: user 22 s, sys: 436 ms, total: 22.4 s
Wall time: 22.4 s
In [26]: %time de_vec = model_s.encode(de)
CPU times: user 22.8 s, sys: 311 ms, total: 23.1 s
Wall time: 23.1 s
en1 = loadparas("data/sternstunden04-en.txt")
en2 = loadparas("data/sternstunden04-de.txt")
len(en1) # 30
len(" ".join(en1)) # 29718
len(" ".join(en2)) # 31478
"""
from cmat2aset310 import cmat2aset
from aset2pairs import aset2pairs
from st_mlbee.gen_cmat import gen_cmat
from st_mlbee.loadtext import loadparas
# Module-level fixtures: both texts are loaded once at import time and the
# resulting matrix is shared by every test in this file.
paras1, paras2 = (
    loadparas(filename)
    for filename in (
        "data/sternstunden04-en.txt",
        "data/sternstunden04-de.txt",
    )
)
cmat = gen_cmat(paras1, paras2)
def test_gen_cmat_sternstunden04():
    """Check the shape of the matrix gen_cmat built for sternstunden04."""
    # Order matters: the second text's length comes first in the shape,
    # the first text's length second.
    expected_shape = (len(paras2), len(paras1))
    assert cmat.shape == expected_shape
def test_aset2pairs():
    """Check the third pair produced by cmat2aset followed by aset2pairs."""
    third_pair = aset2pairs(paras1, paras2, cmat2aset(cmat))[2]

    # Both text slots of the third pair must mention the keyword.
    keyword = "Marseillaise"
    assert keyword in third_pair[0]
    assert keyword in third_pair[1]

    # The value stored in the third slot must exceed 0.95.
    assert third_pair[2] > 0.95
|