Commit
·
0147017
1
Parent(s):
427c6eb
add results
Browse files- index.html +309 -2
index.html
CHANGED
@@ -338,7 +338,314 @@
|
|
338 |
<section class="section">
|
339 |
<div class="container is-max-desktop">
|
340 |
<h2 class="title is-2 has-text-centered">Experiment Results</h2>
|
341 |
-
<h2 class="title is-3 has-text-centered">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
<div class="columns is-centered">
|
343 |
<div class="column">
|
344 |
<div class="content dialogue-block" style="display: flex; flex-direction: column; justify-content: space-between; height: 100%;">
|
@@ -405,7 +712,7 @@
|
|
405 |
</div>
|
406 |
</div>
|
407 |
|
408 |
-
<h2 class="title is-3 has-text-centered">Chinese Dialogues</h2>
|
409 |
<div class="columns is-centered">
|
410 |
<div class="column">
|
411 |
<div class="content dialogue-block" style="display: flex; flex-direction: column; justify-content: space-between; height: 100%;">
|
|
|
338 |
<section class="section">
|
339 |
<div class="container is-max-desktop">
|
340 |
<h2 class="title is-2 has-text-centered">Experiment Results</h2>
|
341 |
+
<h2 class="title is-3 has-text-centered">Results</h2>
|
342 |
+
|
343 |
+
<table style="width:100%; border-collapse: collapse;">
|
344 |
+
<caption>Performances of speech language models on Ke-SpeechChat chat-test.</caption>
|
345 |
+
<thead>
|
346 |
+
<tr>
|
347 |
+
<th rowspan="3" style="text-align:center; vertical-align:middle;">Models</th>
|
348 |
+
<th rowspan="3" style="text-align:center; vertical-align:middle;">Data</th>
|
349 |
+
<th colspan="5" style="text-align:center;">Chinese</th>
|
350 |
+
<th colspan="5" style="text-align:center;">English</th>
|
351 |
+
</tr>
|
352 |
+
<tr>
|
353 |
+
<th colspan="3" style="text-align:center;">S2TIF</th>
|
354 |
+
<th style="text-align:center;">Modal Align</th>
|
355 |
+
<th style="text-align:center;">Quality</th>
|
356 |
+
<th colspan="3" style="text-align:center;">S2TIF</th>
|
357 |
+
<th style="text-align:center;">Modal Align</th>
|
358 |
+
<th style="text-align:center;">Quality</th>
|
359 |
+
</tr>
|
360 |
+
<tr>
|
361 |
+
<th style="text-align:center;">Content<sup>↑</sup></th>
|
362 |
+
<th style="text-align:center;">Style<sup>↑</sup></th>
|
363 |
+
<th style="text-align:center;">Length</th>
|
364 |
+
<th style="text-align:center;">CER<sup>↓</sup></th>
|
365 |
+
<th style="text-align:center;">UTMOS<sup>↑</sup></th>
|
366 |
+
<th style="text-align:center;">Content<sup>↑</sup></th>
|
367 |
+
<th style="text-align:center;">Style<sup>↑</sup></th>
|
368 |
+
<th style="text-align:center;">Length</th>
|
369 |
+
<th style="text-align:center;">WER<sup>↓</sup></th>
|
370 |
+
<th style="text-align:center;">UTMOS<sup>↑</sup></th>
|
371 |
+
</tr>
|
372 |
+
</thead>
|
373 |
+
<tbody>
|
374 |
+
<tr>
|
375 |
+
<td rowspan="1" style="text-align:center; vertical-align:middle;">Qwen2-Audio</td>
|
376 |
+
<td style="text-align:center;">-</td>
|
377 |
+
<td style="text-align:center;">3.32</td>
|
378 |
+
<td style="text-align:center;">3.09</td>
|
379 |
+
<td style="text-align:center;">242.74</td>
|
380 |
+
<td style="text-align:center;">-</td>
|
381 |
+
<td style="text-align:center;">-</td>
|
382 |
+
<td style="text-align:center;">2.39</td>
|
383 |
+
<td style="text-align:center;">2.85</td>
|
384 |
+
<td style="text-align:center;">115.58</td>
|
385 |
+
<td style="text-align:center;">-</td>
|
386 |
+
<td style="text-align:center;">-</td>
|
387 |
+
</tr>
|
388 |
+
<tr>
|
389 |
+
<td rowspan="1" style="text-align:center; vertical-align:middle;">LLaMA-Omni</td>
|
390 |
+
<td style="text-align:center;">-</td>
|
391 |
+
<td colspan="5" style="text-align:center;">-</td>
|
392 |
+
<td style="text-align:center;">2.85</td>
|
393 |
+
<td style="text-align:center;">3.70</td>
|
394 |
+
<td style="text-align:center;">46.80</td>
|
395 |
+
<td style="text-align:center;">9.27</td>
|
396 |
+
<td style="text-align:center;">3.95</td>
|
397 |
+
</tr>
|
398 |
+
<tr>
|
399 |
+
<td rowspan="1" style="text-align:center; vertical-align:middle;">SpeechGPT</td>
|
400 |
+
<td style="text-align:center;">-</td>
|
401 |
+
<td colspan="5" style="text-align:center;">-</td>
|
402 |
+
<td style="text-align:center;">2.86</td>
|
403 |
+
<td style="text-align:center;">3.37</td>
|
404 |
+
<td style="text-align:center;">57.00</td>
|
405 |
+
<td style="text-align:center;">56.34</td>
|
406 |
+
<td style="text-align:center;">3.90</td>
|
407 |
+
</tr>
|
408 |
+
<tr>
|
409 |
+
<td rowspan="5" style="text-align:center; vertical-align:middle;">KE-Omni(ours)</td>
|
410 |
+
<td style="text-align:center;">XS</td>
|
411 |
+
<td style="text-align:center;">3.04</td>
|
412 |
+
<td style="text-align:center;">3.79</td>
|
413 |
+
<td style="text-align:center;">90.91</td>
|
414 |
+
<td style="text-align:center;">29.35</td>
|
415 |
+
<td style="text-align:center;">2.74</td>
|
416 |
+
<td style="text-align:center;">2.79</td>
|
417 |
+
<td style="text-align:center;">3.61</td>
|
418 |
+
<td style="text-align:center;">72.77</td>
|
419 |
+
<td style="text-align:center;">36.10</td>
|
420 |
+
<td style="text-align:center;">3.06</td>
|
421 |
+
</tr>
|
422 |
+
<tr>
|
423 |
+
<td style="text-align:center;">S</td>
|
424 |
+
<td style="text-align:center;">3.47</td>
|
425 |
+
<td style="text-align:center;">4.04</td>
|
426 |
+
<td style="text-align:center;">90.70</td>
|
427 |
+
<td style="text-align:center;">7.61</td>
|
428 |
+
<td style="text-align:center;">3.26</td>
|
429 |
+
<td style="text-align:center;">2.96</td>
|
430 |
+
<td style="text-align:center;">3.69</td>
|
431 |
+
<td style="text-align:center;">68.83</td>
|
432 |
+
<td style="text-align:center;">8.81</td>
|
433 |
+
<td style="text-align:center;">3.90</td>
|
434 |
+
</tr>
|
435 |
+
<tr>
|
436 |
+
<td style="text-align:center;">M</td>
|
437 |
+
<td style="text-align:center;">3.89</td>
|
438 |
+
<td style="text-align:center;">4.24</td>
|
439 |
+
<td style="text-align:center;">89.29</td>
|
440 |
+
<td style="text-align:center;">5.19</td>
|
441 |
+
<td style="text-align:center;">3.39</td>
|
442 |
+
<td style="text-align:center;">3.45</td>
|
443 |
+
<td style="text-align:center;">3.96</td>
|
444 |
+
<td style="text-align:center;">70.02</td>
|
445 |
+
<td style="text-align:center;">4.54</td>
|
446 |
+
<td style="text-align:center;">4.26</td>
|
447 |
+
</tr>
|
448 |
+
<tr>
|
449 |
+
<td style="text-align:center;">L</td>
|
450 |
+
<td style="text-align:center;">3.95</td>
|
451 |
+
<td style="text-align:center;">4.28</td>
|
452 |
+
<td style="text-align:center;">89.54</td>
|
453 |
+
<td style="text-align:center;"><strong>5.03</strong></td>
|
454 |
+
<td style="text-align:center;">3.39</td>
|
455 |
+
<td style="text-align:center;">3.57</td>
|
456 |
+
<td style="text-align:center;">4.00</td>
|
457 |
+
<td style="text-align:center;">69.00</td>
|
458 |
+
<td style="text-align:center;"><strong>3.92</strong></td>
|
459 |
+
<td style="text-align:center;">4.29</td>
|
460 |
+
</tr>
|
461 |
+
<tr>
|
462 |
+
<td style="text-align:center;">XL</td>
|
463 |
+
<td style="text-align:center;"><strong>4.12</strong></td>
|
464 |
+
<td style="text-align:center;"><strong>4.34</strong></td>
|
465 |
+
<td style="text-align:center;">88.10</td>
|
466 |
+
<td style="text-align:center;">5.16</td>
|
467 |
+
<td style="text-align:center;"><strong>3.43</strong></td>
|
468 |
+
<td style="text-align:center;"><strong>3.61</strong></td>
|
469 |
+
<td style="text-align:center;"><strong>4.00</strong></td>
|
470 |
+
<td style="text-align:center;">68.70</td>
|
471 |
+
<td style="text-align:center;">4.29</td>
|
472 |
+
<td style="text-align:center;"><strong>4.30</strong></td>
|
473 |
+
</tr>
|
474 |
+
</tbody>
|
475 |
+
</table>
|
476 |
+
|
477 |
+
<style>
|
478 |
+
table, th, td {
|
479 |
+
border: 1px solid black;
|
480 |
+
padding: 5px;
|
481 |
+
text-align: center;
|
482 |
+
}
|
483 |
+
caption {
|
484 |
+
font-size: 1.2em;
|
485 |
+
margin-bottom: 10px;
|
486 |
+
}
|
487 |
+
th {
|
488 |
+
background-color: #f2f2f2;
|
489 |
+
}
|
490 |
+
tr:nth-child(even) {background-color: #f9f9f9;}
|
491 |
+
tr:hover {background-color:#f1f1f1;}
|
492 |
+
sup {
|
493 |
+
font-size: 0.8em;
|
494 |
+
vertical-align: super;
|
495 |
+
line-height: 0;
|
496 |
+
}
|
497 |
+
</style>
|
498 |
+
|
499 |
+
<table style="width:100%; border-collapse: collapse;">
|
500 |
+
<caption>Results of VoiceBench. For detailed results, please visit <a href="https://github.com/MatthewCYM/VoiceBench?tab=readme-ov-file#leaderboard">[VoiceBench Leaderboard]</a>.</caption>
|
501 |
+
<thead> <!-- single header row: each cell below labels one column, hence scope="col" -->
|
502 |
+
<tr>
|
503 |
+
<th scope="col" style="text-align:center;">Model</th>
|
504 |
+
<th scope="col" style="text-align:center;">AlpacaEval<sup>↑</sup></th>
|
505 |
+
<th scope="col" style="text-align:center;">CommonEval<sup>↑</sup></th>
|
506 |
+
<th scope="col" style="text-align:center;">SD-QA<sup>↑</sup></th>
|
507 |
+
<th scope="col" style="text-align:center;">MMSU<sup>↑</sup></th>
|
508 |
+
<th scope="col" style="text-align:center;">OpenBookQA<sup>↑</sup></th>
|
509 |
+
<th scope="col" style="text-align:center;">IFEval<sup>↑</sup></th>
|
510 |
+
<th scope="col" style="text-align:center;">AdvBench<sup>↑</sup></th>
|
511 |
+
<th scope="col" style="text-align:center;">Overall<sup>↑</sup></th>
|
512 |
+
</tr>
|
513 |
+
</thead>
|
514 |
+
<tbody>
|
515 |
+
<tr>
|
516 |
+
<td style="text-align:left;">GLM-4-Voice</td>
|
517 |
+
<td style="text-align:center;">3.97</td>
|
518 |
+
<td style="text-align:center;">3.42</td>
|
519 |
+
<td style="text-align:center;">36.98</td>
|
520 |
+
<td style="text-align:center;">39.75</td>
|
521 |
+
<td style="text-align:center;">53.41</td>
|
522 |
+
<td style="text-align:center;">25.92</td>
|
523 |
+
<td style="text-align:center;">88.08</td>
|
524 |
+
<td style="text-align:center;">55.99</td>
|
525 |
+
</tr>
|
526 |
+
<tr>
|
527 |
+
<td style="text-align:left;">DiVA</td>
|
528 |
+
<td style="text-align:center;">3.67</td>
|
529 |
+
<td style="text-align:center;">3.54</td>
|
530 |
+
<td style="text-align:center;">57.05</td>
|
531 |
+
<td style="text-align:center;">25.76</td>
|
532 |
+
<td style="text-align:center;">25.49</td>
|
533 |
+
<td style="text-align:center;">39.15</td>
|
534 |
+
<td style="text-align:center;">98.27</td>
|
535 |
+
<td style="text-align:center;">55.70</td>
|
536 |
+
</tr>
|
537 |
+
<tr>
|
538 |
+
<td style="text-align:left;">Qwen2-Audio</td>
|
539 |
+
<td style="text-align:center;">3.74</td>
|
540 |
+
<td style="text-align:center;">3.43</td>
|
541 |
+
<td style="text-align:center;">35.71</td>
|
542 |
+
<td style="text-align:center;">35.72</td>
|
543 |
+
<td style="text-align:center;">49.45</td>
|
544 |
+
<td style="text-align:center;">26.33</td>
|
545 |
+
<td style="text-align:center;">96.73</td>
|
546 |
+
<td style="text-align:center;">55.35</td>
|
547 |
+
</tr>
|
548 |
+
<tr>
|
549 |
+
<td style="text-align:left;"><strong>KE-Omni-v1.5</strong></td>
|
550 |
+
<td style="text-align:center;">3.82</td>
|
551 |
+
<td style="text-align:center;">3.20</td>
|
552 |
+
<td style="text-align:center;">31.20</td>
|
553 |
+
<td style="text-align:center;">32.27</td>
|
554 |
+
<td style="text-align:center;">58.46</td>
|
555 |
+
<td style="text-align:center;">15.00</td>
|
556 |
+
<td style="text-align:center;">100.00</td>
|
557 |
+
<td style="text-align:center;">53.90</td>
|
558 |
+
</tr>
|
559 |
+
<tr>
|
560 |
+
<td style="text-align:left;">LLaMA-Omni</td>
|
561 |
+
<td style="text-align:center;">3.70</td>
|
562 |
+
<td style="text-align:center;">3.46</td>
|
563 |
+
<td style="text-align:center;">39.69</td>
|
564 |
+
<td style="text-align:center;">25.93</td>
|
565 |
+
<td style="text-align:center;">27.47</td>
|
566 |
+
<td style="text-align:center;">14.87</td>
|
567 |
+
<td style="text-align:center;">11.35</td>
|
568 |
+
<td style="text-align:center;">37.51</td>
|
569 |
+
</tr>
|
570 |
+
<tr>
|
571 |
+
<td style="text-align:left;">VITA</td>
|
572 |
+
<td style="text-align:center;">3.38</td>
|
573 |
+
<td style="text-align:center;">2.15</td>
|
574 |
+
<td style="text-align:center;">27.94</td>
|
575 |
+
<td style="text-align:center;">25.70</td>
|
576 |
+
<td style="text-align:center;">29.01</td>
|
577 |
+
<td style="text-align:center;">22.82</td>
|
578 |
+
<td style="text-align:center;">26.73</td>
|
579 |
+
<td style="text-align:center;">34.68</td>
|
580 |
+
</tr>
|
581 |
+
<tr>
|
582 |
+
<td style="text-align:left;">Mini-Omni2</td>
|
583 |
+
<td style="text-align:center;">2.32</td>
|
584 |
+
<td style="text-align:center;">2.18</td>
|
585 |
+
<td style="text-align:center;">9.31</td>
|
586 |
+
<td style="text-align:center;">24.27</td>
|
587 |
+
<td style="text-align:center;">26.59</td>
|
588 |
+
<td style="text-align:center;">11.56</td>
|
589 |
+
<td style="text-align:center;">57.50</td>
|
590 |
+
<td style="text-align:center;">31.32</td>
|
591 |
+
</tr>
|
592 |
+
<tr>
|
593 |
+
<td style="text-align:left;">Mini-Omni</td>
|
594 |
+
<td style="text-align:center;">1.95</td>
|
595 |
+
<td style="text-align:center;">2.02</td>
|
596 |
+
<td style="text-align:center;">13.92</td>
|
597 |
+
<td style="text-align:center;">24.69</td>
|
598 |
+
<td style="text-align:center;">26.59</td>
|
599 |
+
<td style="text-align:center;">13.58</td>
|
600 |
+
<td style="text-align:center;">37.12</td>
|
601 |
+
<td style="text-align:center;">27.90</td>
|
602 |
+
</tr>
|
603 |
+
<tr>
|
604 |
+
<td style="text-align:left;">Moshi</td>
|
605 |
+
<td style="text-align:center;">2.01</td>
|
606 |
+
<td style="text-align:center;">1.60</td>
|
607 |
+
<td style="text-align:center;">15.64</td>
|
608 |
+
<td style="text-align:center;">24.04</td>
|
609 |
+
<td style="text-align:center;">25.93</td>
|
610 |
+
<td style="text-align:center;">10.12</td>
|
611 |
+
<td style="text-align:center;">44.23</td>
|
612 |
+
<td style="text-align:center;">27.47</td>
|
613 |
+
</tr>
|
614 |
+
</tbody>
|
615 |
+
</table>
|
616 |
+
|
617 |
+
<style>
|
618 |
+
table, th, td {
|
619 |
+
border: 1px solid black;
|
620 |
+
padding: 5px;
|
621 |
+
text-align: center;
|
622 |
+
}
|
623 |
+
caption {
|
624 |
+
font-size: 1.2em;
|
625 |
+
margin-bottom: 10px;
|
626 |
+
}
|
627 |
+
th {
|
628 |
+
background-color: #f2f2f2;
|
629 |
+
}
|
630 |
+
tr:nth-child(even) {background-color: #f9f9f9;}
|
631 |
+
tr:hover {background-color:#f1f1f1;}
|
632 |
+
sup {
|
633 |
+
font-size: 0.8em;
|
634 |
+
vertical-align: super;
|
635 |
+
line-height: 0;
|
636 |
+
}
|
637 |
+
strong {
|
638 |
+
font-weight: bold;
|
639 |
+
}
|
640 |
+
em {
|
641 |
+
font-style: italic;
|
642 |
+
}
|
643 |
+
a {
|
644 |
+
text-decoration: none;
|
645 |
+
}
|
646 |
+
</style>
|
647 |
+
|
648 |
+
<h2 class="title is-3 has-text-centered">Case Study: English Dialogues</h2>
|
649 |
<div class="columns is-centered">
|
650 |
<div class="column">
|
651 |
<div class="content dialogue-block" style="display: flex; flex-direction: column; justify-content: space-between; height: 100%;">
|
|
|
712 |
</div>
|
713 |
</div>
|
714 |
|
715 |
+
<h2 class="title is-3 has-text-centered">Case Study: Chinese Dialogues</h2>
|
716 |
<div class="columns is-centered">
|
717 |
<div class="column">
|
718 |
<div class="content dialogue-block" style="display: flex; flex-direction: column; justify-content: space-between; height: 100%;">
|