kristaller486 committed on
Commit
24c5d3e
·
verified ·
1 Parent(s): f6683d2

add Polaris Alpha

Browse files
Files changed (1) hide show
  1. index.html +50 -14
index.html CHANGED
@@ -850,8 +850,44 @@
850
  <td class="num mono" data-label="Всего токенов">132,071</td>
851
  </tr>
852
 
853
- <tr data-model="openai/tiiuae/Falcon-H1-34B-Instruct">
854
  <td class="rank mono sticky-0" data-label="#">#22</td>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
855
  <td class="model-name sticky-1" data-label="Модель">tiiuae/Falcon-H1-34B-Instruct (vllm)</td>
856
  <td class="num mono" data-label="Критичные/1000">
857
 
@@ -887,7 +923,7 @@
887
  </tr>
888
 
889
  <tr data-model="openrouter/qwen/qwen3-235b-a22b-2507">
890
- <td class="rank mono sticky-0" data-label="#">#23</td>
891
  <td class="model-name sticky-1" data-label="Модель">Qwen3-235B-A22B-2507-Instruct (Alibaba API)</td>
892
  <td class="num mono" data-label="Критичные/1000">
893
 
@@ -923,7 +959,7 @@
923
  </tr>
924
 
925
  <tr data-model="openrouter/qwen/qwen3-vl-8b-instruct">
926
- <td class="rank mono sticky-0" data-label="#">#24</td>
927
  <td class="model-name sticky-1" data-label="Модель">Qwen3-VL-8B-Instruct (Alibaba API, presence_penalty=2)</td>
928
  <td class="num mono" data-label="Критичные/1000">
929
 
@@ -959,7 +995,7 @@
959
  </tr>
960
 
961
  <tr data-model="openrouter/moonshotai/kimi-k2-0905">
962
- <td class="rank mono sticky-0" data-label="#">#25</td>
963
  <td class="model-name sticky-1" data-label="Модель">moonshotai/Kimi-K2-Instruct-0905 (Novita API)</td>
964
  <td class="num mono" data-label="Критичные/1000">
965
 
@@ -995,7 +1031,7 @@
995
  </tr>
996
 
997
  <tr data-model="openrouter/z-ai/glm-4.6">
998
- <td class="rank mono sticky-0" data-label="#">#26</td>
999
  <td class="model-name sticky-1" data-label="Модель">GLM-4.6 (Z.ai API)</td>
1000
  <td class="num mono" data-label="Критичные/1000">
1001
 
@@ -1031,7 +1067,7 @@
1031
  </tr>
1032
 
1033
  <tr data-model="openrouter/openai/gpt-5">
1034
- <td class="rank mono sticky-0" data-label="#">#27</td>
1035
  <td class="model-name sticky-1" data-label="Модель">GPT-5 (reasoning: minimal)</td>
1036
  <td class="num mono" data-label="Критичные/1000">
1037
 
@@ -1067,7 +1103,7 @@
1067
  </tr>
1068
 
1069
  <tr data-model="openai/aquif-3.5-Max-42B-A3B.Q8_0.gguf">
1070
- <td class="rank mono sticky-0" data-label="#">#28</td>
1071
  <td class="model-name sticky-1" data-label="Модель">aquif-ai/aquif-3.5-Plus-30B-A3B (Q8_0 llama.cpp, without reasoning)</td>
1072
  <td class="num mono" data-label="Критичные/1000">
1073
 
@@ -1103,7 +1139,7 @@
1103
  </tr>
1104
 
1105
  <tr data-model="openrouter/openai/gpt-5">
1106
- <td class="rank mono sticky-0" data-label="#">#29</td>
1107
  <td class="model-name sticky-1" data-label="Модель">GPT-5 (reasoning: low)</td>
1108
  <td class="num mono" data-label="Критичные/1000">
1109
 
@@ -1139,7 +1175,7 @@
1139
  </tr>
1140
 
1141
  <tr data-model="openai/nvidia/NVIDIA-Nemotron-Nano-12B-v2">
1142
- <td class="rank mono sticky-0" data-label="#">#30</td>
1143
  <td class="model-name sticky-1" data-label="Модель">nvidia/NVIDIA-Nemotron-Nano-12B-v2 (vllm, reasoning=false)</td>
1144
  <td class="num mono" data-label="Критичные/1000">
1145
 
@@ -1175,7 +1211,7 @@
1175
  </tr>
1176
 
1177
  <tr data-model="openrouter/openai/gpt-oss-120b">
1178
- <td class="rank mono sticky-0" data-label="#">#31</td>
1179
  <td class="model-name sticky-1" data-label="Модель">GPT-OSS-120B (Vertex AI API)</td>
1180
  <td class="num mono" data-label="Критичные/1000">
1181
 
@@ -1211,7 +1247,7 @@
1211
  </tr>
1212
 
1213
  <tr data-model="openrouter/mistralai/mistral-nemo">
1214
- <td class="rank mono sticky-0" data-label="#">#32</td>
1215
  <td class="model-name sticky-1" data-label="Модель">Mistral-Nemo (Mistral API)</td>
1216
  <td class="num mono" data-label="Критичные/1000">
1217
 
@@ -1247,7 +1283,7 @@
1247
  </tr>
1248
 
1249
  <tr data-model="openrouter/minimax/minimax-m2:free">
1250
- <td class="rank mono sticky-0" data-label="#">#33</td>
1251
  <td class="model-name sticky-1" data-label="Модель">MiniMaxAI/MiniMax-M2 (Minimax API)</td>
1252
  <td class="num mono" data-label="Критичные/1000">
1253
 
@@ -1283,7 +1319,7 @@
1283
  </tr>
1284
 
1285
  <tr data-model="openrouter/minimax/minimax-m2:free">
1286
- <td class="rank mono sticky-0" data-label="#">#34</td>
1287
  <td class="model-name sticky-1" data-label="Модель">MiniMaxAI/MiniMax-M2 (Minimax API, recommend params)</td>
1288
  <td class="num mono" data-label="Критичные/1000">
1289
 
@@ -1341,7 +1377,7 @@
1341
  </div>
1342
  <p class="info-text">Если хотите, чтобы я добавил ту или иную модель в лидерборд - не стесняйтесь открыть issue/pull request на Github.</p>
1343
  <p class="info-text">
1344
- Обновлено: 2025-11-06 08:27:17 | Всего моделей: 34 | <a href="https://github.com/kristaller486/RuQualBench">GitHub</a> | <a href="https://t.me/krists">Telegram</a>
1345
  </p>
1346
  </div>
1347
  <script>
 
850
  <td class="num mono" data-label="Всего токенов">132,071</td>
851
  </tr>
852
 
853
+ <tr data-model="openrouter/openrouter/polaris-alpha">
854
  <td class="rank mono sticky-0" data-label="#">#22</td>
855
+ <td class="model-name sticky-1" data-label="Модель">Polaris Alpha</td>
856
+ <td class="num mono" data-label="Критичные/1000">
857
+
858
+ 0.22 ± 0.05
859
+
860
+ </td>
861
+ <td class="num mono" data-label="Обычные/1000">
862
+
863
+ 0.71 ± 0.02
864
+
865
+ </td>
866
+ <td class="num mono" data-label="Доп./1000">
867
+
868
+ 0.24 ± 0.03
869
+
870
+ </td>
871
+ <td data-label="Нормировано ошибок">
872
+ <div class="score-cell">
873
+ <div class="progress-bar">
874
+
875
+
876
+
877
+ <div class="progress-fill" style="width: 70.60185185185186%"></div>
878
+ </div>
879
+ <span class="score-value">
880
+
881
+ 1.27 ± 0.12
882
+
883
+ </span>
884
+ </div>
885
+ </td>
886
+ <td class="num mono" data-label="Всего токенов">157,197</td>
887
+ </tr>
888
+
889
+ <tr data-model="openai/tiiuae/Falcon-H1-34B-Instruct">
890
+ <td class="rank mono sticky-0" data-label="#">#23</td>
891
  <td class="model-name sticky-1" data-label="Модель">tiiuae/Falcon-H1-34B-Instruct (vllm)</td>
892
  <td class="num mono" data-label="Критичные/1000">
893
 
 
923
  </tr>
924
 
925
  <tr data-model="openrouter/qwen/qwen3-235b-a22b-2507">
926
+ <td class="rank mono sticky-0" data-label="#">#24</td>
927
  <td class="model-name sticky-1" data-label="Модель">Qwen3-235B-A22B-2507-Instruct (Alibaba API)</td>
928
  <td class="num mono" data-label="Критичные/1000">
929
 
 
959
  </tr>
960
 
961
  <tr data-model="openrouter/qwen/qwen3-vl-8b-instruct">
962
+ <td class="rank mono sticky-0" data-label="#">#25</td>
963
  <td class="model-name sticky-1" data-label="Модель">Qwen3-VL-8B-Instruct (Alibaba API, presence_penalty=2)</td>
964
  <td class="num mono" data-label="Критичные/1000">
965
 
 
995
  </tr>
996
 
997
  <tr data-model="openrouter/moonshotai/kimi-k2-0905">
998
+ <td class="rank mono sticky-0" data-label="#">#26</td>
999
  <td class="model-name sticky-1" data-label="Модель">moonshotai/Kimi-K2-Instruct-0905 (Novita API)</td>
1000
  <td class="num mono" data-label="Критичные/1000">
1001
 
 
1031
  </tr>
1032
 
1033
  <tr data-model="openrouter/z-ai/glm-4.6">
1034
+ <td class="rank mono sticky-0" data-label="#">#27</td>
1035
  <td class="model-name sticky-1" data-label="Модель">GLM-4.6 (Z.ai API)</td>
1036
  <td class="num mono" data-label="Критичные/1000">
1037
 
 
1067
  </tr>
1068
 
1069
  <tr data-model="openrouter/openai/gpt-5">
1070
+ <td class="rank mono sticky-0" data-label="#">#28</td>
1071
  <td class="model-name sticky-1" data-label="Модель">GPT-5 (reasoning: minimal)</td>
1072
  <td class="num mono" data-label="Критичные/1000">
1073
 
 
1103
  </tr>
1104
 
1105
  <tr data-model="openai/aquif-3.5-Max-42B-A3B.Q8_0.gguf">
1106
+ <td class="rank mono sticky-0" data-label="#">#29</td>
1107
  <td class="model-name sticky-1" data-label="Модель">aquif-ai/aquif-3.5-Plus-30B-A3B (Q8_0 llama.cpp, without reasoning)</td>
1108
  <td class="num mono" data-label="Критичные/1000">
1109
 
 
1139
  </tr>
1140
 
1141
  <tr data-model="openrouter/openai/gpt-5">
1142
+ <td class="rank mono sticky-0" data-label="#">#30</td>
1143
  <td class="model-name sticky-1" data-label="Модель">GPT-5 (reasoning: low)</td>
1144
  <td class="num mono" data-label="Критичные/1000">
1145
 
 
1175
  </tr>
1176
 
1177
  <tr data-model="openai/nvidia/NVIDIA-Nemotron-Nano-12B-v2">
1178
+ <td class="rank mono sticky-0" data-label="#">#31</td>
1179
  <td class="model-name sticky-1" data-label="Модель">nvidia/NVIDIA-Nemotron-Nano-12B-v2 (vllm, reasoning=false)</td>
1180
  <td class="num mono" data-label="Критичные/1000">
1181
 
 
1211
  </tr>
1212
 
1213
  <tr data-model="openrouter/openai/gpt-oss-120b">
1214
+ <td class="rank mono sticky-0" data-label="#">#32</td>
1215
  <td class="model-name sticky-1" data-label="Модель">GPT-OSS-120B (Vertex AI API)</td>
1216
  <td class="num mono" data-label="Критичные/1000">
1217
 
 
1247
  </tr>
1248
 
1249
  <tr data-model="openrouter/mistralai/mistral-nemo">
1250
+ <td class="rank mono sticky-0" data-label="#">#33</td>
1251
  <td class="model-name sticky-1" data-label="Модель">Mistral-Nemo (Mistral API)</td>
1252
  <td class="num mono" data-label="Критичные/1000">
1253
 
 
1283
  </tr>
1284
 
1285
  <tr data-model="openrouter/minimax/minimax-m2:free">
1286
+ <td class="rank mono sticky-0" data-label="#">#34</td>
1287
  <td class="model-name sticky-1" data-label="Модель">MiniMaxAI/MiniMax-M2 (Minimax API)</td>
1288
  <td class="num mono" data-label="Критичные/1000">
1289
 
 
1319
  </tr>
1320
 
1321
  <tr data-model="openrouter/minimax/minimax-m2:free">
1322
+ <td class="rank mono sticky-0" data-label="#">#35</td>
1323
  <td class="model-name sticky-1" data-label="Модель">MiniMaxAI/MiniMax-M2 (Minimax API, recommend params)</td>
1324
  <td class="num mono" data-label="Критичные/1000">
1325
 
 
1377
  </div>
1378
  <p class="info-text">Если хотите, чтобы я добавил ту или иную модель в лидерборд - не стесняйтесь открыть issue/pull request на Github.</p>
1379
  <p class="info-text">
1380
+ Обновлено: 2025-11-07 09:58:44 | Всего моделей: 35 | <a href="https://github.com/kristaller486/RuQualBench">GitHub</a> | <a href="https://t.me/krists">Telegram</a>
1381
  </p>
1382
  </div>
1383
  <script>