import streamlit as st

from view_utils import default_page_setting, set_nav_bar

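# Shared page setup: apply the common page settings and render the sidebar
# navigation bar (behavior inferred from the view_utils helper names).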
sidebar_placeholder = default_page_setting()
set_nav_bar(
    False, sidebar_placeholder=sidebar_placeholder, toggle_hashstr="brief_intro_init"
)

st.image("va_en.png")
if st.session_state.korean:
    # Korean UI: "The files you upload are the LLM outputs shown in the figure above."
    st.info("μ—¬λŸ¬λΆ„μ΄ μ—…λ‘œλ“œν•˜λŠ” νŒŒμΌμ€ μœ„ 그림의 LLM 좜λ ₯λ“€μž…λ‹ˆλ‹€.")
    # Korean UI expander title: "What should I upload?"
    with st.expander("❓❔ 무엇을 μ—…λ‘œλ“œ ν•˜λ‚˜μš”β“β”"):
        with open("guide_mds/input_jsonls_kr.md", encoding="UTF8") as f:
            st.info(f.read())
else:
    st.info(
        "What you upload are the LLMs' generated responses to the test prompts."
    )
    with st.expander("❓❔ What should I upload ❓❔"):
        with open("guide_mds/input_jsonls_en.md", encoding="UTF8") as f:
            st.info(f.read())


st.image("va_concept_new.png")
st.markdown(
    """
| |Current Practice|Varco Arena|
|-|-|-|
|Total no. matches|$$n_{\\text{model}}\\cdot\\|X\\|$$|$$(n_{\\text{model}}-1)\\cdot\\|X\\|$$|
|No. matches per LLM|$$\\|X\\|$$|$$\\left[\\|X\\|,\\|X\\|\\log n_{\\text{model}}\\right]$$|
|Comparison type|reference-mediated (anchored)|direct|
"""
)
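# Illustrative arithmetic for the table above (assumed values, not from the app):
# with n_model = 8 LLMs and |X| = 100 test prompts,
#   current practice : n_model * |X|       = 8 * 100 = 800 total matches
#   Varco Arena      : (n_model - 1) * |X| = 7 * 100 = 700 total matches
#   per-LLM matches  : between |X| = 100 and |X| * log(n_model)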
if st.session_state.korean:
    # Korean UI: "Varco Arena obtains a reliable ranking within fewer comparisons,
    # a property that stems from the advantage of comparing LLMs directly."
    st.info(
        "Varco ArenaλŠ” μ‹ λ’°μ„± μžˆλŠ” μˆœμœ„λ₯Ό 더 적은 횟수의 비ꡐ 내에 μ–»μ–΄λ‚΄λ©°, μ΄λŸ¬ν•œ νŠΉμ§•μ€ LLM 직접 λΉ„κ΅μ˜ μ΄μ μœΌλ‘œλΆ€ν„° κΈ°μΈν•©λ‹ˆλ‹€."
    )
else:
    st.info(
        "Varco Arena takes advantage of direct comparisons between LLM responses to achieve better reliability with fewer total matches."
    )