# Source: Hugging Face Space "NCSOFT/VARCO_Arena" (status: Running),
# revision c2ba4d5, file size 11,963 bytes.

# import shutil
import os
import select
import subprocess
import sys
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import *

import streamlit as st

# Put the parent of this file's directory on the import path; presumably this
# is what lets the `varco_arena_core` import below resolve — TODO confirm.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from varco_arena_core.prompts import load_prompt

from view_utils import (
    default_page_setting,
    escape_markdown,
    set_nav_bar,
    show_linebreak_in_md,
)

# import sys
# print(sys.executable)


# Root directory for user submissions; overridable through the
# VARCO_ARENA_RESULT_PATH environment variable, "./user_submit" otherwise.
VA_ROOT = Path(os.environ.get("VARCO_ARENA_RESULT_PATH", "./user_submit"))
# Last path component of VA_ROOT. main() treats it as a reserved name: it is
# rejected as a prompt name and neutralised inside experiment names.
USR_SUB = VA_ROOT.parts[-1]


def upload_files(uploaded_files) -> Optional[Path]:
    """Save uploaded files into a fresh timestamped directory under ``VA_ROOT``.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing ``name``
            and ``getbuffer()``. ``None`` is treated like an empty upload.

    Returns:
        The resolved (absolute) directory that now holds the files, or
        ``None`` when fewer than two files were supplied; in that case a
        message is shown in the UI and nothing is written to disk.

    Raises:
        ValueError: If ``VA_ROOT`` exists but is not a directory.
    """
    user_sub_root = VA_ROOT
    if user_sub_root.exists() and not user_sub_root.is_dir():
        raise ValueError(
            f"{user_sub_root} file exists and is not a directory. Consider renaming it."
        )

    uploaded_files = list(uploaded_files or [])

    # Validate before touching the filesystem so rejected uploads do not
    # leave empty timestamp directories behind.
    if not uploaded_files:
        st.warning("❌ No files to upload. Please drag/drop or browse files to upload.")
        return None
    if len(uploaded_files) < 2:
        st.error("❌ You need at least 2 jsonlines files to properly run VA.")
        return None

    # Directory name is the current time in KST (UTC+9).
    # NOTE(review): the name has one-second resolution and no year, so two
    # uploads within the same second share a directory — confirm acceptable.
    KST = timezone(timedelta(hours=9))
    tstr = datetime.now(KST).strftime("%m-%d_%H:%M:%S")

    # Join with pathlib instead of prefixing "./": the string prefix turned an
    # absolute VA_ROOT into a path relative to the current working directory.
    files_dir = user_sub_root / tstr
    files_dir.mkdir(parents=True, exist_ok=True)

    for file in uploaded_files:
        # Keep only the final path component so a crafted name such as
        # "../../x" cannot escape the upload directory.
        safe_name = Path(file.name).name
        if safe_name in ("", ".", ".."):
            continue

        with open(files_dir / safe_name, "wb") as f:
            f.write(file.getbuffer())

    jslfiles = list(files_dir.glob("*.jsonl"))
    st.success(f"✅ Successfully uploaded {len(jslfiles)} jsonl files.")
    return files_dir.resolve()


def run_varco_arena(
    price_estimation: bool = False,
    # upload_dir: Union[str, Path] = None,
    promptname: str = None,
    exp_name: str = None,
    api_key: Optional[str] = None,
    evaluation_model: str = "gpt-4o-mini",
    update_interval: float = 1.0,
):
    """Run ``../varco_arena/main.py`` as a subprocess and stream its output to the page.

    The input directory is read from ``st.session_state.upfiles_dir``.

    Args:
        price_estimation: When true, ``-c`` is appended to the command and the
            whole output is shown. Otherwise ``yes`` is piped into the command
            and only the tail of the output is shown.
        promptname: Value passed to ``-p``.
        exp_name: Optional sub-directory of the upload directory used as ``-o``.
        api_key: Value passed to ``-k``. It is masked in everything that is
            logged or displayed.
        evaluation_model: Value passed to ``-e``.
        update_interval: Minimum number of seconds between display refreshes
            when ``price_estimation`` is false.

    Returns:
        Tuple ``(outdir, return_code)``: the output directory as a ``Path`` and
        the exit status of the launched shell command.
    """
    import shlex  # local import: only this function assembles a shell command

    ptn = str(st.session_state.upfiles_dir)
    outdir = Path(ptn)
    if exp_name:
        outdir = outdir / exp_name

    def _build_command(key: str) -> str:
        """Return the shell command line with every argument quoted."""
        argv = [
            "python",
            "../varco_arena/main.py",
            "-i",
            ptn,
            "-o",
            str(outdir),
            "-k",
            key,
            "-p",
            str(promptname),
            "-e",
            str(evaluation_model),
            "-j",
            "64",
        ]
        if price_estimation:
            argv.append("-c")
        quoted = shlex.join(argv)
        # Prefix the pipeline explicitly; a blanket str.replace("python", ...)
        # would also rewrite "python" inside paths or argument values.
        return quoted if price_estimation else f"yes | {quoted}"

    command = _build_command(str(api_key))
    # The masked variant is the only one that reaches the log or the browser.
    # NOTE: the key is still visible in the OS process list while the child
    # runs, because it is passed to main.py through `-k`.
    display_command = _build_command("********")
    print(display_command)

    def _tail(text: str, keep: int) -> str:
        """Return *text* unchanged if short, else an ellipsis plus its last *keep* lines."""
        lines = text.splitlines()
        if len(lines) <= keep:
            return text
        return "\n".join(["...\n..\n.\n"] + lines[-keep:])

    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        stdin=subprocess.PIPE,
        text=True,
        bufsize=1,
        shell=True,
    )
    del command  # drop the unmasked command line as soon as it has been used

    try:
        # Non-blocking stdout so a partial line cannot stall the UI loop.
        os.set_blocking(process.stdout.fileno(), False)

        last_update_time = time.time()
        terminal_output = st.empty()
        full_output = f"{display_command}\n"
        while True:
            # Wait up to 100 ms for output instead of polling with a zero
            # timeout, which kept one CPU core busy while the child was quiet.
            ready, _, _ = select.select([process.stdout], [], [], 0.1)
            if ready:
                output = process.stdout.readline()
                if output:
                    full_output += output
                    if price_estimation:
                        terminal_output.code(full_output, language="bash")
                    else:
                        current_time = time.time()
                        if current_time - last_update_time > update_interval:
                            terminal_output.code(
                                _tail(full_output, 5), language="bash"
                            )
                            last_update_time = current_time
                    print(output)
                else:
                    # Readable but empty (e.g. EOF): back off so the loop does
                    # not spin until the process actually exits.
                    time.sleep(0.1)

            if process.poll() is not None:
                remaining_output = process.stdout.read()
                if remaining_output:
                    full_output += remaining_output
                    print(remaining_output)
                break

        # Always render the final state from the accumulated output, so lines
        # that arrived between throttled refreshes are not lost and no
        # possibly-unset display buffer is needed.
        final_view = full_output if price_estimation else _tail(full_output, 10)
        terminal_output.code(final_view, language="bash")
    finally:
        for pipe in (process.stdin, process.stdout):
            if pipe is not None:
                pipe.close()

    return_code = process.poll()
    return outdir, return_code


def _read_text(path: str) -> str:
    """Return the UTF-8 text of *path*, closing the file handle afterwards."""
    with open(path, encoding="UTF8") as f:
        return f.read()


def _read_options(path: str) -> List[str]:
    """Return the non-blank, stripped lines of *path* for use as select-box options."""
    return [line.strip() for line in _read_text(path).splitlines() if line.strip()]


def main():
    """Render the page: upload, cost estimation form, and arena run form.

    Flow:
        1. Upload jsonl files; the saved directory is kept in
           ``st.session_state.upfiles_dir``.
        2. Cost estimation form: pick judge model and prompt, then run
           ``run_varco_arena`` with ``price_estimation=True``. Success sets
           ``st.session_state.cost_estimated``.
        3. Run form: enabled only after a successful estimation and before any
           result exists; stores the output directory in
           ``st.session_state.result_file_path``.

    Raises:
        ValueError: If the selected prompt name equals ``USR_SUB``.
    """
    # init lang
    st.session_state["korean"] = st.session_state.get("korean", False)

    sidebar_placeholder = default_page_setting()
    set_nav_bar(
        False, sidebar_placeholder=sidebar_placeholder, toggle_hashstr="app_init"
    )

    st.title("⚔️ VARCO ARENA ⚔️")
    if st.session_state.korean:
        st.write(
            "**VARCO Arena는 각 모델의 생성된 결과를 비교 평가하여 모델의 성능 순위를 제공하는 시스템입니다. 커스텀 테스트셋에 유용하게 활용할 수 있으며, 모범답안을 필요로 하지 않습니다.**"
        )
    else:
        st.write(
            "**VARCO Arena is an LLM benchmarking system that compares model responses across customized test scenarios without requiring reference answers.**"
        )

    st.divider()
    # Set up the file uploader
    if st.session_state.korean:
        st.markdown("모델 출력파일 업로드")
    else:
        st.markdown("### 1. Upload LLM responses")
    uploaded_files = st.file_uploader(
        "Drag and Drop jsonlines files (.jsonl)", accept_multiple_files=True
    )

    # upload state
    if "upfiles_dir" not in st.session_state:
        st.session_state.upfiles_dir = None
    if st.button("Upload Files"):
        st.session_state.upfiles_dir = upload_files(uploaded_files)

    # Guide on the expected input format (read via helper so the file is closed).
    if st.session_state.korean:
        with st.expander("❓❔ 무엇을 업로드 하나요❓❔"):
            st.info(_read_text("guide_mds/input_jsonls_kr.md"))
    else:
        with st.expander("❓❔  What should I upload ❓❔"):
            st.info(_read_text("guide_mds/input_jsonls_en.md"))

    # Form for cost estimation
    with st.form("cost_estimation_form"):
        if st.session_state.korean:
            st.write("### 2. 가격 산정")
        else:
            st.write("### 2. Cost Estimation")
        # Blank lines (e.g. from a trailing newline in the list file) are
        # dropped so they do not show up as an empty, unusable option.
        eval_model = st.selectbox(
            "Select Judge",
            _read_options("eval_models_list.txt"),
        )
        promptname = st.selectbox(
            "Select Evalutaion Prompt",
            _read_options("eval_prompt_list.txt"),
        )
        if promptname == USR_SUB:
            raise ValueError(
                f"{USR_SUB=} is preserved name for the system. Consider another naming for the prompt or consider changing {VA_ROOT=} (USR_SUB == VA_ROOT.parts[-1])."
            )
        estimate_button = st.form_submit_button("Calculate Cost!")
        with st.expander(
            "LLM Judge에 활용되는 프롬프트 (`Calculate Cost!` 클릭시 갱신)"
            if st.session_state.korean
            else "**Evaluation Prompt for LLM Judge (will refresh after `Calculate Cost!` clicked)**"
        ):
            prompt = load_prompt(promptname, task="-")
            # Fill the template with its own placeholder names so the preview
            # shows where each field goes.
            kwargs = dict(
                inst="{inst}",
                src="{src}",
                out_a="{out_a}",
                out_b="{out_b}",
                task="-",
            )
            if promptname == "translation_pair":
                kwargs["source_lang"] = "{source_lang}"
                kwargs["target_lang"] = "{target_lang}"
            prompt_cmpl = prompt.complete_prompt(**kwargs)

            st.markdown(f"### Evaluation Prompt: {promptname}")
            for msg in prompt_cmpl:
                st.markdown(f"**{msg['role']}**")
                st.info(show_linebreak_in_md(escape_markdown(msg["content"])))

        if estimate_button:
            if st.session_state.get("upfiles_dir") is None:
                st.error(
                    "❌ Requirements: You have to upload jsonlines files first to proceed"
                )
            else:
                st.markdown("##### Estimated Cost")
                # Placeholder key and experiment name are passed for the estimate.
                dummy_api_key = "dummy"
                dummy_exp_name = "dummy"
                result_file_path, return_code = run_varco_arena(
                    promptname=promptname,
                    api_key=dummy_api_key,
                    exp_name=dummy_exp_name,
                    price_estimation=True,
                    evaluation_model=eval_model,
                )
                if return_code:
                    st.error("❌ RuntimeError: An error occurred during cost estimation")
                else:
                    st.success("✅ Cost estimation completed successfully")
                    st.session_state.cost_estimated = True

    # Form for actual run
    with st.form("run_arena_form"):
        if st.session_state.korean:
            st.write("### 3. Varco Arena 구동하기")
        else:
            st.write("### 3. Run Varco Arena")
        api_key = st.text_input("Enter your OpenAI API Key", type="password")
        exp_name = st.text_input("(Optional) Enter Exp. name")
        # Neutralise path-like input: the name becomes a sub-directory of the
        # upload directory and may cause rmtree problems later.
        exp_name = exp_name.replace("..", "_")
        exp_name = exp_name.replace(USR_SUB, f"-{USR_SUB}-")
        exp_name = exp_name.replace("/", "-")
        exp_name = exp_name.replace(" ", "_")
        exp_name = exp_name.replace("~", "_")

        if st.session_state.korean:
            st.write("**주의**:`Ctrl+C` 버튼은 구현되지 않았습니다. 구동 전 숙고해주세요.")
        else:
            st.write("**Caution: `Ctrl+C` button hasn't been implemented.**")
        run_button = st.form_submit_button(
            "🔥 Run Arena!",
            disabled=(not st.session_state.get("cost_estimated", False))
            or "result_file_path"
            in st.session_state.keys(),  # run already performed once
        )

        if run_button:
            set_nav_bar(
                True,
                sidebar_placeholder=sidebar_placeholder,
                toggle_hashstr="app_during_run",
            )
            if st.session_state.get("upfiles_dir") is None:
                st.error(
                    "❌ Requirements: You have to upload jsonlines files first to proceed"
                )
            elif not api_key:
                st.error("❌ Requirements: OpenAI key required to run VA.")
            else:
                result_file_path, return_code = run_varco_arena(
                    promptname=promptname,
                    api_key=api_key,
                    exp_name=exp_name,
                    price_estimation=False,
                    evaluation_model=eval_model,
                )
                if return_code:
                    st.error("❌ RuntimeError: An error occurred during Varco Arena run")
                else:
                    st.success("✅ Varco Arena run completed successfully")
                    st.session_state.result_file_path = result_file_path
    set_nav_bar(
        False, sidebar_placeholder=sidebar_placeholder, toggle_hashstr="app_run_done"
    )


if __name__ == "__main__":
    main()