File size: 5,689 Bytes
c2ba4d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import argparse
import asyncio
import os
import platform
from typing import Literal

from calc_cost import calculate
from varco_arena_core.data_utils import load_all_data
from varco_arena_core.manager import Manager

# Opt into a faster asyncio event loop implementation when one is installed
# for the current operating system; otherwise keep a workable default.
_system_name = platform.system()

if _system_name == "Linux":
    try:
        import uvloop

        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    except ImportError:
        # uvloop is optional: stay on the stock asyncio policy.
        pass
elif _system_name == "Windows":
    try:
        import winloop

        asyncio.set_event_loop_policy(winloop.EventLoopPolicy())
    except ImportError:
        # winloop is optional: fall back to the selector-based Windows policy.
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())


def main(
    input_str,
    output_dir,
    evaluation_model,
    matching_method,
    n_jobs: int = 8,
    evalprompt: Literal[
        "llmbar_brief",
        "llmbar",  # general assistant eval
        "translation_pair",  # translation eval
        "rag_pair_kr",  # rag knowledge reflection eval
        # "contextual_pair",  # contextual visual-language instruction eval
    ] = "llmbar",
    calc_price_run: "bool | None" = None,
):
    """Estimate the evaluation cost and, after confirmation, run Varco Arena.

    Args:
        input_str: Input specifier handed to ``load_all_data``.
        output_dir: Directory for results. If it points at an existing file,
            the directory containing that file is used instead; otherwise the
            directory is created when missing.
        evaluation_model: Judge model name, used for both the cost estimate
            and the ``Manager``.
        matching_method: Matching method specifier forwarded to ``calculate``
            and ``Manager``.
        n_jobs: Concurrency setting forwarded to ``Manager``.
        evalprompt: Name of the evaluation prompt to use.
        calc_price_run: When true, print the cost estimate and return without
            running anything. ``None`` (the default) falls back to the
            ``calc_price_run`` attribute of a module-level ``args`` object if
            the CLI entry point created one, and to ``False`` otherwise.

    Returns:
        None.
    """
    if calc_price_run is None:
        # Backward compatibility: the CLI entry point keeps the parsed options
        # in the module-level ``args``. Looking it up through ``globals()``
        # avoids a NameError when this function is imported and called directly.
        cli_args = globals().get("args")
        calc_price_run = bool(getattr(cli_args, "calc_price_run", False))

    dataset_df = load_all_data(input_str)
    if os.path.isfile(output_dir):
        _output_dir = os.path.abspath(os.path.dirname(output_dir))
        print(
            f"output directory '{output_dir}' is not a directory. we'll use '{_output_dir}' instead."
        )
        output_dir = _output_dir
    else:
        os.makedirs(output_dir, exist_ok=True)

    # cost estimation
    total_matches, total_toks_in, total_toks_out, total_costs = calculate(
        dataset_df=dataset_df,
        model_name=evaluation_model,
        matching_method=matching_method,
        evalprompt=evalprompt,
    )

    _doubleline = "=" * 50
    # The Korean labels read: evaluation prompt / matching method / expected
    # number of matches / expected input tokens / expected output tokens /
    # expected cost.
    message = f"""---------------------------------------
Judge LLM: {evaluation_model}
평가 프롬프트: {evalprompt}
평가 리그 방법: {matching_method}
예상 평가 횟수 : {total_matches:,}
예상 입력 토큰 : {total_toks_in:,}
예상 출력 토큰 : {total_toks_out:,} (with x1.01 additional room)
---------------------------------------
예상 발생 비용 : ${total_costs:.3f}
{_doubleline}"""
    print(message)

    if calc_price_run:
        return

    # prompt user whether to continue
    flag = input("[*] Run Varco Arena? (y/n) : ")
    if flag.strip().lower() not in ("y", "yes"):
        print("[-] Varco Arena Stopped")
        return

    # Create and register the loop explicitly: ``asyncio.get_event_loop()``
    # without a current loop is deprecated and raises RuntimeError on
    # Python 3.14+. The loop is set before ``Manager`` is constructed so any
    # loop-bound objects it may create attach to the loop that runs them.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        manager = Manager(
            dataset_df,
            output_dir,
            evaluation_model,
            matching_method,
            n_jobs=n_jobs,
            evalprompt=evalprompt,
        )
        loop.run_until_complete(manager.async_run())
    finally:
        asyncio.set_event_loop(None)
        loop.close()


def normalize_openai_url(url: str) -> str:
    """Return *url* as an OpenAI-style base URL.

    Prepends ``http://`` when no http/https scheme is present, drops trailing
    slashes, and appends ``/v1`` unless the URL already ends with it.
    """
    # Strip trailing slashes first so ".../v1/" is not turned into ".../v1//v1".
    url = url.rstrip("/")
    if not url.startswith(("https://", "http://")):
        url = "http://" + url
    if not url.endswith("/v1"):
        url += "/v1"
    return url


def build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for this script."""
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: main() cannot work with None for either.
    parser.add_argument("-i", "--input", required=True, help="input file")
    parser.add_argument(
        "-o", "--output_dir", required=True, help="output directory"
    )
    parser.add_argument(
        "-e", "--evaluation_model", default="debug", help="evaluation model specifier"
    )
    parser.add_argument(
        "-c",
        "--calc_price_run",
        action="store_true",
        help="print out price calculations",
    )
    parser.add_argument(
        "-m",
        "--matching_method",
        default="tournament",
        choices=["tournament"],  # , "league"],
        help="matching method specifier",
    )
    parser.add_argument(
        "-k",
        "--openai_api_key",
        default=None,
        help='openai key to use / default: OpenAI API Key from your env variable "OPENAI_API_KEY"',
    )
    parser.add_argument(
        "-u",
        "--openai_url",
        default="https://api.openai.com/v1",
        help="OpenAI BASE URL",
    )
    # advanced options
    parser.add_argument(
        "-j",
        "--n_jobs",
        default=32,
        type=int,
        help="number of concurrency for asyncio (passed to asyncio.Semaphore @ manager.py)\nIf your job does not proceed, consider lowering this.",
    )
    parser.add_argument(
        "-p",
        "--evalprompt",
        default="llmbar_brief",
        choices=[
            "llmbar_brief",
            "llmbar",
            "translation_pair",
            "rag_pair_kr",
            # "contextual_pair",
        ],
    )

    parser.add_argument(
        "-lr",
        "--limit_requests",
        default=7_680,
        type=int,
        help="limit number of requests per minute when using vLLM Server",
    )
    parser.add_argument(
        "-lt",
        "--limit_tokens",
        default=15_728_640,
        type=int,
        help="limit number of tokens per minute when using vLLM Server",
    )
    return parser


if __name__ == "__main__":
    parser = build_arg_parser()
    # Kept as a module-level name: main() may read ``args.calc_price_run``.
    args = parser.parse_args()

    # The client is imported further inside because of how the OpenAI key is
    # set up, so the settings are passed along via environment variables here.
    # Base URL
    args.openai_url = normalize_openai_url(args.openai_url)
    os.environ["OPENAI_BASE_URL"] = args.openai_url

    # API key: an explicitly given key overrides the environment variable;
    # otherwise the environment variable must already be set.
    if args.openai_api_key is None:
        if os.getenv("OPENAI_API_KEY") is None:
            raise ValueError(
                "`--openai_api_key` or environment variable `OPENAI_API_KEY` is required"
            )
    else:
        os.environ["OPENAI_API_KEY"] = args.openai_api_key

    # Rate limits
    os.environ["LIMIT_REQUESTS"] = str(args.limit_requests)
    os.environ["LIMIT_TOKENS"] = str(args.limit_tokens)

    main(
        args.input,
        args.output_dir,
        args.evaluation_model,
        args.matching_method,
        n_jobs=args.n_jobs,
        evalprompt=args.evalprompt,
    )