Spaces:
Sleeping
Sleeping
update
Browse files
- Dockerfile +1 -1
- data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/arc-easy-1000-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl +3 -0
- examples/make_raw_dataset/step_3_filter_by_keywords.py +19 -2
- examples/test_metrics/bingoplus_chat_metric.py +2 -2
- examples/test_metrics/lingoace_chat_metric.py +2 -2
- llm_eval_script/byteplus.py +2 -1
- llm_eval_script/byteplus_chat.py +4 -3
- llm_eval_script/gemini_google.py +49 -7
- llm_eval_script/gemini_google_chat.py +8 -4
- main.py +1 -0
Dockerfile
CHANGED
|
@@ -5,7 +5,7 @@ WORKDIR /code
|
|
| 5 |
COPY . /code
|
| 6 |
|
| 7 |
RUN apt-get update
|
| 8 |
-
RUN apt-get install -y wget unzip ffmpeg build-essential git
|
| 9 |
|
| 10 |
RUN pip install --upgrade pip
|
| 11 |
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
|
|
|
| 5 |
COPY . /code
|
| 6 |
|
| 7 |
RUN apt-get update
|
| 8 |
+
RUN apt-get install -y wget unzip ffmpeg build-essential git git-lfs
|
| 9 |
|
| 10 |
RUN pip install --upgrade pip
|
| 11 |
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-bingoplus-ph-200-chat.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a574d56126be957ef4d283af06243125886f7544ccaa5bbbe0b01900abe2c62f
|
| 3 |
+
size 2417697
|
data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-bingoplus-ph-90-choice.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:118787cf7fd66a6683864ff4b79fc648c7d17c65b420c25092c14857c75674ed
|
| 3 |
+
size 258515
|
data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab42fc8b853062a9391db33fe890869e7f61e7f9c118ea2c84e3c3555768ca00
|
| 3 |
+
size 2419510
|
data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-90-choice.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d738dbb5fa0aef7cc3880b0ec50f2a54143ce586b74bb3c1cffe009f53344dc
|
| 3 |
+
size 258673
|
data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-400-choice.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ae30069ee95459c290f53eb50dcb72cb2c11a8a7c3691a96006f4d462dd767b
|
| 3 |
+
size 1211487
|
data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ea3a2b7e5c28a98464352433baecdb7f6c011046d6853282709f7b62ca1386c
|
| 3 |
+
size 874387
|
data/eval_data/gemini_google/google/llama-4-maverick-17b-128e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/arc-easy-1000-choice.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:970ffc784ca83d2ce6e826d3303590d0646f77395bdd832fa809cf09dad46529
|
| 3 |
+
size 720927
|
data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f762c204ac2438aebe08f143bbffddd10d2e94701dd787b103506c09c79f1c1b
|
| 3 |
+
size 2471787
|
data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-90-choice.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6963aa07be72dff967b2388cb4d0303ed76624ba7b48f3f5861c9b207c08448
|
| 3 |
+
size 258578
|
data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-400-choice.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b38cb68452d6f237d275aa03a6c589ece653d4f8ecd5e808d41bb0ac729d850
|
| 3 |
+
size 1211826
|
data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40aab0bb0dd05948d878e0ffab0cb84eca630530079619bd79744957cf42bef2
|
| 3 |
+
size 874346
|
examples/make_raw_dataset/step_3_filter_by_keywords.py
CHANGED
|
@@ -50,12 +50,29 @@ def main():
|
|
| 50 |
|
| 51 |
for key_str in [
|
| 52 |
# "BingoPlus",
|
| 53 |
-
" COD ",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
]:
|
| 55 |
if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
|
| 56 |
print(f"process: {sample_dir.as_posix()}")
|
| 57 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-bingoplus"
|
| 58 |
-
tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-cod"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
tgt_dir.mkdir(parents=True, exist_ok=True)
|
| 60 |
shutil.move(
|
| 61 |
sample_dir.as_posix(),
|
|
|
|
| 50 |
|
| 51 |
for key_str in [
|
| 52 |
# "BingoPlus",
|
| 53 |
+
# " COD ",
|
| 54 |
+
# "NXPay",
|
| 55 |
+
# "NX Money",
|
| 56 |
+
# "Exodus Telecom",
|
| 57 |
+
# "Exodus Retail",
|
| 58 |
+
"Exodus Automotive",
|
| 59 |
+
# "kta kilat", "KTA KILAT",
|
| 60 |
+
# "NXCloud",
|
| 61 |
+
# "作为VIP客户",
|
| 62 |
+
"FedEx",
|
| 63 |
]:
|
| 64 |
if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
|
| 65 |
print(f"process: {sample_dir.as_posix()}")
|
| 66 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-bingoplus"
|
| 67 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-cod"
|
| 68 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxpay"
|
| 69 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxmoney"
|
| 70 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-retail"
|
| 71 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-automotive"
|
| 72 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-kta"
|
| 73 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxcloud"
|
| 74 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-vip"
|
| 75 |
+
tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-fedex"
|
| 76 |
tgt_dir.mkdir(parents=True, exist_ok=True)
|
| 77 |
shutil.move(
|
| 78 |
sample_dir.as_posix(),
|
examples/test_metrics/bingoplus_chat_metric.py
CHANGED
|
@@ -38,12 +38,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
|
|
| 38 |
)
|
| 39 |
parser.add_argument(
|
| 40 |
"--eval_data_file",
|
| 41 |
-
default=(project_path / "data/eval_data/gemini_google/google/
|
| 42 |
type=str
|
| 43 |
)
|
| 44 |
parser.add_argument(
|
| 45 |
"--output_file",
|
| 46 |
-
default=(project_path / "data/eval_data/gemini_google/google/
|
| 47 |
type=str
|
| 48 |
)
|
| 49 |
parser.add_argument(
|
|
|
|
| 38 |
)
|
| 39 |
parser.add_argument(
|
| 40 |
"--eval_data_file",
|
| 41 |
+
default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl.raw").as_posix(),
|
| 42 |
type=str
|
| 43 |
)
|
| 44 |
parser.add_argument(
|
| 45 |
"--output_file",
|
| 46 |
+
default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
|
| 47 |
type=str
|
| 48 |
)
|
| 49 |
parser.add_argument(
|
examples/test_metrics/lingoace_chat_metric.py
CHANGED
|
@@ -43,12 +43,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
|
|
| 43 |
)
|
| 44 |
parser.add_argument(
|
| 45 |
"--eval_data_file",
|
| 46 |
-
default=(project_path / "data/eval_data/
|
| 47 |
type=str
|
| 48 |
)
|
| 49 |
parser.add_argument(
|
| 50 |
"--output_file",
|
| 51 |
-
default=(project_path / "data/eval_data/
|
| 52 |
type=str
|
| 53 |
)
|
| 54 |
parser.add_argument(
|
|
|
|
| 43 |
)
|
| 44 |
parser.add_argument(
|
| 45 |
"--eval_data_file",
|
| 46 |
+
default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl.raw").as_posix(),
|
| 47 |
type=str
|
| 48 |
)
|
| 49 |
parser.add_argument(
|
| 50 |
"--output_file",
|
| 51 |
+
default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl").as_posix(),
|
| 52 |
type=str
|
| 53 |
)
|
| 54 |
parser.add_argument(
|
llm_eval_script/byteplus.py
CHANGED
|
@@ -49,8 +49,9 @@ def get_args():
|
|
| 49 |
)
|
| 50 |
parser.add_argument(
|
| 51 |
"--eval_dataset_name",
|
|
|
|
| 52 |
# default="agent-lingoace-zh-400-choice.jsonl",
|
| 53 |
-
default="arc-easy-1000-choice.jsonl",
|
| 54 |
type=str
|
| 55 |
)
|
| 56 |
parser.add_argument(
|
|
|
|
| 49 |
)
|
| 50 |
parser.add_argument(
|
| 51 |
"--eval_dataset_name",
|
| 52 |
+
default="agent-bingoplus-ph-90-choice.jsonl",
|
| 53 |
# default="agent-lingoace-zh-400-choice.jsonl",
|
| 54 |
+
# default="arc-easy-1000-choice.jsonl",
|
| 55 |
type=str
|
| 56 |
)
|
| 57 |
parser.add_argument(
|
llm_eval_script/byteplus_chat.py
CHANGED
|
@@ -42,14 +42,15 @@ def get_args():
|
|
| 42 |
parser = argparse.ArgumentParser()
|
| 43 |
parser.add_argument(
|
| 44 |
"--model_name",
|
| 45 |
-
default="seed-1-6-250615",
|
| 46 |
-
|
| 47 |
# default="deepseek-v3-250324",
|
| 48 |
type=str
|
| 49 |
)
|
| 50 |
parser.add_argument(
|
| 51 |
"--eval_dataset_name",
|
| 52 |
-
default="agent-lingoace-zh-80-chat.jsonl",
|
|
|
|
| 53 |
type=str
|
| 54 |
)
|
| 55 |
parser.add_argument(
|
|
|
|
| 42 |
parser = argparse.ArgumentParser()
|
| 43 |
parser.add_argument(
|
| 44 |
"--model_name",
|
| 45 |
+
# default="seed-1-6-250615",
|
| 46 |
+
default="seed-1-6-flash-250615",
|
| 47 |
# default="deepseek-v3-250324",
|
| 48 |
type=str
|
| 49 |
)
|
| 50 |
parser.add_argument(
|
| 51 |
"--eval_dataset_name",
|
| 52 |
+
# default="agent-lingoace-zh-80-chat.jsonl",
|
| 53 |
+
default="agent-bingoplus-ph-200-chat.jsonl",
|
| 54 |
type=str
|
| 55 |
)
|
| 56 |
parser.add_argument(
|
llm_eval_script/gemini_google.py
CHANGED
|
@@ -1,5 +1,25 @@
|
|
| 1 |
#!/usr/bin/python3
|
| 2 |
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import argparse
|
| 4 |
from datetime import datetime
|
| 5 |
import json
|
|
@@ -25,13 +45,17 @@ def get_args():
|
|
| 25 |
"--model_name",
|
| 26 |
# default="gemini-2.5-pro", # The model does not support setting thinking_budget to 0.
|
| 27 |
# default="gemini-2.5-flash",
|
| 28 |
-
default="gemini-2.5-flash-lite-preview-06-17",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
type=str
|
| 30 |
)
|
| 31 |
parser.add_argument(
|
| 32 |
"--eval_dataset_name",
|
| 33 |
-
default="agent-bingoplus-ph-90-choice.jsonl",
|
| 34 |
-
|
| 35 |
# default="arc-easy-1000-choice.jsonl",
|
| 36 |
type=str
|
| 37 |
)
|
|
@@ -55,6 +79,17 @@ def get_args():
|
|
| 55 |
default="google_potent_veld_462405_t3",
|
| 56 |
type=str
|
| 57 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
args = parser.parse_args()
|
| 59 |
return args
|
| 60 |
|
|
@@ -79,9 +114,13 @@ def main():
|
|
| 79 |
eval_data_dir = Path(args.eval_data_dir)
|
| 80 |
eval_data_dir.mkdir(parents=True, exist_ok=True)
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
eval_dataset = eval_dataset_dir / args.eval_dataset_name
|
| 87 |
|
|
@@ -91,7 +130,8 @@ def main():
|
|
| 91 |
client = genai.Client(
|
| 92 |
vertexai=True,
|
| 93 |
project=project_id,
|
| 94 |
-
location="global",
|
|
|
|
| 95 |
)
|
| 96 |
generate_content_config = types.GenerateContentConfig(
|
| 97 |
top_p=0.95,
|
|
@@ -137,6 +177,8 @@ def main():
|
|
| 137 |
]
|
| 138 |
)
|
| 139 |
]
|
|
|
|
|
|
|
| 140 |
time_begin = time.time()
|
| 141 |
llm_response: types.GenerateContentResponse = client.models.generate_content(
|
| 142 |
model=args.model_name,
|
|
|
|
| 1 |
#!/usr/bin/python3
|
| 2 |
# -*- coding: utf-8 -*-
|
| 3 |
+
"""
|
| 4 |
+
https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude?hl=zh-cn
|
| 5 |
+
https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/use-claude?hl=zh-cn
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
Llama
|
| 9 |
+
|
| 10 |
+
https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/llama/use-llama?hl=zh-cn
|
| 11 |
+
https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/llama/use-llama?hl=zh-cn#regions-quotas
|
| 12 |
+
|
| 13 |
+
Model Name
|
| 14 |
+
llama-4-maverick-17b-128e-instruct-maas
|
| 15 |
+
llama-4-scout-17b-16e-instruct-maas
|
| 16 |
+
|
| 17 |
+
区域选择 us-east5
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
"""
|
| 23 |
import argparse
|
| 24 |
from datetime import datetime
|
| 25 |
import json
|
|
|
|
| 45 |
"--model_name",
|
| 46 |
# default="gemini-2.5-pro", # The model does not support setting thinking_budget to 0.
|
| 47 |
# default="gemini-2.5-flash",
|
| 48 |
+
# default="gemini-2.5-flash-lite-preview-06-17",
|
| 49 |
+
# default="claude-opus-4@20250514",
|
| 50 |
+
# default="claude-sonnet-4@20250514",
|
| 51 |
+
# default="llama-4-maverick-17b-128e-instruct-maas",
|
| 52 |
+
default="llama-4-scout-17b-16e-instruct-maas",
|
| 53 |
type=str
|
| 54 |
)
|
| 55 |
parser.add_argument(
|
| 56 |
"--eval_dataset_name",
|
| 57 |
+
# default="agent-bingoplus-ph-90-choice.jsonl",
|
| 58 |
+
default="agent-lingoace-zh-400-choice.jsonl",
|
| 59 |
# default="arc-easy-1000-choice.jsonl",
|
| 60 |
type=str
|
| 61 |
)
|
|
|
|
| 79 |
default="google_potent_veld_462405_t3",
|
| 80 |
type=str
|
| 81 |
)
|
| 82 |
+
parser.add_argument(
|
| 83 |
+
"--create_time_str",
|
| 84 |
+
# default="null",
|
| 85 |
+
default="20250731_162116",
|
| 86 |
+
type=str
|
| 87 |
+
)
|
| 88 |
+
parser.add_argument(
|
| 89 |
+
"--interval",
|
| 90 |
+
default=1,
|
| 91 |
+
type=int
|
| 92 |
+
)
|
| 93 |
args = parser.parse_args()
|
| 94 |
return args
|
| 95 |
|
|
|
|
| 114 |
eval_data_dir = Path(args.eval_data_dir)
|
| 115 |
eval_data_dir.mkdir(parents=True, exist_ok=True)
|
| 116 |
|
| 117 |
+
if args.create_time_str == "null":
|
| 118 |
+
tz = ZoneInfo("Asia/Shanghai")
|
| 119 |
+
now = datetime.now(tz)
|
| 120 |
+
create_time_str = now.strftime("%Y%m%d_%H%M%S")
|
| 121 |
+
# create_time_str = "20250729-interval-5"
|
| 122 |
+
else:
|
| 123 |
+
create_time_str = args.create_time_str
|
| 124 |
|
| 125 |
eval_dataset = eval_dataset_dir / args.eval_dataset_name
|
| 126 |
|
|
|
|
| 130 |
client = genai.Client(
|
| 131 |
vertexai=True,
|
| 132 |
project=project_id,
|
| 133 |
+
# location="global",
|
| 134 |
+
location="us-east5",
|
| 135 |
)
|
| 136 |
generate_content_config = types.GenerateContentConfig(
|
| 137 |
top_p=0.95,
|
|
|
|
| 177 |
]
|
| 178 |
)
|
| 179 |
]
|
| 180 |
+
time.sleep(args.interval)
|
| 181 |
+
print(f"sleep: {args.interval}")
|
| 182 |
time_begin = time.time()
|
| 183 |
llm_response: types.GenerateContentResponse = client.models.generate_content(
|
| 184 |
model=args.model_name,
|
llm_eval_script/gemini_google_chat.py
CHANGED
|
@@ -25,7 +25,9 @@ def get_args():
|
|
| 25 |
"--model_name",
|
| 26 |
# default="gemini-2.5-pro", # The model does not support setting thinking_budget to 0.
|
| 27 |
# default="gemini-2.5-flash",
|
| 28 |
-
default="gemini-2.5-flash-lite-preview-06-17",
|
|
|
|
|
|
|
| 29 |
type=str
|
| 30 |
)
|
| 31 |
parser.add_argument(
|
|
@@ -57,12 +59,12 @@ def get_args():
|
|
| 57 |
parser.add_argument(
|
| 58 |
"--create_time_str",
|
| 59 |
# default="null",
|
| 60 |
-
default="
|
| 61 |
type=str
|
| 62 |
)
|
| 63 |
parser.add_argument(
|
| 64 |
"--interval",
|
| 65 |
-
default=
|
| 66 |
type=int
|
| 67 |
)
|
| 68 |
args = parser.parse_args()
|
|
@@ -105,7 +107,9 @@ def main():
|
|
| 105 |
client = genai.Client(
|
| 106 |
vertexai=True,
|
| 107 |
project=project_id,
|
| 108 |
-
location="global",
|
|
|
|
|
|
|
| 109 |
)
|
| 110 |
generate_content_config = types.GenerateContentConfig(
|
| 111 |
top_p=0.95,
|
|
|
|
| 25 |
"--model_name",
|
| 26 |
# default="gemini-2.5-pro", # The model does not support setting thinking_budget to 0.
|
| 27 |
# default="gemini-2.5-flash",
|
| 28 |
+
# default="gemini-2.5-flash-lite-preview-06-17",
|
| 29 |
+
# default="llama-4-maverick-17b-128e-instruct-maas",
|
| 30 |
+
default="llama-4-scout-17b-16e-instruct-maas",
|
| 31 |
type=str
|
| 32 |
)
|
| 33 |
parser.add_argument(
|
|
|
|
| 59 |
parser.add_argument(
|
| 60 |
"--create_time_str",
|
| 61 |
# default="null",
|
| 62 |
+
default="20250731_162116",
|
| 63 |
type=str
|
| 64 |
)
|
| 65 |
parser.add_argument(
|
| 66 |
"--interval",
|
| 67 |
+
default=1,
|
| 68 |
type=int
|
| 69 |
)
|
| 70 |
args = parser.parse_args()
|
|
|
|
| 107 |
client = genai.Client(
|
| 108 |
vertexai=True,
|
| 109 |
project=project_id,
|
| 110 |
+
# location="global",
|
| 111 |
+
location="us-east5",
|
| 112 |
+
|
| 113 |
)
|
| 114 |
generate_content_config = types.GenerateContentConfig(
|
| 115 |
top_p=0.95,
|
main.py
CHANGED
|
@@ -17,6 +17,7 @@ docker run -itd \
|
|
| 17 |
--name llm_eval_system_7862 \
|
| 18 |
--restart=always \
|
| 19 |
--network host \
|
|
|
|
| 20 |
python:3.12 \
|
| 21 |
/bin/bash
|
| 22 |
|
|
|
|
| 17 |
--name llm_eval_system_7862 \
|
| 18 |
--restart=always \
|
| 19 |
--network host \
|
| 20 |
+
-v /data/tianxing/PycharmProjects/llm_eval_system:/data/tianxing/PycharmProjects/llm_eval_system \
|
| 21 |
python:3.12 \
|
| 22 |
/bin/bash
|
| 23 |
|