Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,8 @@ dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")
|
|
11 |
|
12 |
target_audio_dir = "/home/user/app/audio"
|
13 |
os.makedirs(target_audio_dir, exist_ok=True)
|
|
|
|
|
14 |
|
15 |
local_audio_paths = []
|
16 |
|
@@ -112,9 +114,49 @@ DIMENSIONS_DATA = [
|
|
112 |
|
113 |
DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
QUESTION_SET = [
|
120 |
{"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
|
@@ -291,8 +333,8 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
|
|
291 |
gr.update(), gr.update(),
|
292 |
) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)
|
293 |
|
294 |
-
def save_all_results_to_file(all_results, user_data):
|
295 |
-
repo_id = "
|
296 |
username = user_data.get("age", "user")
|
297 |
filename = f"submission_{username}_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.json"
|
298 |
final_data_package = {
|
@@ -310,7 +352,7 @@ def save_all_results_to_file(all_results, user_data):
|
|
310 |
api = HfApi()
|
311 |
api.upload_file(
|
312 |
path_or_fileobj=bytes(json_string, "utf-8"),
|
313 |
-
path_in_repo=f"
|
314 |
repo_id=repo_id,
|
315 |
repo_type="dataset",
|
316 |
token=hf_token,
|
@@ -319,7 +361,57 @@ def save_all_results_to_file(all_results, user_data):
|
|
319 |
print(f"Successfully uploaded results to dataset: {repo_id}")
|
320 |
|
321 |
except Exception as e:
|
322 |
-
print(f"Error uploading to Hugging Face Hub: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
|
324 |
def back_to_welcome():
|
325 |
return (
|
@@ -396,7 +488,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
|
|
396 |
question_progress_text = gr.Markdown()
|
397 |
test_dimension_title = gr.Markdown()
|
398 |
test_audio = gr.Audio(label="测试音频")
|
399 |
-
gr.Markdown("--- \n ### 请为以下特征打分 (0-5
|
400 |
test_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
|
401 |
with gr.Row():
|
402 |
prev_dim_btn = gr.Button("上一维度")
|
|
|
11 |
|
12 |
target_audio_dir = "/home/user/app/audio"
|
13 |
os.makedirs(target_audio_dir, exist_ok=True)
|
14 |
+
COUNT_JSON_PATH = "/home/user/app/count.json"
|
15 |
+
COUNT_JSON_REPO_PATH = "submission/count.json"
|
16 |
|
17 |
local_audio_paths = []
|
18 |
|
|
|
114 |
|
115 |
DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
|
116 |
|
117 |
+
def load_or_initialize_count_json(audio_paths, count_path=None):
    """Load the per-audio usage counters, creating or extending them as needed.

    The counter file maps an audio file's base name to the number of times it
    has been handed out. Every path in ``audio_paths`` is guaranteed to have
    an entry (initialised to 0) in the returned mapping.

    Args:
        audio_paths: Iterable of audio file paths; only the base name of each
            path is used as the counter key.
        count_path: Location of the JSON counter file. Defaults to the
            module-level ``COUNT_JSON_PATH``.

    Returns:
        dict: Mapping of audio base name -> usage count.
    """
    if count_path is None:
        count_path = COUNT_JSON_PATH

    count_data = {}
    file_is_valid = False
    try:
        # EAFP: open directly instead of checking os.path.exists() first.
        with open(count_path, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        pass  # First run: start from an empty counter table.
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        # A truncated or corrupt file must not take the whole app down.
        print(f"count.json is unreadable ({e}); re-initializing counters")
    else:
        if isinstance(loaded, dict):
            count_data = loaded
            file_is_valid = True
        else:
            print("count.json does not contain a JSON object; re-initializing counters")

    updated = False
    for path in audio_paths:
        filename = os.path.basename(path)
        if filename not in count_data:
            count_data[filename] = 0
            updated = True

    # Persist when new entries were added, or when no usable file existed.
    if updated or not file_is_valid:
        with open(count_path, "w", encoding="utf-8") as f:
            json.dump(count_data, f, indent=4, ensure_ascii=False)

    return count_data
|
136 |
+
|
137 |
+
def sample_audio_paths(audio_paths, count_data, k=5, max_count=3, count_path=None):
    """Randomly pick ``k`` audio files that have been used fewer than ``max_count`` times.

    The usage counter of every selected file is incremented in ``count_data``
    (the dict is mutated in place) and the updated counters are written to the
    counter file.

    Args:
        audio_paths: Sequence of candidate audio file paths.
        count_data: Mapping of audio base name -> usage count. Files missing
            from the mapping are treated as never used.
        k: Number of files to select.
        max_count: A file is eligible only while its count is below this value.
        count_path: Location of the JSON counter file. Defaults to the
            module-level ``COUNT_JSON_PATH``.

    Returns:
        tuple: ``(selected_paths, count_data)``.

    Raises:
        ValueError: If fewer than ``k`` files are still eligible.
    """
    if count_path is None:
        count_path = COUNT_JSON_PATH

    eligible_paths = [
        p for p in audio_paths
        if count_data.get(os.path.basename(p), 0) < max_count
    ]

    if len(eligible_paths) < k:
        raise ValueError(f"⚠️ 可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")

    selected = random.sample(eligible_paths, k)
    for path in selected:
        filename = os.path.basename(path)
        # Use the same "missing means 0" default as the eligibility filter;
        # a plain `+= 1` raised KeyError for files absent from count_data.
        count_data[filename] = count_data.get(filename, 0) + 1

    with open(count_path, "w", encoding="utf-8") as f:
        json.dump(count_data, f, indent=4, ensure_ascii=False)

    return selected, count_data
|
152 |
+
|
153 |
+
"""random.seed()
|
154 |
+
selected_audio_paths = random.sample(all_data_audio_paths, 5)"""
|
155 |
+
# print(selected_audio_paths)
|
156 |
+
|
157 |
+
count_data = load_or_initialize_count_json(all_data_audio_paths)
|
158 |
+
selected_audio_paths, updated_count_data = sample_audio_paths(all_data_audio_paths, count_data, k=5)
|
159 |
+
|
160 |
|
161 |
QUESTION_SET = [
|
162 |
{"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
|
|
|
333 |
gr.update(), gr.update(),
|
334 |
) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)
|
335 |
|
336 |
+
"""def save_all_results_to_file(all_results, user_data):
|
337 |
+
repo_id = "intersteller2887/Turing-test-dataset"
|
338 |
username = user_data.get("age", "user")
|
339 |
filename = f"submission_{username}_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.json"
|
340 |
final_data_package = {
|
|
|
352 |
api = HfApi()
|
353 |
api.upload_file(
|
354 |
path_or_fileobj=bytes(json_string, "utf-8"),
|
355 |
+
path_in_repo=f"submissions/{filename}",
|
356 |
repo_id=repo_id,
|
357 |
repo_type="dataset",
|
358 |
token=hf_token,
|
|
|
361 |
print(f"Successfully uploaded results to dataset: {repo_id}")
|
362 |
|
363 |
except Exception as e:
|
364 |
+
print(f"Error uploading to Hugging Face Hub: {e}")"""
|
365 |
+
|
366 |
+
def save_all_results_to_file(all_results, user_data, count_data=None):
    """Upload one participant's results (and optionally the counters) to the Hub.

    The submission is serialised to JSON and uploaded to the dataset repo
    under ``data/``. When ``count_data`` is truthy it is also written to
    ``COUNT_JSON_PATH`` locally and uploaded to ``COUNT_JSON_REPO_PATH``.
    Upload failures are reported on stdout and never raised to the caller.

    Args:
        all_results: JSON-serialisable results collected during the test.
        user_data: Mapping with the participant's details; its ``"age"`` value
            is used as the name component of the submission file. ``None`` is
            treated as an empty mapping.
        count_data: Optional mapping of audio base name -> usage count.

    Returns:
        None
    """
    import re  # Local import: only needed for file-name sanitising below.

    repo_id = "Hu6ery/Turing-Test-Submissions"
    user_info = user_data if user_data is not None else {}

    # The value comes from a user-facing form, so keep only characters that
    # are safe inside a repo path (no slashes, spaces or control characters).
    raw_name = str(user_info.get("age", "user"))
    username = re.sub(r"[^\w.-]+", "_", raw_name).strip("._") or "user"

    timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
    submission_filename = f"submission_{username}_{timestamp}.json"

    final_data_package = {
        "user_info": user_info,
        "results": all_results,
    }
    json_string = json.dumps(final_data_package, ensure_ascii=False, indent=4)

    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        print("HF_TOKEN not found. Cannot upload to the Hub.")
        return

    try:
        api = HfApi()

        # Upload the submission file.
        api.upload_file(
            path_or_fileobj=json_string.encode("utf-8"),
            path_in_repo=f"data/{submission_filename}",
            repo_id=repo_id,
            repo_type="dataset",
            token=hf_token,
            commit_message=f"Add new submission from {username}",
        )
        print(f"上传成功: {submission_filename}")

        # Upload count.json (only when counters were provided).
        if count_data:
            with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
                json.dump(count_data, f, indent=4, ensure_ascii=False)

            api.upload_file(
                path_or_fileobj=COUNT_JSON_PATH,
                path_in_repo=COUNT_JSON_REPO_PATH,
                repo_id=repo_id,
                repo_type="dataset",
                token=hf_token,
                commit_message=f"Update count.json after submission by {username}",
            )
            print("count.json 上传成功")

    except Exception as e:
        # Top-level boundary for a UI callback: report the failure, don't crash.
        print(f"上传出错: {e}")
|
414 |
+
|
415 |
|
416 |
def back_to_welcome():
|
417 |
return (
|
|
|
488 |
question_progress_text = gr.Markdown()
|
489 |
test_dimension_title = gr.Markdown()
|
490 |
test_audio = gr.Audio(label="测试音频")
|
491 |
+
gr.Markdown("--- \n ### 请为以下特征打分 (0-5分。0代表认为回应内容在该特征上完全表现出机器特征;5代表完全表现出人类特征)")
|
492 |
test_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
|
493 |
with gr.Row():
|
494 |
prev_dim_btn = gr.Button("上一维度")
|