intersteller2887 committed on
Commit
fb691f0
·
verified ·
1 Parent(s): 03c4a10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -8
app.py CHANGED
@@ -11,6 +11,8 @@ dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")
11
 
12
  target_audio_dir = "/home/user/app/audio"
13
  os.makedirs(target_audio_dir, exist_ok=True)
 
 
14
 
15
  local_audio_paths = []
16
 
@@ -112,9 +114,49 @@ DIMENSIONS_DATA = [
112
 
113
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
114
 
115
- random.seed()
116
- selected_audio_paths = random.sample(all_data_audio_paths, 5)
117
- print(selected_audio_paths)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
  QUESTION_SET = [
120
  {"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
@@ -291,8 +333,8 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
291
  gr.update(), gr.update(),
292
  ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)
293
 
294
- def save_all_results_to_file(all_results, user_data):
295
- repo_id = "Hu6ery/Turing-Test-Submissions"
296
  username = user_data.get("age", "user")
297
  filename = f"submission_{username}_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.json"
298
  final_data_package = {
@@ -310,7 +352,7 @@ def save_all_results_to_file(all_results, user_data):
310
  api = HfApi()
311
  api.upload_file(
312
  path_or_fileobj=bytes(json_string, "utf-8"),
313
- path_in_repo=f"data/{filename}",
314
  repo_id=repo_id,
315
  repo_type="dataset",
316
  token=hf_token,
@@ -319,7 +361,57 @@ def save_all_results_to_file(all_results, user_data):
319
  print(f"Successfully uploaded results to dataset: {repo_id}")
320
 
321
  except Exception as e:
322
- print(f"Error uploading to Hugging Face Hub: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
  def back_to_welcome():
325
  return (
@@ -396,7 +488,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
396
  question_progress_text = gr.Markdown()
397
  test_dimension_title = gr.Markdown()
398
  test_audio = gr.Audio(label="测试音频")
399
- gr.Markdown("--- \n ### 请为以下特征打分 (0-5)")
400
  test_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
401
  with gr.Row():
402
  prev_dim_btn = gr.Button("上一维度")
 
11
 
12
  target_audio_dir = "/home/user/app/audio"
13
  os.makedirs(target_audio_dir, exist_ok=True)
14
+ COUNT_JSON_PATH = "/home/user/app/count.json"
15
+ COUNT_JSON_REPO_PATH = "submission/count.json"
16
 
17
  local_audio_paths = []
18
 
 
114
 
115
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
116
 
117
def load_or_initialize_count_json(audio_paths, count_path=None):
    """Load the per-audio usage counts, adding a zero entry for new audio files.

    Args:
        audio_paths: Iterable of audio file paths. Only the base name of each
            path is used as the key in the count mapping.
        count_path: Location of the JSON count file. Defaults to the
            module-level ``COUNT_JSON_PATH`` when omitted.

    Returns:
        The count mapping (file name -> count) as read from the file, with a
        ``0`` entry added for every audio file that had none.

    The file is (re)written only when it did not exist or when at least one
    new entry was added.
    """
    if count_path is None:
        count_path = COUNT_JSON_PATH

    # EAFP: open directly instead of checking os.path.exists() first, so a
    # file that disappears between the check and the open cannot crash us.
    try:
        with open(count_path, "r", encoding="utf-8") as f:
            count_data = json.load(f)
        needs_write = False
    except FileNotFoundError:
        count_data = {}
        needs_write = True  # create the file even if no audio is listed

    for path in audio_paths:
        filename = os.path.basename(path)
        if filename not in count_data:
            count_data[filename] = 0
            needs_write = True

    if needs_write:
        with open(count_path, "w", encoding="utf-8") as f:
            json.dump(count_data, f, indent=4, ensure_ascii=False)

    return count_data
136
+
137
def sample_audio_paths(audio_paths, count_data, k=5, max_count=3, count_path=None):
    """Randomly pick ``k`` audio files that have been used fewer than ``max_count`` times.

    Args:
        audio_paths: Sequence of candidate audio file paths.
        count_data: Mapping of audio base name -> usage count. It is updated
            in place: every selected file has its count incremented by one.
        k: Number of audio files to draw.
        max_count: A file is eligible only while its count is below this value.
        count_path: Location of the JSON count file to rewrite. Defaults to
            the module-level ``COUNT_JSON_PATH`` when omitted.

    Returns:
        A ``(selected, count_data)`` tuple: the list of chosen paths and the
        updated count mapping (the same object that was passed in).

    Raises:
        ValueError: If fewer than ``k`` eligible audio files remain.
    """
    if count_path is None:
        count_path = COUNT_JSON_PATH

    eligible_paths = [
        p for p in audio_paths
        if count_data.get(os.path.basename(p), 0) < max_count
    ]

    if len(eligible_paths) < k:
        raise ValueError(f"⚠️ 可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")

    selected = random.sample(eligible_paths, k)
    for path in selected:
        filename = os.path.basename(path)
        # A file without an entry counts as 0 in the eligibility filter above,
        # so the increment must tolerate the missing key as well.
        count_data[filename] = count_data.get(filename, 0) + 1

    # Persist the updated counts to the count file.
    with open(count_path, "w", encoding="utf-8") as f:
        json.dump(count_data, f, indent=4, ensure_ascii=False)

    return selected, count_data
152
+
153
+ """random.seed()
154
+ selected_audio_paths = random.sample(all_data_audio_paths, 5)"""
155
+ # print(selected_audio_paths)
156
+
157
+ count_data = load_or_initialize_count_json(all_data_audio_paths)
158
+ selected_audio_paths, updated_count_data = sample_audio_paths(all_data_audio_paths, count_data, k=5)
159
+
160
 
161
  QUESTION_SET = [
162
  {"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
 
333
  gr.update(), gr.update(),
334
  ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)
335
 
336
+ """def save_all_results_to_file(all_results, user_data):
337
+ repo_id = "intersteller2887/Turing-test-dataset"
338
  username = user_data.get("age", "user")
339
  filename = f"submission_{username}_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.json"
340
  final_data_package = {
 
352
  api = HfApi()
353
  api.upload_file(
354
  path_or_fileobj=bytes(json_string, "utf-8"),
355
+ path_in_repo=f"submissions/{filename}",
356
  repo_id=repo_id,
357
  repo_type="dataset",
358
  token=hf_token,
 
361
  print(f"Successfully uploaded results to dataset: {repo_id}")
362
 
363
  except Exception as e:
364
+ print(f"Error uploading to Hugging Face Hub: {e}")"""
365
+
366
def save_all_results_to_file(all_results, user_data, count_data=None):
    """Upload one submission (and optionally the count file) to the Hub dataset.

    Args:
        all_results: Collected answers; stored under the ``"results"`` key.
        user_data: Mapping with the participant's details; stored under
            ``"user_info"``. Its ``"age"`` entry (default ``"user"``) is used
            in the submission file name and commit messages.
        count_data: Optional count mapping. When truthy it is written to
            ``COUNT_JSON_PATH`` and uploaded to ``COUNT_JSON_REPO_PATH``
            after the submission upload has succeeded.

    Returns:
        None. Upload failures are caught and printed rather than raised.
    """
    repo_id = "Hu6ery/Turing-Test-Submissions"
    username = user_data.get("age", "user")
    timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
    submission_filename = f"submission_{username}_{timestamp}.json"

    payload = json.dumps(
        {"user_info": user_data, "results": all_results},
        ensure_ascii=False,
        indent=4,
    )

    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        print("HF_TOKEN not found. Cannot upload to the Hub.")
        return

    # Arguments shared by both uploads.
    target = {"repo_id": repo_id, "repo_type": "dataset", "token": hf_token}

    try:
        api = HfApi()

        # Upload the submission file.
        api.upload_file(
            path_or_fileobj=payload.encode("utf-8"),
            path_in_repo=f"data/{submission_filename}",
            commit_message=f"Add new submission from {username}",
            **target,
        )
        print(f"上传成功: {submission_filename}")

        # Upload count.json only when count data was provided.
        if not count_data:
            return

        with open(COUNT_JSON_PATH, "w", encoding="utf-8") as count_file:
            json.dump(count_data, count_file, indent=4, ensure_ascii=False)

        api.upload_file(
            path_or_fileobj=COUNT_JSON_PATH,
            path_in_repo=COUNT_JSON_REPO_PATH,
            commit_message=f"Update count.json after submission by {username}",
            **target,
        )
        print("count.json 上传成功")

    except Exception as e:
        print(f"上传出错: {e}")
414
+
415
 
416
  def back_to_welcome():
417
  return (
 
488
  question_progress_text = gr.Markdown()
489
  test_dimension_title = gr.Markdown()
490
  test_audio = gr.Audio(label="测试音频")
491
+ gr.Markdown("--- \n ### 请为以下特征打分 (0-5分。0代表认为回应内容在该特征上完全表现出机器特征;5代表完全表现出人类特征)")
492
  test_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
493
  with gr.Row():
494
  prev_dim_btn = gr.Button("上一维度")