Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ import random
|
|
6 |
import shutil
|
7 |
import time
|
8 |
import collections
|
|
|
9 |
from filelock import FileLock
|
10 |
from datasets import load_dataset, Audio
|
11 |
from huggingface_hub import HfApi
|
@@ -362,7 +363,94 @@ def navigate_dimensions(direction, q_idx, d_idx, selections, *slider_values):
|
|
362 |
next_btn_update,
|
363 |
) + tuple(slider_updates)
|
364 |
|
365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
366 |
# selections["final_choice"] = final_choice
|
367 |
|
368 |
"""final_question_result = {
|
@@ -395,11 +483,12 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
|
|
395 |
all_results.append(final_question_result)
|
396 |
|
397 |
q_idx += 1
|
398 |
-
|
399 |
-
#
|
400 |
if q_idx < len(user_data["question_set"]):
|
401 |
-
init_q_updates = init_test_question(user_data, q_idx)
|
402 |
return init_q_updates + (all_results, gr.update(value=""))
|
|
|
403 |
else:
|
404 |
result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
|
405 |
for res in all_results:
|
@@ -420,9 +509,92 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
|
|
420 |
q_idx, d_idx, {},
|
421 |
gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
|
422 |
gr.update(), gr.update(),
|
423 |
-
) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)
|
424 |
|
425 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
426 |
repo_id = "intersteller2887/Turing-test-dataset"
|
427 |
username = user_data.get("username", "user")
|
428 |
timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
|
@@ -472,8 +644,56 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
|
|
472 |
)
|
473 |
|
474 |
except Exception as e:
|
475 |
-
print(f"上传出错: {e}")
|
476 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
477 |
|
478 |
def toggle_reference_view(current):
|
479 |
if current == "参考":
|
|
|
6 |
import shutil
|
7 |
import time
|
8 |
import collections
|
9 |
+
from functools import wraps
|
10 |
from filelock import FileLock
|
11 |
from datasets import load_dataset, Audio
|
12 |
from huggingface_hub import HfApi
|
|
|
363 |
next_btn_update,
|
364 |
) + tuple(slider_updates)
|
365 |
|
366 |
+
# ==============================================================================
|
367 |
+
# 重连函数定义 (Retry Function Definitions)
|
368 |
+
# ==============================================================================
|
369 |
+
|
370 |
+
# Function for handling connection error
|
371 |
+
def retry_with_timeout(max_retries=3, timeout=10, backoff=1):
    """Decorator factory: retry the wrapped call, aborting each attempt
    after ``timeout`` seconds using SIGALRM.

    Args:
        max_retries: Maximum number of attempts before giving up.
        timeout: Per-attempt time limit in seconds.
        backoff: Base pause between attempts; attempt ``i`` (0-based)
            sleeps ``backoff * (i + 1)`` seconds before the next try.

    Raises:
        The last exception seen if every attempt fails.

    NOTE(review): SIGALRM only exists on Unix and only works in the main
    thread — confirm the callbacks decorated with this run there.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Hoisted out of the retry loop (was re-imported per attempt).
            import signal

            def handle_timeout(signum, frame):
                raise TimeoutError("Operation timed out")

            last_exception = None
            for attempt in range(max_retries):
                try:
                    # BUG FIX: the original defined a local
                    # ``class TimeoutError(Exception)`` whose body was a
                    # stray pasted identifier (``update_test_dimension_view``);
                    # use the builtin TimeoutError instead.
                    signal.signal(signal.SIGALRM, handle_timeout)
                    signal.alarm(timeout)
                    try:
                        return func(*args, **kwargs)
                    finally:
                        signal.alarm(0)  # always cancel the pending alarm
                except Exception as e:
                    # TimeoutError subclasses Exception, so timed-out
                    # attempts are retried like any other failure.
                    last_exception = e
                    print(f"Attempt {attempt + 1} failed: {str(e)}")
                    if attempt < max_retries - 1:
                        time.sleep(backoff * (attempt + 1))

            print(f"All {max_retries} attempts failed")
            if last_exception:
                raise last_exception
            raise Exception("Unknown error occurred")  # typo fixed ("Unkown ... occured")
        return wrapper
    return decorator
|
409 |
+
|
410 |
+
def save_with_retry(all_results, user_data, count_data):
    """Upload the results via ``save_all_results_to_file`` and report
    success as a boolean.

    The actual retry behaviour lives in the decorator applied to
    ``save_all_results_to_file``; this wrapper only converts a final
    failure into ``False`` instead of letting the exception escape.
    """
    try:
        save_all_results_to_file(all_results, user_data, count_data)
    except Exception as err:
        print(f"Fail to upload file to HuggingFace Dataset: {err}")
        return False
    return True
|
417 |
+
|
418 |
+
def save_locally_with_retry(data, filename, max_retries=3):
    """Serialize ``data`` to ``filename`` as pretty-printed UTF-8 JSON.

    Retries up to ``max_retries`` times with a 1-second pause between
    attempts (covers transient filesystem errors in the Space workspace).

    Args:
        data: Any JSON-serializable object.
        filename: Destination path.
        max_retries: Number of write attempts before giving up.

    Returns:
        True on success, False if every attempt failed.
    """
    for attempt in range(max_retries):
        try:
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=4, ensure_ascii=False)
            return True
        except Exception as e:
            # Typo fixed in the log message ("HugginigFace" -> "HuggingFace").
            print(f"Fail to save file to HuggingFace workspace: {e} for {attempt + 1} time")
            if attempt < max_retries - 1:
                time.sleep(1)
    return False
|
429 |
+
|
430 |
+
def update_count_with_retry(count_data, question_set, max_retries=3):
    """Reset the count of unfinished questions and persist count.json.

    Holds a file lock while writing so concurrent sessions cannot
    clobber each other's updates. Retries up to ``max_retries`` times.

    Returns:
        True once the file is written, False if every attempt failed.
    """
    for round_no in range(1, max_retries + 1):
        try:
            lock_file = COUNT_JSON_PATH + ".lock"
            with FileLock(lock_file, timeout=10):
                # Remove unfinished question(s) from count.json
                for entry in question_set:
                    key = os.path.basename(entry['audio'])
                    if key in count_data and count_data[key] < 1:
                        count_data[key] = 0  # Mark unfinished data as 0

                with open(COUNT_JSON_PATH, 'w', encoding='utf-8') as handle:
                    json.dump(count_data, handle, indent=4, ensure_ascii=False)
                return True
        except Exception as e:
            print(f"Fail to update count.json {e} for {round_no} time")
            if round_no < max_retries:
                time.sleep(1)
    return False
|
449 |
+
|
450 |
+
# ==============================================================================
|
451 |
+
|
452 |
+
# Previous version of submit_question_and_advance
|
453 |
+
"""def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data):
|
454 |
# selections["final_choice"] = final_choice
|
455 |
|
456 |
"""final_question_result = {
|
|
|
483 |
all_results.append(final_question_result)
|
484 |
|
485 |
q_idx += 1
|
486 |
+
|
487 |
+
# If q_idx hasn't reached the last one
|
488 |
if q_idx < len(user_data["question_set"]):
|
489 |
+
init_q_updates = init_test_question(user_data, q_idx) # Case 1: jam happens when initialize next question
|
490 |
return init_q_updates + (all_results, gr.update(value=""))
|
491 |
+
# If q_idx has reached the last one
|
492 |
else:
|
493 |
result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
|
494 |
for res in all_results:
|
|
|
509 |
q_idx, d_idx, {},
|
510 |
gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
|
511 |
gr.update(), gr.update(),
|
512 |
+
) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)"""
|
513 |
|
514 |
+
def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data):
    """Record the answers for question ``q_idx`` and advance the test.

    Cleans the per-dimension scores (0 means "not rated" and becomes
    None), appends the result, then either initializes the next question
    or — after the last question — builds the summary, uploads the
    results (with local-file fallback), and updates count.json.

    Returns:
        A tuple of Gradio component updates whose layout must match the
        outputs wired to this callback, plus ``all_results`` and a
        markdown string.
    """
    try:
        # Data preparation: normalize scores (0 -> None).
        cleaned_selections = {}
        # BUG FIX: record the final choice once, before the loop.
        # Previously this assignment sat inside the loop, so an empty
        # ``selections`` dict silently dropped the user's final choice.
        cleaned_selections["final_choice"] = final_choice
        for dim_title, sub_scores in selections.items():
            cleaned_sub_scores = {}
            for sub_dim, score in sub_scores.items():
                cleaned_sub_scores[sub_dim] = None if score == 0 else score
            cleaned_selections[dim_title] = cleaned_sub_scores

        final_question_result = {
            "question_id": q_idx,
            "audio_file": user_data["question_set"][q_idx]['audio'],
            "selections": cleaned_selections
        }

        all_results.append(final_question_result)
        q_idx += 1

        # If q_idx hasn't reached the last one
        if q_idx < len(user_data["question_set"]):
            init_q_updates = init_test_question(user_data, q_idx)  # Case 1: jam happens when initializing next question
            return init_q_updates + (all_results, gr.update(value=""))
        # If q_idx has reached the last one
        else:
            result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
            for res in all_results:
                result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
                for dim_title, dim_data in res['selections'].items():
                    if dim_title == 'final_choice': continue
                    result_str += f"- **{dim_title}**:\n"
                    for sub_dim, score in dim_data.items():
                        result_str += f"  - *{sub_dim[:20]}...*: {score}/5\n"

            # Try to upload (retries handled inside the save helpers).
            success = save_with_retry(all_results, user_data, user_data.get("updated_count_data"))

            if not success:
                username = user_data.get("username", "anonymous")
                timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
                local_filename = f"submission_{username}_{timestamp}.json"

                # Drop bulky / transient fields before persisting user info.
                user_info_clean = {
                    k: v for k, v in user_data.items() if k not in ["question_set", "updated_count_data"]
                }
                final_data_package = {
                    "user_info": user_info_clean,
                    "results": all_results
                }

                # Fall back to saving in the local workspace.
                local_success = save_locally_with_retry(final_data_package, local_filename)

                if local_success:
                    result_str += f"\n\n⚠️ 上传失败,结果已保存到本地文件: {local_filename}"
                else:
                    result_str += "\n\n❌ 上传失败且无法保存到本地文件,请联系管理员"

            # Update count.json (drop unfinished questions).
            count_update_success = update_count_with_retry(
                user_data.get("updated_count_data", {}),
                user_data["question_set"]
            )

            if not count_update_success:
                result_str += "\n\n⚠️ 无法更新题目计数,请联系管理员"

            return (
                gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True),
                q_idx, d_idx, {},
                gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
                gr.update(), gr.update(),
            ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)
    except Exception as e:
        print(f"提交过程中发生错误: {e}")
        # Surface the error while leaving the UI state untouched.
        error_msg = f"提交过程中发生错误: {str(e)}"
        return (
            gr.update(), gr.update(), gr.update(), gr.update(),
            q_idx, d_idx, selections,
            gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
            gr.update(), gr.update(),
        ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, error_msg)
|
596 |
+
|
597 |
+
"""def save_all_results_to_file(all_results, user_data, count_data=None):
|
598 |
repo_id = "intersteller2887/Turing-test-dataset"
|
599 |
username = user_data.get("username", "user")
|
600 |
timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
|
|
|
644 |
)
|
645 |
|
646 |
except Exception as e:
|
647 |
+
print(f"上传出错: {e}")"""
|
648 |
|
649 |
+
@retry_with_timeout(max_retries=3, timeout=10)
def save_all_results_to_file(all_results, user_data, count_data=None):
    """Upload the session's results as JSON to the HF dataset repo.

    Builds a package of cleaned user info plus ``all_results`` and uploads
    it under ``submissions/`` in the dataset repo; when ``count_data`` is
    given, also rewrites the local count.json under a file lock and uploads
    it. Raises on any failure so the retry decorator can re-attempt.

    Raises:
        Exception: if HF_TOKEN is missing, or anything the HfApi calls raise.
    """
    repo_id = "intersteller2887/Turing-test-dataset"
    username = user_data.get("username", "user")
    timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
    submission_filename = f"submissions_{username}_{timestamp}.json"

    # Strip bulky / transient fields before persisting user info.
    user_info_clean = {
        k: v for k, v in user_data.items() if k not in ["question_set", "updated_count_data"]
    }

    final_data_package = {
        "user_info": user_info_clean,
        "results": all_results
    }
    json_string = json.dumps(final_data_package, ensure_ascii=False, indent=4)
    hf_token = os.getenv("HF_TOKEN")

    if not hf_token:
        raise Exception("HF_TOKEN not found. Cannot upload to the Hub.")

    api = HfApi()

    # Upload the submission file.
    # NOTE(review): ``timeout=`` is forwarded as a kwarg to upload_file —
    # confirm the installed huggingface_hub version accepts it.
    api.upload_file(
        path_or_fileobj=bytes(json_string, "utf-8"),
        path_in_repo=f"submissions/{submission_filename}",
        repo_id=repo_id,
        repo_type="dataset",
        token=hf_token,
        commit_message=f"Add new submission from {username}",
        timeout=30
    )

    if count_data:
        # Rewrite count.json under a lock so concurrent sessions don't race.
        with FileLock(COUNT_JSON_PATH + ".lock", timeout=5):
            with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
                json.dump(count_data, f, indent=4, ensure_ascii=False)

        api.upload_file(
            path_or_fileobj=COUNT_JSON_PATH,
            path_in_repo=COUNT_JSON_REPO_PATH,
            repo_id=repo_id,
            repo_type="dataset",
            token=hf_token,
            commit_message=f"Update count.json after submission by {username}",
            timeout=30
        )
|
697 |
|
698 |
def toggle_reference_view(current):
|
699 |
if current == "参考":
|