Spark Chou commited on
Commit
76578a8
·
1 Parent(s): 5d0ff5c
Files changed (1) hide show
  1. app.py +42 -42
app.py CHANGED
@@ -47,62 +47,62 @@ print(sample1_audio_path)
47
 
48
  DIMENSIONS_DATA = [
49
  {
50
- "title": "语义和语用特征",
51
- "audio": sample1_audio_path,
52
  "sub_dims": [
53
- "记忆一致性:偏人类:在短上下文中记忆一致,若出现记忆偏差也会提问确认;偏机器:出现上下文记忆不一致且无法察觉或修正(如遗忘掉关键信息坚持错误回答)",
54
- "逻辑连贯性:偏人类:逻辑自然流畅;偏机器:逻辑转折生硬或自相矛盾(如:突然切换话题无过渡)",
55
- "读音正确性:偏人类:用字发音正确、自然,会结合语境正确使用常见多音字;偏机器:存在不自然的发音错误,常见多音字发音错误",
56
- "多语言混杂:偏人类:说话多语言混杂往往和语境相关(专有名词、习惯用法),语言切换生硬卡顿,不自然;偏机器:多语言混杂生硬,无语言切换逻辑",
57
- "语言不精确性:偏人类:说话存在含糊表达:如“差不多”、“应该是吧”,且会出现自我修正(“不对不对”)的行为;偏机器:回应通常不存在模糊表达,回答准确、肯定",
58
- "填充词使用:偏人类:在思考时经常使用填充词(如‘嗯’‘那个’);偏机器:很少使用填充词或填充词使用不自然",
59
- "隐喻与语用用意:偏人类:使用隐喻、反语、委婉来表达多重含义;偏机器:表达直白,缺乏语义多样性,仅能字面理解语义"
60
  ],
61
- "reference_scores": [5, 5, 5, 0, 5, 5, 0]
62
  },
63
  {
64
- "title": "非生理性副语言特征",
65
- "audio": sample1_audio_path,
66
  "sub_dims": [
67
- "节奏:偏人类:语速随语义起伏,偶尔卡顿或犹豫;偏机器:说话几乎无停顿或停顿机械",
68
- "语调:偏人类:在表达如疑问、惊讶、强调时,音调会自然上扬或下降;偏机器:语调单一或变化过于规律,不符合语境",
69
- "重读:偏人类:有意识地重读重要词语,突出重点;偏机器:没有重读词语或或出现强调部位异常",
70
- "辅助性发声:偏人类:发出符合语境的非语言声音,如笑声、叹气等;偏机器:辅助性发声语境错误或机械化,或完全无辅助性发声"
71
  ],
72
- "reference_scores": [5, 5, 5, 5]
73
  },
74
  {
75
- "title": "生理性副语言特征",
76
- "audio": sample1_audio_path,
77
  "sub_dims": [
78
- "微生理杂音:偏人类:说话存在呼吸声、口水音、气泡音等无意识发声,且自然地出现在说话中;偏机器:语音过于干净,或发出不自然杂音(电流声)",
79
- "发音不稳定性:偏人类:发音存在一定不规则性(诸如连读、颤音、含糊发音、鼻音等);偏机器:发音过于清晰规则",
80
- "口音:偏人类:存在自然的地区口音或语音特征;偏机器:口音生硬"
81
  ],
82
- "reference_scores": [5, 4, 4]
83
  },
84
  {
85
- "title": "机械人格",
86
- "audio": sample1_audio_path,
87
  "sub_dims": [
88
- "谄媚现象:偏人类:根据语境判断是否同意对方提出的请求或表达的观点,不总是表示同意或进行附和;偏机器:频繁同意、感谢、道歉,过度认同对方观点,缺乏真实互动感",
89
- "书面化表达:偏人类:口语化,表达灵活多变;偏机器:回应句式工整、规范,用词过于正式、频繁列举、用词泛泛"
90
  ],
91
  "reference_scores": [5, 5]
92
  },
93
  {
94
- "title": "情感表达",
95
- "audio": sample1_audio_path,
96
  "sub_dims": [
97
- "语义层面:偏人类:对悲伤、开心等语境有符合人类的情绪反应;偏机器:未能针对对方情绪作出正常的情感反应,或表达情感的词语空泛、脱离语境",
98
- "声学层面:偏人类:音调、音量、节奏等声学特征随情绪动态变化;偏机器:情感语调模式化或与语境不符"
99
  ],
100
- "reference_scores": [5, 5]
101
  }
102
  ]
103
 
104
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
105
- SPECIAL_KEYWORDS = ["多语言混杂", "隐喻与语用用意", "辅助性发声", "口音"]
106
  MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
107
  THE_SUB_DIMS = [d['sub_dims'] for d in DIMENSIONS_DATA]
108
 
@@ -371,13 +371,13 @@ def update_test_dimension_view(d_idx, selections):
371
  sub_dims = dim_data["sub_dims"]
372
  dim_title = dim_data["title"]
373
  existing_scores = selections.get(dim_data['title'], {})
374
- progress_d = f"维度 {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dim_data['title']}**"
375
 
376
  for i in range(MAX_SUB_DIMS):
377
  if i < len(sub_dims):
378
  desc = sub_dims[i]
379
  # print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
380
- name = desc.split(":")[0].strip()
381
  default_value = 0 if name in SPECIAL_KEYWORDS else 1
382
  value = existing_scores.get(desc, default_value)
383
 
@@ -400,7 +400,7 @@ def update_test_dimension_view(d_idx, selections):
400
  # ))
401
  else:
402
  slider_updates.append(gr.update(visible=False))
403
- print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
404
  # for i in range(MAX_SUB_DIMS):
405
  # if i < len(dimension['sub_dims']):
406
  # sub_dim_label = dimension['sub_dims'][i]
@@ -411,7 +411,7 @@ def update_test_dimension_view(d_idx, selections):
411
 
412
  prev_btn_update = gr.update(interactive=(d_idx > 0))
413
  next_btn_update = gr.update(
414
- value="进入最终判断" if d_idx == len(DIMENSIONS_DATA) - 1 else "下一维度",
415
  interactive=True
416
  )
417
 
@@ -420,7 +420,7 @@ def update_test_dimension_view(d_idx, selections):
420
  def init_test_question(user_data, q_idx):
421
  d_idx = 0
422
  question = user_data["question_set"][q_idx]
423
- progress_q = f" {q_idx + 1} / {len(user_data['question_set'])}"
424
 
425
  initial_updates = update_test_dimension_view(d_idx, {})
426
  dim_title_update, prev_btn_update, next_btn_update = initial_updates[:3]
@@ -664,9 +664,9 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
664
  return init_q_updates + (all_results, gr.update(value=""))
665
  else:
666
  # 准备完整结果数据
667
- result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
668
  for res in all_results:
669
- result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
670
  for dim_title, dim_data in res['selections'].items():
671
  if dim_title == 'final_choice': continue
672
  result_str += f"- **{dim_title}**:\n"
@@ -858,8 +858,8 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
858
  }
859
 
860
  with welcome_page:
861
- gr.Markdown("# AI 识破者\n你将听到一系列对话,请判断哪个回应者是 AI。")
862
- start_btn = gr.Button("开始挑战", variant="primary")
863
 
864
  with info_page:
865
  gr.Markdown("## 请提供一些基本信息")
 
47
 
48
  DIMENSIONS_DATA = [
49
  {
50
+ "title": "Semantic and Pragmatic Features",
51
+ "audio": sample1_audio_path,
52
  "sub_dims": [
53
+ "Memory Consistency: Human memory in short contexts is usually consistent and self-correcting (e.g., by asking questions); machines may show inconsistent context memory and fail to notice or correct errors (e.g., forgetting key information and persisting in wrong answers).",
54
+ "Logical Coherence: Human logic is naturally coherent and allows reasonable leaps; machine logic is abrupt or self-contradictory (e.g., sudden topic shifts without transitions).",
55
+ "Pronunciation Accuracy: Human-like: Correct and natural pronunciation of words, including context-appropriate usage of common English heteronyms; Machine-like: Unnatural pronunciation errors, especially mispronunciation of common heteronyms",
56
+ "Code-switching: Humans mix multiple languages fluently and contextually; machines mix languages rigidly, lacking logical language switching.",
57
+ "Linguistic Vagueness: Human speech tends to include vague expressions (e.g., “more or less,” “I guess”) and self-corrections; machine responses are typically precise and assertive.",
58
+ "Filler Word Usage: Human filler words (e.g., 'uh', 'like') appear randomly and show signs of thinking; machine fillers are either repetitive and patterned or completely absent.",
59
+ "Metaphor and Pragmatic Intent: Humans use metaphors, irony, and euphemisms to express layered meanings; machines interpret literally or use rhetorical devices awkwardly, lacking semantic richness."
60
  ],
61
+ "reference_scores": [5, 5, 3, 3, 5, 5, 3]
62
  },
63
  {
64
+ "title": "Non-Physiological Paralinguistic Features",
65
+ "audio": sample1_audio_path,
66
  "sub_dims": [
67
+ "Rhythm: Human speech rate varies with meaning, occasionally hesitating or pausing; machine rhythm is uniform, with little or mechanical pauses.",
68
+ "Intonation: Humans naturally raise or lower pitch to express questions, surprise, or emphasis; machine intonation is monotonous or overly patterned, mismatching the context.",
69
+ "Emphasis: Humans consciously stress key words to highlight important information; machines have uniform word emphasis or stress incorrect parts.",
70
+ "Auxiliary Vocalizations: Humans produce context-appropriate non-verbal sounds (e.g., laughter, sighs); machine non-verbal sounds are contextually incorrect, mechanical, or absent."
71
  ],
72
+ "reference_scores": [4, 5, 4, 3]
73
  },
74
  {
75
+ "title": "Physiological Paralinguistic Features",
76
+ "audio": sample1_audio_path,
77
  "sub_dims": [
78
+ "Micro-physiological Noise: Human speech includes unconscious physiological sounds like breathing, saliva, or bubbling, naturally woven into rhythm; machine speech is overly clean or adds unnatural noises.",
79
+ "Pronunciation Instability: Human pronunciation includes irregularities (e.g., linking, tremors, slurring, nasal sounds); machine pronunciation is overly standard and uniform, lacking personality.",
80
+ "Accent: Humans naturally exhibit regional accents or speech traits; machine accents sound forced or unnatural."
81
  ],
82
+ "reference_scores": [3, 3, 4]
83
  },
84
  {
85
+ "title": "Mechanical Persona",
86
+ "audio": sample1_audio_path,
87
  "sub_dims": [
88
+ "Flattery: Humans assess context to agree or disagree, sometimes offering differing opinions; machines excessively agree, thank, or apologize, over-validating the other party and lacking authentic interaction.",
89
+ "Formalized Expression: Human speech is flexible; machine responses are formally structured, overly written, and use vague wording."
90
  ],
91
  "reference_scores": [5, 5]
92
  },
93
  {
94
+ "title": "Emotional Expression",
95
+ "audio": sample1_audio_path,
96
  "sub_dims": [
97
+ "Semantic Level: Humans show appropriate emotional responses to contexts like sadness or joy; machines are emotionally flat, or use emotional words vaguely and out of context.",
98
+ "Acoustic Level: Human pitch, volume, and rhythm change dynamically with emotion; machine emotional tone is formulaic or mismatched with the context."
99
  ],
100
+ "reference_scores": [3, 3]
101
  }
102
  ]
103
 
104
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
105
+ SPECIAL_KEYWORDS = ["Code-switching", "Metaphor and Pragmatic Intent", "Auxiliary Vocalizations", "Accent"]
106
  MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
107
  THE_SUB_DIMS = [d['sub_dims'] for d in DIMENSIONS_DATA]
108
 
 
371
  sub_dims = dim_data["sub_dims"]
372
  dim_title = dim_data["title"]
373
  existing_scores = selections.get(dim_data['title'], {})
374
+ progress_d = f"Dimension {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dim_data['title']}**"
375
 
376
  for i in range(MAX_SUB_DIMS):
377
  if i < len(sub_dims):
378
  desc = sub_dims[i]
379
  # print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
380
+ name = desc.split(":")[0].strip()
381
  default_value = 0 if name in SPECIAL_KEYWORDS else 1
382
  value = existing_scores.get(desc, default_value)
383
 
 
400
  # ))
401
  else:
402
  slider_updates.append(gr.update(visible=False))
403
+ # print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
404
  # for i in range(MAX_SUB_DIMS):
405
  # if i < len(dimension['sub_dims']):
406
  # sub_dim_label = dimension['sub_dims'][i]
 
411
 
412
  prev_btn_update = gr.update(interactive=(d_idx > 0))
413
  next_btn_update = gr.update(
414
+ value="Proceed to Final Judgement" if d_idx == len(DIMENSIONS_DATA) - 1 else "Next Dimension",
415
  interactive=True
416
  )
417
 
 
420
  def init_test_question(user_data, q_idx):
421
  d_idx = 0
422
  question = user_data["question_set"][q_idx]
423
+ progress_q = f"Question {q_idx + 1} / {len(user_data['question_set'])}"
424
 
425
  initial_updates = update_test_dimension_view(d_idx, {})
426
  dim_title_update, prev_btn_update, next_btn_update = initial_updates[:3]
 
664
  return init_q_updates + (all_results, gr.update(value=""))
665
  else:
666
  # 准备完整结果数据
667
+ result_str = "### Test Completed!\n\nOverview of your submission:\n"
668
  for res in all_results:
669
+ result_str += f"##### Final Judgement: **{res['selections'].get('final_choice', 'empty')}**\n" # empty == no choice
670
  for dim_title, dim_data in res['selections'].items():
671
  if dim_title == 'final_choice': continue
672
  result_str += f"- **{dim_title}**:\n"
 
858
  }
859
 
860
  with welcome_page:
861
+ gr.Markdown("# AI Identifier: Can you spot the AI?\n你将听到一系列对话,请判断哪个回应者是 AI。")
862
+ start_btn = gr.Button("Start", variant="primary")
863
 
864
  with info_page:
865
  gr.Markdown("## 请提供一些基本信息")