HoneyTian committed on
Commit
6032297
·
1 Parent(s): 3e43310
examples/sample_filter/bad_case_find.py CHANGED
@@ -14,12 +14,13 @@ def get_args():
14
  "--data_dir",
15
  # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data",
16
  # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\us-3",
17
- default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
 
18
  type=str
19
  )
20
  parser.add_argument(
21
  "--keep_dir",
22
- default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\keep",
23
  type=str
24
  )
25
  parser.add_argument(
@@ -42,7 +43,7 @@ def main():
42
 
43
  client = Client("http://127.0.0.1:7864/")
44
 
45
- for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
46
  # if idx < 400:
47
  # continue
48
  filename = filename.as_posix()
@@ -65,9 +66,9 @@ def main():
65
  )
66
  prob2 = float(prob2)
67
 
68
- if label1 == "voicemail" and label2 in ("voicemail", "bell") and prob1 > 1.0:
69
  pass
70
- elif label1 == "non_voicemail" and label2 not in ("voicemail", "bell") and prob1 > 0.6:
71
  pass
72
  else:
73
  print(f"label1: {label1}, prob1: {prob1}, label2: {label2}, prob2: {prob2}")
 
14
  "--data_dir",
15
  # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data",
16
  # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\us-3",
17
+ # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
18
+ default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\id",
19
  type=str
20
  )
21
  parser.add_argument(
22
  "--keep_dir",
23
+ default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\keep2",
24
  type=str
25
  )
26
  parser.add_argument(
 
43
 
44
  client = Client("http://127.0.0.1:7864/")
45
 
46
+ for idx, filename in tqdm(enumerate(data_dir.glob("**/*.wav"))):
47
  # if idx < 400:
48
  # continue
49
  filename = filename.as_posix()
 
66
  )
67
  prob2 = float(prob2)
68
 
69
+ if label1 == "voicemail" and label2 in ("voicemail", "bell") and prob1 > 0.6:
70
  pass
71
+ elif label1 == "non_voicemail" and label2 not in ("voicemail", "bell") and prob1 > 0.0:
72
  pass
73
  else:
74
  print(f"label1: {label1}, prob1: {prob1}, label2: {label2}, prob2: {prob2}")
examples/sample_filter/wav_find_by_task_excel.py CHANGED
@@ -13,40 +13,28 @@ from project_settings import project_path
13
 
14
 
15
  """
16
- default="task_DC_Death_Cases_20250220.xlsx",
17
- default="task_DC_Death_Cases_20250221.xlsx",
18
- default="task_DC_Death_Cases_EC_UC_20250220.xlsx",
19
- default="task_DC_Death_Cases_EC_UC_20250221.xlsx",
20
- default="task_DcTask_1_ID_LIVE_20250220_20250220-1.xlsx",
21
- default="task_DcTask_1_ID_LIVE_20250221_20250221-1.xlsx",
22
- default="task_DcTask_3_ID_LIVE_20250220_20250220-1.xlsx",
23
- default="task_DcTask_3_ID_LIVE_20250221_20250221-1.xlsx",
24
- default="task_DcTask_5_ID_LIVE_20250220_20250220-1.xlsx",
25
- default="task_DcTask_5_ID_LIVE_20250221_20250221-1.xlsx",
26
- default="task_DcTask_9_ID_LIVE_20250220_20250220-1.xlsx",
27
- default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1.xlsx",
28
- default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1 (1).xlsx",
29
- default="task_DcTask_PTP_ID_LIVE_20250221_20250221-1.xlsx",
30
  """
31
 
 
32
  def get_args():
33
  parser = argparse.ArgumentParser()
34
  parser.add_argument(
35
  "--task_file",
36
- # default="task_DC_Death_Cases_20250220.xlsx",
37
- # default="task_DC_Death_Cases_20250221.xlsx",
38
- # default="task_DC_Death_Cases_EC_UC_20250220.xlsx",
39
- # default="task_DC_Death_Cases_EC_UC_20250221.xlsx",
40
- # default="task_DcTask_1_ID_LIVE_20250220_20250220-1.xlsx",
41
- # default="task_DcTask_1_ID_LIVE_20250221_20250221-1.xlsx",
42
- # default="task_DcTask_3_ID_LIVE_20250220_20250220-1.xlsx",
43
- # default="task_DcTask_3_ID_LIVE_20250221_20250221-1.xlsx",
44
- # default="task_DcTask_5_ID_LIVE_20250220_20250220-1.xlsx",
45
- # default="task_DcTask_5_ID_LIVE_20250221_20250221-1.xlsx",
46
- # default="task_DcTask_9_ID_LIVE_20250220_20250220-1.xlsx",
47
- # default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1.xlsx",
48
- # default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1 (1).xlsx",
49
- default="task_DcTask_PTP_ID_LIVE_20250221_20250221-1.xlsx",
50
  type=str
51
  )
52
  parser.add_argument(
 
13
 
14
 
15
  """
16
+
17
+ task_DcTask_5_ID_LIVE_20250224_20250224-1.xlsx
18
+ task_DcTask_9_ID_LIVE_20250223_20250223-1.xlsx
19
+ task_DcTask_9_ID_LIVE_20250224_20250224-1.xlsx
20
+ task_DcTask_PTP_ID_LIVE_20250222_20250222-1.xlsx
21
+ task_DcTask_PTP_ID_LIVE_20250224_20250224-1.xlsx
 
 
 
 
 
 
 
 
22
  """
23
 
24
+
25
  def get_args():
26
  parser = argparse.ArgumentParser()
27
  parser.add_argument(
28
  "--task_file",
29
+ # default="task_DcTask_1_ID_LIVE_20250223_20250223-1.xlsx",
30
+ # default="task_DcTask_1_ID_LIVE_20250224_20250224-1.xlsx",
31
+ # default="task_DcTask_3_ID_LIVE_20250224_20250224-1.xlsx",
32
+ # default="task_DcTask_5_ID_LIVE_20250223_20250223-1.xlsx",
33
+ # default="task_DcTask_5_ID_LIVE_20250224_20250224-1.xlsx",
34
+ # default="task_DcTask_9_ID_LIVE_20250223_20250223-1.xlsx",
35
+ # default="task_DcTask_9_ID_LIVE_20250224_20250224-1.xlsx",
36
+ # default="task_DcTask_PTP_ID_LIVE_20250222_20250222-1.xlsx",
37
+ default="task_DcTask_PTP_ID_LIVE_20250224_20250224-1.xlsx",
 
 
 
 
 
38
  type=str
39
  )
40
  parser.add_argument(
examples/vm_sound_classification/run_batch.sh CHANGED
@@ -21,10 +21,10 @@
21
  #--config_file "yaml/conv2d-classifier-4-ch4.yaml"
22
 
23
 
24
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch4 \
25
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
26
- --label_plan 8 \
27
- --config_file "yaml/conv2d-classifier-8-ch4.yaml"
28
 
29
 
30
  # sound ch8
@@ -99,23 +99,23 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
99
  #--config_file "yaml/conv2d-classifier-4-ch32.yaml"
100
 
101
 
102
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch32 \
103
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
104
- --label_plan 8 \
105
- --config_file "yaml/conv2d-classifier-8-ch32.yaml"
106
 
107
 
108
  # pretrained voicemail
109
 
110
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch4 \
111
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
112
- --label_plan 2-voicemail \
113
- --config_file "yaml/conv2d-classifier-2-ch4.yaml"
114
-
115
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch32 \
116
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
117
- --label_plan 2-voicemail \
118
- --config_file "yaml/conv2d-classifier-2-ch32.yaml"
119
 
120
 
121
  # voicemail ch4
@@ -126,11 +126,11 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
126
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
127
  #--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch4.zip"
128
 
129
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch4 \
130
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
131
- --label_plan 2-voicemail \
132
- --config_file "yaml/conv2d-classifier-2-ch4.yaml" \
133
- --pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch4.zip"
134
 
135
  #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch4 \
136
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
@@ -195,11 +195,11 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
195
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
196
  #--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
197
 
198
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32 \
199
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
200
- --label_plan 2-voicemail \
201
- --config_file "yaml/conv2d-classifier-2-ch32.yaml" \
202
- --pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
203
 
204
  #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32 \
205
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
 
21
  #--config_file "yaml/conv2d-classifier-4-ch4.yaml"
22
 
23
 
24
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch4 \
25
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
26
+ #--label_plan 8 \
27
+ #--config_file "yaml/conv2d-classifier-8-ch4.yaml"
28
 
29
 
30
  # sound ch8
 
99
  #--config_file "yaml/conv2d-classifier-4-ch32.yaml"
100
 
101
 
102
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch32 \
103
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
104
+ #--label_plan 8 \
105
+ #--config_file "yaml/conv2d-classifier-8-ch32.yaml"
106
 
107
 
108
  # pretrained voicemail
109
 
110
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch4 \
111
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
112
+ #--label_plan 2-voicemail \
113
+ #--config_file "yaml/conv2d-classifier-2-ch4.yaml"
114
+ #
115
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch32 \
116
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
117
+ #--label_plan 2-voicemail \
118
+ #--config_file "yaml/conv2d-classifier-2-ch32.yaml"
119
 
120
 
121
  # voicemail ch4
 
126
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
127
  #--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch4.zip"
128
 
129
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch4 \
130
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
131
+ #--label_plan 2-voicemail \
132
+ #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
133
+ #--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch4.zip"
134
 
135
  #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch4 \
136
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
 
195
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
196
  #--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
197
 
198
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32 \
199
+ #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
200
+ #--label_plan 2-voicemail \
201
+ #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
202
+ #--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
203
 
204
  #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32 \
205
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \