Spaces:
Runtime error
Runtime error
Update gradio_tabs/single.py
Browse files- gradio_tabs/single.py +32 -31
gradio_tabs/single.py
CHANGED
|
@@ -15,7 +15,7 @@ import pyopenjtalk
|
|
| 15 |
import io # メモリ上でのファイル操作用
|
| 16 |
from pydub import AudioSegment # 結合機能のために追加
|
| 17 |
import hashlib # メタデータハッシュ化用
|
| 18 |
-
import math # ダミー計算用,
|
| 19 |
import tempfile # 一時ファイル作成用
|
| 20 |
import functools
|
| 21 |
import uuid # 結合ファイルの一意な名前生成のために追加
|
|
@@ -576,10 +576,7 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 576 |
dummy_audio_item_columns.append(dummy_col)
|
| 577 |
|
| 578 |
with gr.Accordion("ステータス", open=True):
|
| 579 |
-
# ▼▼▼ 修正 ▼▼▼
|
| 580 |
-
# Textboxを複数行表示可能に変更
|
| 581 |
status_textbox = gr.Textbox(interactive=False, lines=1, max_lines=4, autoscroll=True, show_label=False, placeholder="ここにログが表示されます...")
|
| 582 |
-
# ▲▲▲ 修正 ▲▲▲
|
| 583 |
|
| 584 |
with gr.Column(scale=1):
|
| 585 |
with gr.Row():
|
|
@@ -625,14 +622,15 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 625 |
with gr.Column(scale=1):
|
| 626 |
with gr.Blocks():
|
| 627 |
# ▼▼▼ 変更 ▼▼▼
|
| 628 |
-
# 結合UI
|
|
|
|
| 629 |
with gr.Row():
|
| 630 |
-
with gr.Column(scale=1):
|
| 631 |
first_audio_num_input = gr.Number(label="前半", value=1, minimum=1, step=1, precision=0, interactive=True)
|
| 632 |
-
volume_first_slider = gr.Slider(label="
|
| 633 |
-
with gr.Column(scale=1):
|
| 634 |
second_audio_num_input = gr.Number(label="後半", value=2, minimum=1, step=1, precision=0, interactive=True)
|
| 635 |
-
volume_second_slider = gr.Slider(label="
|
| 636 |
merge_pause_input = gr.Number(label="間のポーズ(ms)", value=DEFAULT_WORKBENCH_PAUSE, minimum=-10000, step=10, info="マイナスで重ね合わせ(オーバーレイ)", interactive=True)
|
| 637 |
# ▲▲▲ 変更 ▲▲▲
|
| 638 |
with gr.Row():
|
|
@@ -1065,15 +1063,22 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1065 |
return (final_status, updated_list) + ui_updates
|
| 1066 |
|
| 1067 |
# ▼▼▼ 変更 ▼▼▼
|
| 1068 |
-
#
|
| 1069 |
def action_merge_preview(
|
| 1070 |
current_status: str,
|
| 1071 |
-
first_audio_num: int,
|
| 1072 |
-
second_audio_num: int,
|
| 1073 |
pause_ms: int, workbench_list: List[Dict],
|
| 1074 |
progress=gr.Progress(track_tqdm=True)
|
| 1075 |
):
|
| 1076 |
log_messages = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1077 |
def create_error_return():
|
| 1078 |
if ENABLE_LOGGING:
|
| 1079 |
final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
|
|
@@ -1097,15 +1102,18 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1097 |
|
| 1098 |
progress(0, desc="結合準備中...")
|
| 1099 |
try:
|
| 1100 |
-
# pydub
|
| 1101 |
-
segment1 = AudioSegment.from_file(audio_path1)
|
| 1102 |
-
|
|
|
|
|
|
|
|
|
|
| 1103 |
|
| 1104 |
pause_duration = int(pause_ms)
|
| 1105 |
if pause_duration >= 0:
|
| 1106 |
combined_audio = segment1 + AudioSegment.silent(duration=pause_duration) + segment2
|
| 1107 |
-
#
|
| 1108 |
-
if ENABLE_LOGGING: log_messages.append(f"音声 #{first_audio_num}({
|
| 1109 |
else:
|
| 1110 |
overlap_duration = abs(pause_duration)
|
| 1111 |
max_possible_overlap = min(len(segment1), len(segment2))
|
|
@@ -1114,31 +1122,24 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1114 |
overlap_duration = max_possible_overlap
|
| 1115 |
combined_audio = AudioSegment.silent(duration=len(segment1) + len(segment2) - overlap_duration)
|
| 1116 |
combined_audio = combined_audio.overlay(segment1, position=0).overlay(segment2, position=len(segment1) - overlap_duration)
|
| 1117 |
-
#
|
| 1118 |
-
if ENABLE_LOGGING: log_messages.append(f"音声 #{first_audio_num}({
|
| 1119 |
progress(1, desc="結合完了")
|
| 1120 |
except Exception as e:
|
| 1121 |
log_messages.append(f"❌ [結合プレビューエラー] 音声の結合または音量調整中にエラーが発生しました: {e}")
|
| 1122 |
return create_error_return()
|
| 1123 |
|
| 1124 |
# --- 新しいファイル名生成ロジック ---
|
| 1125 |
-
# 1. モデル名の収集と結合
|
| 1126 |
original_models1 = item1.get('original_models', [])
|
| 1127 |
original_models2 = item2.get('original_models', [])
|
| 1128 |
all_original_models_set = set(original_models1 + original_models2)
|
| 1129 |
sorted_original_models = sorted(list(all_original_models_set))
|
| 1130 |
model_part = "_".join([sanitize_filename(name) for name in sorted_original_models])
|
| 1131 |
-
|
| 1132 |
-
# 2. テキストの収集と結合
|
| 1133 |
-
text1 = item1.get('text', '')
|
| 1134 |
-
text2 = item2.get('text', '')
|
| 1135 |
combined_text = f"{text1}_{text2}"
|
| 1136 |
-
text_part = sanitize_filename(combined_text[:50])
|
| 1137 |
-
|
| 1138 |
-
# 3. ベースファイル名の作成とフォールバック
|
| 1139 |
base_filename = f"{model_part}-{text_part}" if model_part and text_part else f"merged_{uuid.uuid4().hex[:8]}"
|
| 1140 |
|
| 1141 |
-
# 4. 一時ファイルのパスを決定(重複回避)
|
| 1142 |
temp_dir = Path(tempfile.gettempdir())
|
| 1143 |
wav_temp_path = temp_dir / f"{base_filename}.wav"
|
| 1144 |
count = 1
|
|
@@ -1150,7 +1151,8 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1150 |
combined_audio.export(wav_temp_path, format="wav")
|
| 1151 |
combined_audio.export(mp3_temp_path, format="mp3", bitrate="192k")
|
| 1152 |
|
| 1153 |
-
display_models1
|
|
|
|
| 1154 |
all_display_models = {m.strip() for m in display_models1 + display_models2 if m.strip()}
|
| 1155 |
|
| 1156 |
metadata = {
|
|
@@ -1171,7 +1173,6 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1171 |
return final_status, str(mp3_temp_path), gr.update(value=str(wav_temp_path), visible=True), metadata
|
| 1172 |
# ▲▲▲ 変更 ▲▲▲
|
| 1173 |
|
| 1174 |
-
|
| 1175 |
def action_add_merged_to_workbench(current_status: str, preview_data: Dict, current_workbench_list: List[Dict], delete_originals: bool, first_audio_num: int, second_audio_num: int) -> Tuple:
|
| 1176 |
log_messages = []
|
| 1177 |
safe_workbench_list = current_workbench_list or []
|
|
@@ -1308,7 +1309,7 @@ def create_synthesis_app(model_holder: TTSModelHolder) -> gr.Blocks:
|
|
| 1308 |
)
|
| 1309 |
|
| 1310 |
# ▼▼▼ 変更 ▼▼▼
|
| 1311 |
-
# クリックイベントのinputs
|
| 1312 |
merge_preview_button.click(
|
| 1313 |
fn=action_merge_preview,
|
| 1314 |
inputs=[
|
|
|
|
| 15 |
import io # メモリ上でのファイル操作用
|
| 16 |
from pydub import AudioSegment # 結合機能のために追加
|
| 17 |
import hashlib # メタデータハッシュ化用
|
| 18 |
+
import math # ダミー計算用, 容量計算用, 音量計算用
|
| 19 |
import tempfile # 一時ファイル作成用
|
| 20 |
import functools
|
| 21 |
import uuid # 結合ファイルの一意な名前生成のために追加
|
|
|
|
| 576 |
dummy_audio_item_columns.append(dummy_col)
|
| 577 |
|
| 578 |
with gr.Accordion("ステータス", open=True):
|
|
|
|
|
|
|
| 579 |
status_textbox = gr.Textbox(interactive=False, lines=1, max_lines=4, autoscroll=True, show_label=False, placeholder="ここにログが表示されます...")
|
|
|
|
| 580 |
|
| 581 |
with gr.Column(scale=1):
|
| 582 |
with gr.Row():
|
|
|
|
| 622 |
with gr.Column(scale=1):
|
| 623 |
with gr.Blocks():
|
| 624 |
# ▼▼▼ 変更 ▼▼▼
|
| 625 |
+
# 結合UIを更新し、音量調整スライダーを倍率に変更しレイアウトを調整
|
| 626 |
+
gr.Markdown("#### 音声の結合")
|
| 627 |
with gr.Row():
|
| 628 |
+
with gr.Column(scale=1, min_width=160):
|
| 629 |
first_audio_num_input = gr.Number(label="前半", value=1, minimum=1, step=1, precision=0, interactive=True)
|
| 630 |
+
volume_first_slider = gr.Slider(label="音量(倍率)", minimum=0.0, maximum=3.0, value=1.0, step=0.05, interactive=True)
|
| 631 |
+
with gr.Column(scale=1, min_width=160):
|
| 632 |
second_audio_num_input = gr.Number(label="後半", value=2, minimum=1, step=1, precision=0, interactive=True)
|
| 633 |
+
volume_second_slider = gr.Slider(label="音量(倍率)", minimum=0.0, maximum=3.0, value=1.0, step=0.05, interactive=True)
|
| 634 |
merge_pause_input = gr.Number(label="間のポーズ(ms)", value=DEFAULT_WORKBENCH_PAUSE, minimum=-10000, step=10, info="マイナスで重ね合わせ(オーバーレイ)", interactive=True)
|
| 635 |
# ▲▲▲ 変更 ▲▲▲
|
| 636 |
with gr.Row():
|
|
|
|
| 1063 |
return (final_status, updated_list) + ui_updates
|
| 1064 |
|
| 1065 |
# ▼▼▼ 変更 ▼▼▼
|
| 1066 |
+
# 関数のシグネチャとロジックを倍率ベースに変更
|
| 1067 |
def action_merge_preview(
|
| 1068 |
current_status: str,
|
| 1069 |
+
first_audio_num: int, volume1_ratio: float,
|
| 1070 |
+
second_audio_num: int, volume2_ratio: float,
|
| 1071 |
pause_ms: int, workbench_list: List[Dict],
|
| 1072 |
progress=gr.Progress(track_tqdm=True)
|
| 1073 |
):
|
| 1074 |
log_messages = []
|
| 1075 |
+
|
| 1076 |
+
def ratio_to_db(ratio: float) -> float:
    """Convert a linear amplitude ratio to a gain in decibels.

    Args:
        ratio: Linear volume multiplier (1.0 means unchanged).

    Returns:
        ``20 * log10(ratio)`` for a positive ratio, or negative infinity
        when the ratio is zero or negative.
    """
    # log10 is undefined for values <= 0, so map them to -inf; the caller
    # adds this gain to an AudioSegment, where -inf is intended to yield
    # silence.
    if ratio <= 0:
        return -math.inf
    return 20 * math.log10(ratio)
|
| 1081 |
+
|
| 1082 |
def create_error_return():
|
| 1083 |
if ENABLE_LOGGING:
|
| 1084 |
final_status = (current_status + "\n" + "\n".join(log_messages)).strip()
|
|
|
|
| 1102 |
|
| 1103 |
progress(0, desc="結合準備中...")
|
| 1104 |
try:
|
| 1105 |
+
# pydubでファイルを読み込み、指定された倍率で音量を調整
|
| 1106 |
+
segment1 = AudioSegment.from_file(audio_path1)
|
| 1107 |
+
segment1 = segment1 + ratio_to_db(float(volume1_ratio))
|
| 1108 |
+
|
| 1109 |
+
segment2 = AudioSegment.from_file(audio_path2)
|
| 1110 |
+
segment2 = segment2 + ratio_to_db(float(volume2_ratio))
|
| 1111 |
|
| 1112 |
pause_duration = int(pause_ms)
|
| 1113 |
if pause_duration >= 0:
|
| 1114 |
combined_audio = segment1 + AudioSegment.silent(duration=pause_duration) + segment2
|
| 1115 |
+
# ログに音量情報を倍率で表示
|
| 1116 |
+
if ENABLE_LOGGING: log_messages.append(f"音声 #{first_audio_num}({volume1_ratio:.2f}倍) と #{second_audio_num}({volume2_ratio:.2f}倍) を {pause_duration}ms のポーズを挟んで結合しました。")
|
| 1117 |
else:
|
| 1118 |
overlap_duration = abs(pause_duration)
|
| 1119 |
max_possible_overlap = min(len(segment1), len(segment2))
|
|
|
|
| 1122 |
overlap_duration = max_possible_overlap
|
| 1123 |
combined_audio = AudioSegment.silent(duration=len(segment1) + len(segment2) - overlap_duration)
|
| 1124 |
combined_audio = combined_audio.overlay(segment1, position=0).overlay(segment2, position=len(segment1) - overlap_duration)
|
| 1125 |
+
# ログに音量情報を倍率で表示
|
| 1126 |
+
if ENABLE_LOGGING: log_messages.append(f"音声 #{first_audio_num}({volume1_ratio:.2f}倍) と #{second_audio_num}({volume2_ratio:.2f}倍) を {overlap_duration}ms 重ねて結合しました。")
|
| 1127 |
progress(1, desc="結合完了")
|
| 1128 |
except Exception as e:
|
| 1129 |
log_messages.append(f"❌ [結合プレビューエラー] 音声の結合または音量調整中にエラーが発生しました: {e}")
|
| 1130 |
return create_error_return()
|
| 1131 |
|
| 1132 |
# --- 新しいファイル名生成ロジック ---
|
|
|
|
| 1133 |
original_models1 = item1.get('original_models', [])
|
| 1134 |
original_models2 = item2.get('original_models', [])
|
| 1135 |
all_original_models_set = set(original_models1 + original_models2)
|
| 1136 |
sorted_original_models = sorted(list(all_original_models_set))
|
| 1137 |
model_part = "_".join([sanitize_filename(name) for name in sorted_original_models])
|
| 1138 |
+
text1, text2 = item1.get('text', ''), item2.get('text', '')
|
|
|
|
|
|
|
|
|
|
| 1139 |
combined_text = f"{text1}_{text2}"
|
| 1140 |
+
text_part = sanitize_filename(combined_text[:50])
|
|
|
|
|
|
|
| 1141 |
base_filename = f"{model_part}-{text_part}" if model_part and text_part else f"merged_{uuid.uuid4().hex[:8]}"
|
| 1142 |
|
|
|
|
| 1143 |
temp_dir = Path(tempfile.gettempdir())
|
| 1144 |
wav_temp_path = temp_dir / f"{base_filename}.wav"
|
| 1145 |
count = 1
|
|
|
|
| 1151 |
combined_audio.export(wav_temp_path, format="wav")
|
| 1152 |
combined_audio.export(mp3_temp_path, format="mp3", bitrate="192k")
|
| 1153 |
|
| 1154 |
+
display_models1 = item1.get('model', '').split(' | ') if item1.get('model') else []
|
| 1155 |
+
display_models2 = item2.get('model', '').split(' | ') if item2.get('model') else []
|
| 1156 |
all_display_models = {m.strip() for m in display_models1 + display_models2 if m.strip()}
|
| 1157 |
|
| 1158 |
metadata = {
|
|
|
|
| 1173 |
return final_status, str(mp3_temp_path), gr.update(value=str(wav_temp_path), visible=True), metadata
|
| 1174 |
# ▲▲▲ 変更 ▲▲▲
|
| 1175 |
|
|
|
|
| 1176 |
def action_add_merged_to_workbench(current_status: str, preview_data: Dict, current_workbench_list: List[Dict], delete_originals: bool, first_audio_num: int, second_audio_num: int) -> Tuple:
|
| 1177 |
log_messages = []
|
| 1178 |
safe_workbench_list = current_workbench_list or []
|
|
|
|
| 1309 |
)
|
| 1310 |
|
| 1311 |
# ▼▼▼ 変更 ▼▼▼
|
| 1312 |
+
# クリックイベントのinputsを新しいUIコンポーネントとロジックに合わせる
|
| 1313 |
merge_preview_button.click(
|
| 1314 |
fn=action_merge_preview,
|
| 1315 |
inputs=[
|