Galgame-Llasa-8B

Running on Zero

App Files Files Community

OmniAICreator committed 29 days ago

Commit

d659216

verified ·

1 Parent(s): 9022a66

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -0

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import soundfile as sf
 from xcodec2.modeling_xcodec2 import XCodec2Model
 import torchaudio
 import gradio as gr
 llasa_model_id = 'OmniAICreator/Llasa-1B-run1'
@@ -28,6 +29,68 @@ whisper_turbo_pipe = pipeline(
     device='cuda',
 )
 def ids_to_speech_tokens(speech_ids):
     speech_tokens_str = []
@@ -56,6 +119,7 @@ def infer(sample_audio_path, target_text, temperature, top_p,  progress=gr.Progr
     if len(target_text) > 300:
         gr.Warning("Text is too long. Please keep it under 300 characters.")
         target_text = target_text[:300]
     with torch.no_grad():
         if sample_audio_path:
             progress(0, 'Loading and trimming audio...')

 from xcodec2.modeling_xcodec2 import XCodec2Model
 import torchaudio
 import gradio as gr
+import re
 llasa_model_id = 'OmniAICreator/Llasa-1B-run1'
     device='cuda',
 )
# Ordered regex-pattern -> replacement pairs. normalize() applies them one
# after another in this order, so the sequence of entries is significant.
REPLACE_MAP: dict[str, str] = dict(
    (
        # Deleted outright: tabs, the literal text "[n]", and space characters.
        (r"\t", ""),
        (r"\[n\]", ""),
        (r" ", ""),
        (r"　", ""),
        # Deleted outright: assorted symbols and circled numbers.
        (r"[;▼♀♂《》≪≫①②③④⑤⑥]", ""),
        # Deleted outright: hyphen-, minus-, dash- and rule-like characters.
        (
            r"[\u02d7\u2010-\u2015\u2043\u2212\u23af\u23e4\u2500\u2501\u2e3a\u2e3b]",
            "",
        ),
        # Full-width tilde and wave dash both become the long-vowel mark.
        (r"[\uff5e\u301C]", "ー"),
        # Full-width question/exclamation marks become their ASCII forms.
        (r"？", "?"),
        (r"！", "!"),
        # Circle and heart variants are unified to one representative each.
        (r"[●◯〇]", "○"),
        (r"♥", "♡"),
    )
)
# Translation table (for str.translate) turning full-width Latin letters
# U+FF21-U+FF3A (upper case) and U+FF41-U+FF5A (lower case) into ASCII letters.
# Each full-width letter sits exactly 0xFEE0 above its ASCII counterpart.
FULLWIDTH_ALPHA_TO_HALFWIDTH = str.maketrans(
    {
        chr(code_point): chr(code_point - 0xFEE0)
        for first_letter in (0xFF21, 0xFF41)
        for code_point in range(first_letter, first_letter + 26)
    }
)
# Translation table (for str.translate) turning the half-width katakana block
# U+FF61-U+FF9F into its full-width counterparts.
#
# The half-width block is NOT laid out in the same order as the full-width
# katakana block (U+30A1...), so the two cannot be paired by a constant code
# point offset: doing so turns e.g. "ｱ" into "ケ" and "｡" into "ァ". The
# targets are therefore listed explicitly, in half-width code point order.
#
# The half-width voiced / semi-voiced marks (U+FF9E, U+FF9F) map to the
# combining marks U+3099 / U+309A, so "ｶﾞ" becomes "カ" + U+3099, which is
# canonically equivalent to "ガ". A per-character table cannot merge two
# characters into one; run unicodedata.normalize("NFC", ...) on the result if
# precomposed kana are required.
HALFWIDTH_KATAKANA_TO_FULLWIDTH = str.maketrans(
    "".join(map(chr, range(0xFF61, 0xFFA0))),
    (
        "。「」、・"  # U+FF61-U+FF65: punctuation
        "ヲ"  # U+FF66
        "ァィゥェォャュョッ"  # U+FF67-U+FF6F: small kana
        "ー"  # U+FF70: long-vowel mark
        "アイウエオ"  # U+FF71-U+FF75
        "カキクケコ"  # U+FF76-U+FF7A
        "サシスセソ"  # U+FF7B-U+FF7F
        "タチツテト"  # U+FF80-U+FF84
        "ナニヌネノ"  # U+FF85-U+FF89
        "ハヒフヘホ"  # U+FF8A-U+FF8E
        "マミムメモ"  # U+FF8F-U+FF93
        "ヤユヨ"  # U+FF94-U+FF96
        "ラリルレロ"  # U+FF97-U+FF9B
        "ワン"  # U+FF9C-U+FF9D
        "\u3099\u309a"  # U+FF9E-U+FF9F: voiced / semi-voiced marks
    ),
)
# Translation table (for str.translate) turning the full-width digits
# U+FF10-U+FF19 into the ASCII digits "0"-"9".
FULLWIDTH_DIGITS_TO_HALFWIDTH = str.maketrans(
    {chr(0xFF10 + digit): str(digit) for digit in range(10)}
)
# Compiled regex matching any single character that is NOT in the allowed
# set assembled below (a negated character class).
INVALID_PATTERN = re.compile(
    "".join(
        (
            "[^",
            r"\u3040-\u309F",  # hiragana block
            r"\u30A0-\u30FF",  # katakana block
            r"\u4E00-\u9FFF",  # CJK unified ideographs
            r"\u3400-\u4DBF",  # CJK unified ideographs extension A
            r"\u3005",  # ideographic iteration mark "々"
            r"\u0041-\u005A\u0061-\u007A",  # ASCII letters A-Z, a-z
            r"\u0030-\u0039",  # ASCII digits 0-9
            "。、!?…♪♡○",  # the punctuation and symbols that are kept
            "]",
        )
    )
)
def normalize(text: str) -> str:
    """Return *text* after the module's substitution and width-folding passes.

    The passes run in this order:

    1. Every regex in ``REPLACE_MAP`` is substituted, in the map's iteration
       order.
    2. The text is translated through ``FULLWIDTH_ALPHA_TO_HALFWIDTH``,
       ``FULLWIDTH_DIGITS_TO_HALFWIDTH`` and
       ``HALFWIDTH_KATAKANA_TO_FULLWIDTH``.
    3. Runs of two or more "…" or of two or more "ー" shrink to one character.
    4. Runs of two or more characters from ``!?♪♡`` shrink to a single
       character when the run is uniform, otherwise to the run's first
       character followed by its last.
    5. Three or more consecutive copies of one of the listed kana are capped
       at two copies.
    """
    for regex in REPLACE_MAP:
        text = re.sub(regex, REPLACE_MAP[regex], text)

    width_tables = (
        FULLWIDTH_ALPHA_TO_HALFWIDTH,
        FULLWIDTH_DIGITS_TO_HALFWIDTH,
        HALFWIDTH_KATAKANA_TO_FULLWIDTH,
    )
    for table in width_tables:
        text = text.translate(table)

    # Ellipsis first, then the long-vowel mark.
    for mark in ("…", "ー"):
        text = re.sub(mark + "{2,}", mark, text)

    def squeeze_marks(found):
        # A uniform run keeps one character; a mixed run keeps its two ends.
        run = found.group(0)
        head, tail = run[0], run[-1]
        if run == head * len(run):
            return head
        return head + tail

    text = re.sub(r"[!?♪♡]{2,}", squeeze_marks, text)

    stretchable_kana = "ッっあいうえおんぁぃぅぇぉゃゅょアイウエオンァィゥェォャュョ"
    return re.sub("([" + stretchable_kana + r"])\1{2,}", r"\1\1", text)
 def ids_to_speech_tokens(speech_ids):
     speech_tokens_str = []
     if len(target_text) > 300:
         gr.Warning("Text is too long. Please keep it under 300 characters.")
         target_text = target_text[:300]
+    target_text = normalize(target_text)
     with torch.no_grad():
         if sample_audio_path:
             progress(0, 'Loading and trimming audio...')