OmniAICreator committed on
Commit
d659216
·
verified ·
1 Parent(s): 9022a66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py CHANGED
@@ -5,6 +5,7 @@ import soundfile as sf
5
  from xcodec2.modeling_xcodec2 import XCodec2Model
6
  import torchaudio
7
  import gradio as gr
 
8
 
9
  llasa_model_id = 'OmniAICreator/Llasa-1B-run1'
10
 
@@ -28,6 +29,68 @@ whisper_turbo_pipe = pipeline(
28
  device='cuda',
29
  )
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def ids_to_speech_tokens(speech_ids):
32
 
33
  speech_tokens_str = []
@@ -56,6 +119,7 @@ def infer(sample_audio_path, target_text, temperature, top_p, progress=gr.Progr
56
  if len(target_text) > 300:
57
  gr.Warning("Text is too long. Please keep it under 300 characters.")
58
  target_text = target_text[:300]
 
59
  with torch.no_grad():
60
  if sample_audio_path:
61
  progress(0, 'Loading and trimming audio...')
 
5
  from xcodec2.modeling_xcodec2 import XCodec2Model
6
  import torchaudio
7
  import gradio as gr
8
+ import re
9
 
10
  llasa_model_id = 'OmniAICreator/Llasa-1B-run1'
11
 
 
29
  device='cuda',
30
  )
31
 
32
# Regex pattern -> replacement, applied in insertion order by normalize().
# Keys are regular expressions, so non-printing or easily confused characters
# are written as \uXXXX escapes (the re module resolves them).
REPLACE_MAP: dict[str, str] = {
    r"\t": "",
    r"\[n\]": "",
    r" ": "",
    r"\u3000": "",  # ideographic (fullwidth) space
    # Decorative symbols with no spoken form; \ue63e is a private-use glyph.
    r"[;▼♀♂《》≪≫\ue63e①②③④⑤⑥]": "",
    # Hyphen / dash / box-drawing look-alikes.
    r"[\u02d7\u2010-\u2015\u2043\u2212\u23af\u23e4\u2500\u2501\u2e3a\u2e3b]": "",
    # Wave dashes are read as a long-vowel mark.
    r"[\uff5e\u301C]": "ー",
    # Fullwidth ? and ! become ASCII. Written as escapes on purpose: a bare
    # ASCII "?" key is not a valid regex and would map the character to itself.
    r"\uff1f": "?",
    r"\uff01": "!",
    r"[●◯〇]": "○",
    r"♥": "♡",
}

# Fullwidth Latin letters (U+FF21-FF3A, U+FF41-FF5A) -> ASCII letters.
FULLWIDTH_ALPHA_TO_HALFWIDTH = str.maketrans(
    {
        chr(full): chr(half)
        for full, half in zip(
            list(range(0xFF21, 0xFF3B)) + list(range(0xFF41, 0xFF5B)),
            list(range(0x41, 0x5B)) + list(range(0x61, 0x7B)),
        )
    }
)

# Fullwidth equivalents of U+FF61..U+FF9D, listed in halfwidth code-point
# order. The halfwidth block is NOT laid out like the U+30A0 katakana block
# (punctuation first, then small kana, no voiced forms), so the mapping has to
# be spelled out rather than zipped from two ranges.
_FULLWIDTH_KATAKANA = (
    "。「」、・ヲァィゥェォャュョッー"
    "アイウエオカキクケコ"
    "サシスセソタチツテト"
    "ナニヌネノハヒフヘホ"
    "マミムメモヤユヨラリ"
    "ルレロワン"
)

# Halfwidth katakana and punctuation -> fullwidth. The halfwidth voiced and
# semi-voiced marks become the combining marks U+3099 / U+309A so normalize()
# can fuse them with the preceding kana.
HALFWIDTH_KATAKANA_TO_FULLWIDTH = str.maketrans(
    {
        **{chr(0xFF61 + i): full for i, full in enumerate(_FULLWIDTH_KATAKANA)},
        "\uff9e": "\u3099",
        "\uff9f": "\u309a",
    }
)

# Fullwidth digits (U+FF10-FF19) -> ASCII digits.
FULLWIDTH_DIGITS_TO_HALFWIDTH = str.maketrans(
    {
        chr(full): chr(half)
        for full, half in zip(range(0xFF10, 0xFF1A), range(0x30, 0x3A))
    }
)

# Matches any single character outside the accepted set: hiragana, katakana,
# CJK ideographs (incl. Extension A and the iteration mark), ASCII letters and
# digits, and a handful of punctuation marks and symbols.
# NOTE(review): normalize() does not apply this pattern; presumably it is used
# by callers for validation -- confirm.
INVALID_PATTERN = re.compile(
    r"[^\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u3400-\u4DBF\u3005"
    r"\u0041-\u005A\u0061-\u007A"
    r"\u0030-\u0039"
    r"。、!?…♪♡○]"
)

# Kana that may be doubled for emphasis; longer runs are cut down to two.
_REPEATABLE_KANA = "ッっあいうえおんぁぃぅぇぉゃゅょアイウエオンァィゥェォャュョ"
_REPEATED_KANA_RUN = re.compile(f"([{_REPEATABLE_KANA}])\\1{{2,}}")
_ELLIPSIS_RUN = re.compile(r"…{2,}")
_LONG_VOWEL_RUN = re.compile(r"ー{2,}")
_SYMBOL_RUN = re.compile(r"[!?♪♡]{2,}")
# A kana followed by a combining (semi-)voiced sound mark.
_KANA_WITH_COMBINING_MARK = re.compile(r"[\u3040-\u30ff][\u3099\u309a]")


def _collapse_symbol_run(match: re.Match) -> str:
    """Shrink a run of !, ?, ♪, ♡ to one symbol, or first+last if mixed."""
    run = match.group(0)
    return run[0] if len(set(run)) == 1 else run[0] + run[-1]


def normalize(text: str) -> str:
    """Normalize Japanese input text before it is sent to the TTS model.

    Steps, in order:
      1. Apply every regex replacement in ``REPLACE_MAP``.
      2. Convert fullwidth letters and digits to ASCII, and halfwidth katakana
         to fullwidth (fusing voiced marks, e.g. halfwidth ka + dakuten -> ガ).
      3. Collapse repeated ``…`` and ``ー`` to a single character.
      4. Collapse runs of ``!?♪♡`` to one symbol (or first + last when mixed).
      5. Limit emphasis repeats of vowels / small kana / sokuon to two.

    Args:
        text: Raw user-supplied text.

    Returns:
        The normalized text. No characters are rejected here; anything not
        covered by the rules above passes through unchanged.
    """
    # Local import keeps this block self-contained; the module cache makes
    # repeated imports free.
    import unicodedata

    for pattern, replacement in REPLACE_MAP.items():
        text = re.sub(pattern, replacement, text)

    text = text.translate(FULLWIDTH_ALPHA_TO_HALFWIDTH)
    text = text.translate(FULLWIDTH_DIGITS_TO_HALFWIDTH)
    text = text.translate(HALFWIDTH_KATAKANA_TO_FULLWIDTH)
    # Fuse "kana + combining mark" pairs into the precomposed voiced kana.
    # Pairs with no precomposed form are left untouched by NFC.
    text = _KANA_WITH_COMBINING_MARK.sub(
        lambda m: unicodedata.normalize("NFC", m.group(0)), text
    )

    text = _ELLIPSIS_RUN.sub("…", text)
    text = _LONG_VOWEL_RUN.sub("ー", text)
    text = _SYMBOL_RUN.sub(_collapse_symbol_run, text)
    text = _REPEATED_KANA_RUN.sub(r"\1\1", text)
    return text
93
+
94
  def ids_to_speech_tokens(speech_ids):
95
 
96
  speech_tokens_str = []
 
119
  if len(target_text) > 300:
120
  gr.Warning("Text is too long. Please keep it under 300 characters.")
121
  target_text = target_text[:300]
122
+ target_text = normalize(target_text)
123
  with torch.no_grad():
124
  if sample_audio_path:
125
  progress(0, 'Loading and trimming audio...')