Spaces:
Running
Running
jason-on-salt-a40
committed on
Commit
·
b1f4e2f
1
Parent(s):
579d79b
fix space error. fix encodec download path
Browse files
app.py
CHANGED
|
@@ -63,7 +63,7 @@ class WhisperModel:
|
|
| 63 |
def transcribe(self, audio_path):
|
| 64 |
return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
|
| 65 |
|
| 66 |
-
|
| 67 |
class WhisperxModel:
|
| 68 |
def __init__(self, model_name, align_model: WhisperxAlignModel):
|
| 69 |
from whisperx import load_model
|
|
@@ -100,7 +100,7 @@ def load_models(whisper_backend_name, whisper_model_name, alignment_model_name,
|
|
| 100 |
|
| 101 |
encodec_fn = f"{MODELS_PATH}/encodec_4cb2048_giga.th"
|
| 102 |
if not os.path.exists(encodec_fn):
|
| 103 |
-
os.system(f"wget https://huggingface.co/pyp1/VoiceCraft/resolve/main/encodec_4cb2048_giga.th")
|
| 104 |
|
| 105 |
voicecraft_model = {
|
| 106 |
"config": config,
|
|
@@ -114,9 +114,11 @@ def load_models(whisper_backend_name, whisper_model_name, alignment_model_name,
|
|
| 114 |
|
| 115 |
def get_transcribe_state(segments):
|
| 116 |
words_info = [word_info for segment in segments for word_info in segment["words"]]
|
|
|
|
|
|
|
| 117 |
return {
|
| 118 |
"segments": segments,
|
| 119 |
-
"transcript":
|
| 120 |
"words_info": words_info,
|
| 121 |
"transcript_with_start_time": " ".join([f"{word['start']} {word['word']}" for word in words_info]),
|
| 122 |
"transcript_with_end_time": " ".join([f"{word['word']} {word['end']}" for word in words_info]),
|
|
@@ -140,7 +142,7 @@ def transcribe(seed, audio_path):
|
|
| 140 |
state
|
| 141 |
]
|
| 142 |
|
| 143 |
-
|
| 144 |
def align_segments(transcript, audio_path):
|
| 145 |
from aeneas.executetask import ExecuteTask
|
| 146 |
from aeneas.task import Task
|
|
@@ -363,7 +365,7 @@ If disabled, you should write the target transcript yourself:</br>
|
|
| 363 |
- In Edit mode write full prompt</br>
|
| 364 |
"""
|
| 365 |
|
| 366 |
-
demo_original_transcript = "
|
| 367 |
|
| 368 |
demo_text = {
|
| 369 |
"TTS": {
|
|
@@ -603,6 +605,7 @@ if __name__ == "__main__":
|
|
| 603 |
parser.add_argument("--models-path", default="./pretrained_models", help="Path to voicecraft models directory")
|
| 604 |
parser.add_argument("--port", default=7860, type=int, help="App port")
|
| 605 |
parser.add_argument("--share", action="store_true", help="Launch with public url")
|
|
|
|
| 606 |
|
| 607 |
os.environ["USER"] = os.getenv("USER", "user")
|
| 608 |
args = parser.parse_args()
|
|
@@ -611,4 +614,4 @@ if __name__ == "__main__":
|
|
| 611 |
MODELS_PATH = args.models_path
|
| 612 |
|
| 613 |
app = get_app()
|
| 614 |
-
app.queue().launch(share=args.share, server_port=args.port)
|
|
|
|
| 63 |
def transcribe(self, audio_path):
|
| 64 |
return self.model.transcribe(audio_path, suppress_tokens=self.supress_tokens, word_timestamps=True)["segments"]
|
| 65 |
|
| 66 |
+
|
| 67 |
class WhisperxModel:
|
| 68 |
def __init__(self, model_name, align_model: WhisperxAlignModel):
|
| 69 |
from whisperx import load_model
|
|
|
|
| 100 |
|
| 101 |
encodec_fn = f"{MODELS_PATH}/encodec_4cb2048_giga.th"
|
| 102 |
if not os.path.exists(encodec_fn):
|
| 103 |
+
os.system(f"wget https://huggingface.co/pyp1/VoiceCraft/resolve/main/encodec_4cb2048_giga.th -O " + encodec_fn)
|
| 104 |
|
| 105 |
voicecraft_model = {
|
| 106 |
"config": config,
|
|
|
|
| 114 |
|
| 115 |
def get_transcribe_state(segments):
|
| 116 |
words_info = [word_info for segment in segments for word_info in segment["words"]]
|
| 117 |
+
transcript = " ".join([segment["text"] for segment in segments])
|
| 118 |
+
transcript = transcript[1:] if transcript[0] == " " else transcript
|
| 119 |
return {
|
| 120 |
"segments": segments,
|
| 121 |
+
"transcript": transcript,
|
| 122 |
"words_info": words_info,
|
| 123 |
"transcript_with_start_time": " ".join([f"{word['start']} {word['word']}" for word in words_info]),
|
| 124 |
"transcript_with_end_time": " ".join([f"{word['word']} {word['end']}" for word in words_info]),
|
|
|
|
| 142 |
state
|
| 143 |
]
|
| 144 |
|
| 145 |
+
@spaces.GPU(duration=60)
|
| 146 |
def align_segments(transcript, audio_path):
|
| 147 |
from aeneas.executetask import ExecuteTask
|
| 148 |
from aeneas.task import Task
|
|
|
|
| 365 |
- In Edit mode write full prompt</br>
|
| 366 |
"""
|
| 367 |
|
| 368 |
+
demo_original_transcript = "But when I had approached so near to them, the common object, which the sense deceives, lost not by distance any of its marks."
|
| 369 |
|
| 370 |
demo_text = {
|
| 371 |
"TTS": {
|
|
|
|
| 605 |
parser.add_argument("--models-path", default="./pretrained_models", help="Path to voicecraft models directory")
|
| 606 |
parser.add_argument("--port", default=7860, type=int, help="App port")
|
| 607 |
parser.add_argument("--share", action="store_true", help="Launch with public url")
|
| 608 |
+
parser.add_argument("--server_name", default="127.0.0.1", type=str, help="Server name for launching the app. 127.0.0.1 for localhost; 0.0.0.0 to allow access from other machines in the local network. Might also give access to external users depends on the firewall settings.")
|
| 609 |
|
| 610 |
os.environ["USER"] = os.getenv("USER", "user")
|
| 611 |
args = parser.parse_args()
|
|
|
|
| 614 |
MODELS_PATH = args.models_path
|
| 615 |
|
| 616 |
app = get_app()
|
| 617 |
+
app.queue().launch(share=args.share, server_name=args.server_name, server_port=args.port)
|