Moore-Language-Space-ZeroGPU

Sleeping

App Files Files Community

ANYANTUDRE commited on Oct 22, 2024

Commit

1c7cbff

1 Parent(s): 1885a88

app almost ready

Browse files

Files changed (7) hide show

.gitignore +167 -0
app.py +65 -36
goai_helpers/goai_stt2.py +9 -1
goai_helpers/goai_stt_ttt_pipeline.py +44 -0
goai_helpers/goai_traduction.py +1 -1
goai_helpers/goai_tts2.py +6 -9
goai_helpers/goai_ttt_tts_pipeline.py +70 -0

.gitignore CHANGED Viewed

	@@ -0,0 +1,167 @@

+# My dirs
+exples_voix/
+audios/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+.venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

app.py CHANGED Viewed

@@ -1,15 +1,9 @@
-import spaces
-import torch
-import scipy
-import torchaudio
 import gradio as gr
-from transformers import pipeline, set_seed
 from huggingface_hub import login
 import os
 from languages import get_language_names
-from goai_helpers import goai_traduction, goai_stt, goai_stt2, goai_tts,  goai_tts2
 auth_token = os.getenv('HF_SPACE_TOKEN')
@@ -18,7 +12,7 @@ login(token=auth_token)
 # list all files in the ./audios directory for the dropdown
 AUDIO_FILES = [f for f in os.listdir('./exples_voix') if os.path.isfile(os.path.join('./exples_voix', f))]
 DESCRIPTION = """<div style="display: flex; justify-content: space-between; align-items: center; flex-wrap: wrap;">
                     <div style="flex: 1; min-width: 250px;">
                         Ce modèle de traduction vers la <b>langue Mooré</b> a été développé from scratch par <b>GO AI CORP</b> et la version disponible en test est celle à 700 millions de paramètres.
@@ -52,8 +46,26 @@ LANG_TO_ID = {
 demo = gr.Blocks(theme=gr.themes.Ocean())
-goai_stt = gr.Interface(
-    fn=goai_stt2.goai_stt2,
     inputs=[
         gr.Audio(sources=["microphone", "upload"], type="filepath"),
         gr.Dropdown(
@@ -76,23 +88,38 @@ goai_stt = gr.Interface(
               ["./audios/example4.mp3", "yõk foto"]
              ],
     cache_examples=False,
-    title="Mooré ASR: Transcribe Audio",
     description=DESCRIPTION,
     flagging_mode="auto",
 )
-goai_tts = gr.Interface(
-    fn=goai_tts2.goai_ttt_tts,
     inputs=[
-        gr.Text(label="Texte à traduire", lines=2, value="Par cette ouverture, le centre se veut contribuer à la formation professionnelle des jeunes et des femmes, renforcer les capacités des acteurs du monde agricole, et contribuer à la lutte contre le chômage au Burkina Faso."),
-        gr.Dropdown(label="Voix", choices=audio_files, value="exple_voix_masculine.wav"),
-        gr.Audio(label="Cloner votre voix (optionel)", type="numpy", format="wav"),
     ],
     outputs=[
         gr.Text(label="Texte traduit"),
-        gr.Audio(label="Audio original généré", format="wav"),
-        gr.Audio(label="Denoised Audio", format='wav'),
-        gr.Audio(label="Enhanced Audio", format='wav')
     ],
     examples=[["Ils vont bien, merci. Mon père travaille dur dans les champs et ma mère est toujours occupée à la maison.", "exple_voix_masculine.wav", None],
               ["La finale s’est jouée en présence du Président du Faso, Ibrahim Traoré.", "exple_voix_feminine.wav", None],
@@ -104,29 +131,31 @@ goai_tts = gr.Interface(
     description=DESCRIPTION,
 )
-goai_traduction = gr.Interface(
-    fn=goai_traduction.goai_traduction,
     inputs=[
-        gr.Textbox(label="Texte", placeholder="Yaa sõama"),
-        gr.Dropdown(label="Langue source", choices=["fra_Latn", "mos_Latn"], value='fra_Latn'),
-        gr.Dropdown(label="Langue cible", choices=["fra_Latn", "mos_Latn"], value='mos_Latn')
-    ],
-    outputs=["text"],
-    examples=[["Yʋʋm a wãn la b kẽesd biig lekolle?", "mos_Latn", "fra_Latn"],
-              ["Zak-soab la kasma.", "mos_Latn", "fra_Latn"],
-              ["Le gouvernement avait pris des mesures louables par rapport à l’augmentation des prix de certaines denrées alimentaires.", "fra_Latn", "mos_Latn"],
-              ["Comme lors du match face à la Côte d’Ivoire, c’est sur un coup de pied arrêté que les Etalons encaissent leur but.", "fra_Latn", "mos_Latn"],
     ],
     cache_examples=False,
-    title="Traduction du Mooré: texte vers texte",
-    description=DESCRIPTION
 )
 with demo:
     gr.TabbedInterface(
-        interface_list=[goai_stt, goai_tts, goai_traduction],
-        tab_names=["Microphone & Audio file"]
     )
-demo.queue().launch(ssr_mode=False)

 import gradio as gr
 from huggingface_hub import login
 import os
 from languages import get_language_names
+from goai_helpers import goai_traduction, goai_stt, goai_stt2, goai_tts,  goai_tts2, goai_ttt_tts_pipeline, goai_stt_ttt_pipeline
 auth_token = os.getenv('HF_SPACE_TOKEN')
 # list all files in the ./audios directory for the dropdown
 AUDIO_FILES = [f for f in os.listdir('./exples_voix') if os.path.isfile(os.path.join('./exples_voix', f))]
+MODELES_TTS = ["ArissBandoss/coqui-tts-moore-V1", "ArissBandoss/mms-tts-mos-V18"]
 DESCRIPTION = """<div style="display: flex; justify-content: space-between; align-items: center; flex-wrap: wrap;">
                     <div style="flex: 1; min-width: 250px;">
                         Ce modèle de traduction vers la <b>langue Mooré</b> a été développé from scratch par <b>GO AI CORP</b> et la version disponible en test est celle à 700 millions de paramètres.
 demo = gr.Blocks(theme=gr.themes.Ocean())
+goai_traduction_if = gr.Interface(
+    fn=goai_traduction.goai_traduction,
+    inputs=[
+        gr.Textbox(label="Texte", placeholder="Yaa sõama"),
+        gr.Dropdown(label="Langue source", choices=["fra_Latn", "mos_Latn"], value='fra_Latn'),
+        gr.Dropdown(label="Langue cible", choices=["fra_Latn", "mos_Latn"], value='mos_Latn')
+    ],
+    outputs=["text"],
+    examples=[["Yʋʋm a wãn la b kẽesd biig lekolle?", "mos_Latn", "fra_Latn"],
+              ["Zak-soab la kasma.", "mos_Latn", "fra_Latn"],
+              ["Le gouvernement avait pris des mesures louables par rapport à l’augmentation des prix de certaines denrées alimentaires.", "fra_Latn", "mos_Latn"],
+              ["Comme lors du match face à la Côte d’Ivoire, c’est sur un coup de pied arrêté que les Etalons encaissent leur but.", "fra_Latn", "mos_Latn"],
+    ],
+    cache_examples=False,
+    title="Traduction Mooré-Francais",
+    description=DESCRIPTION
+)
+goai_stt_if = gr.Interface(
+    fn=goai_stt2.transcribe,
     inputs=[
         gr.Audio(sources=["microphone", "upload"], type="filepath"),
         gr.Dropdown(
               ["./audios/example4.mp3", "yõk foto"]
              ],
     cache_examples=False,
+    title="Mooré ASR",
     description=DESCRIPTION,
     flagging_mode="auto",
 )
+goai_ttt_tts_pipeline_if = gr.Interface(
+    fn=goai_ttt_tts_pipeline.goai_ttt_tts,
     inputs=[
+        gr.Text(
+            label="Texte à traduire",
+            lines=3,
+            value="Par cette ouverture, le centre se veut contribuer à la formation professionnelle des jeunes et des femmes, renforcer les capacités des acteurs du monde agricole, et contribuer à la lutte contre le chômage au Burkina Faso."
+        ),
+        gr.Dropdown(
+            label="Modèles de TTS",
+            choices=MODELES_TTS,
+            value="ArissBandoss/coqui-tts-moore-V1"
+        ),
+        gr.Dropdown(
+            label="Voix",
+            choices=AUDIO_FILES,
+            value="exple_voix_masculine.wav"
+        ),
+        gr.Audio(
+            label="Cloner votre voix (optionel)",
+            type="numpy",
+            format="wav"
+        ),
     ],
     outputs=[
         gr.Text(label="Texte traduit"),
+        gr.Audio(label="Audio généré", format="wav"),
     ],
     examples=[["Ils vont bien, merci. Mon père travaille dur dans les champs et ma mère est toujours occupée à la maison.", "exple_voix_masculine.wav", None],
               ["La finale s’est jouée en présence du Président du Faso, Ibrahim Traoré.", "exple_voix_feminine.wav", None],
     description=DESCRIPTION,
 )
+goai_stt_ttt_pipeline_if = gr.Interface(
+    fn=goai_stt_ttt_pipeline.goai_stt_ttt,
     inputs=[
+        gr.Audio(sources=["microphone", "upload"], type="filepath"),
+        gr.Slider(label="Batch Size", minimum=1, maximum=32, value=8, step=1),
+        gr.Slider(label="Chunk Length (s)", minimum=1, maximum=60, value=17.5, step=0.1),
+        gr.Slider(label="Stride Length (s)", minimum=1, maximum=30, value=1, step=0.1),
     ],
+    outputs=[gr.Textbox(label="Output"), gr.File(label="Download Files")],
+    examples=[["./audios/example1.mp3", "a ye ligdi"],
+              ["./audios/example2.mp3", "zoe nimbãanega"],
+              ["./audios/example3.mp3", "zãng-zãnga"],
+              ["./audios/example4.mp3", "yõk foto"]
+             ],
     cache_examples=False,
+    title="Mooré ASR",
+    description=DESCRIPTION,
+    flagging_mode="auto",
 )
 with demo:
     gr.TabbedInterface(
+        interface_list=[goai_traduction_if, goai_stt_if, goai_ttt_tts_pipeline_if, goai_stt_ttt_pipeline_if],
+        tab_names=["Traduction Mooré-Francais", "Mooré ASR", "Mooré TTS & Traduction", "Mooré ASR & Traduction"]
     )
+demo.queue().launch(ssr_mode=False)

goai_helpers/goai_stt2.py CHANGED Viewed

@@ -6,7 +6,15 @@ DEVICE = 0 if torch.cuda.is_available() else "cpu"
 @spaces.GPU
-def transcribe(inputs, model, language, batch_size, chunk_length_s, stride_length_s):
     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

 @spaces.GPU
+def transcribe(
+        inputs,
+        model,
+        language,
+        batch_size,
+        chunk_length_s,
+        stride_length_s
+    ):
     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

goai_helpers/goai_stt_ttt_pipeline.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import os
+import spaces
+from huggingface_hub import login
+from goai_helpers.goai_traduction import goai_traduction
+from goai_helpers.goai_stt2 import transcribe
+# authentification
+auth_token = os.getenv('HF_SPACE_TOKEN')
+login(token=auth_token)
+MODEL_ASR = "ArissBandoss/whisper-small-mos"
+LANGUAGE  = "Automatic Detection"
+# gradio interface translation and text to speech function
+@spaces.GPU(duration=120)
+def goai_stt_ttt(
+        inputs,
+        batch_size,
+        chunk_length_s,
+        stride_length_s,
+        model=MODEL_ASR,
+        language=LANGUAGE,
+    ):
+    # 1. STT: Speech To Text
+    mos_text = transcribe(
+        inputs,
+        batch_size,
+        chunk_length_s,
+        stride_length_s,
+        model=model,
+        language=language,
+    )
+    yield mos_text, None
+    # 2. TTT: Translation mos ==> fr
+    fr_text = goai_traduction.goai_traduction(
+        mos_text,
+        src_lang="fra_Latn",
+        tgt_lang="mos_Latn"
+    )
+    yield mos_text, fr_text

goai_helpers/goai_traduction.py CHANGED Viewed

@@ -3,10 +3,10 @@ import spaces
 from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
 import os
 from huggingface_hub import login
 max_length = 512
 auth_token = os.getenv('HF_SPACE_TOKEN')
 login(token=auth_token)
-#auth_token = os.environ["HF_SPACE_TOKEN"]
 @spaces.GPU

 from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
 import os
 from huggingface_hub import login
 max_length = 512
 auth_token = os.getenv('HF_SPACE_TOKEN')
 login(token=auth_token)
 @spaces.GPU

goai_helpers/goai_tts2.py CHANGED Viewed

@@ -1,23 +1,16 @@
 import os
-import re
 import time
 import torch
 import torchaudio
 import spaces
-import requests
 import tempfile
-import concurrent
-import numpy as np
 from tqdm import tqdm
 from typing import Optional, Tuple
 from huggingface_hub import hf_hub_download, hf_hub_url, login
-from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer
 from TTS.tts.configs.xtts_config import XttsConfig
 from TTS.tts.models.xtts import Xtts
-from resemble_enhance.enhancer.inference import denoise, enhance
-from flore200_codes import flores_codes
 from goai_helpers.utils import download_file, diviser_phrases_moore, enhance_speech
 from goai_helpers.goai_traduction import goai_traduction
@@ -284,7 +277,11 @@ def goai_ttt_tts(
 ):
     # translation
-    mos_text = goai_traduction.goai_traduction(text, src_lang="fra_Latn", tgt_lang="mos_Latn")
     yield mos_text, None, None, None
     # TTS pipeline
@@ -310,4 +307,4 @@ def goai_ttt_tts(
         denoise_before_enhancement
     )
-    yield mos_text, (sampling_rate, audio_array.numpy()), denoised_audio, enhanced_audio

 import os
 import time
 import torch
 import torchaudio
 import spaces
 import tempfile
 from tqdm import tqdm
 from typing import Optional, Tuple
 from huggingface_hub import hf_hub_download, hf_hub_url, login
 from TTS.tts.configs.xtts_config import XttsConfig
 from TTS.tts.models.xtts import Xtts
 from goai_helpers.utils import download_file, diviser_phrases_moore, enhance_speech
 from goai_helpers.goai_traduction import goai_traduction
 ):
     # translation
+    mos_text = goai_traduction(
+                        text,
+                        src_lang="fra_Latn",
+                        tgt_lang="mos_Latn"
+                    )
     yield mos_text, None, None, None
     # TTS pipeline
         denoise_before_enhancement
     )
+    yield mos_text, (sampling_rate, audio_array.numpy()), denoised_audio, enhanced_audio

goai_helpers/goai_ttt_tts_pipeline.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import os
+import torch
+import spaces
+from huggingface_hub import login
+from goai_helpers.goai_traduction import goai_traduction
+from goai_helpers.goai_tts2 import MooreTTS, text_to_speech
+from goai_helpers.goai_tts import goai_tts
+# authentification
+auth_token = os.getenv('HF_SPACE_TOKEN')
+login(token=auth_token)
+# gradio interface text to speech function
+@spaces.GPU
+def goai_many_tts(
+        text,
+        tts_model,
+        reference_speaker,
+        reference_audio=None,
+    ):
+    if tts_model == "ArissBandoss/coqui-tts-moore-V1":
+        # TTS pipeline
+        tts = MooreTTS(tts_model)
+        reference_speaker = os.path.join("./exples_voix", reference_speaker)
+        # convert translated text to speech with reference audio
+        if reference_audio is not None:
+            audio_array, sampling_rate = text_to_speech(tts, text, reference_speaker, reference_audio)
+        else:
+            audio_array, sampling_rate = text_to_speech(tts, text, reference_speaker=reference_speaker)
+        return text, (sampling_rate, audio_array.numpy())
+    elif tts_model == "ArissBandoss/mms-tts-mos-V18":
+        sample_rate, audio_data = goai_tts(text)
+        return text, (sample_rate, audio_data)
+    else:
+        print("Erreur de modèle!!! Veuillez vérifier le modèle sélectionné.")
+# gradio interface translation and text to speech function
+@spaces.GPU(duration=120)
+def goai_ttt_tts(
+        text,
+        tts_model,
+        reference_speaker,
+        reference_audio=None,
+    ):
+    # 1. TTT: Translation fr ==> mos
+    mos_text = goai_traduction.goai_traduction(
+        text,
+        src_lang="fra_Latn",
+        tgt_lang="mos_Latn"
+    )
+    yield mos_text, None
+    # 2. TTS: Text to Speech
+    sample_rate, audio_data = goai_many_tts(
+        text,
+        tts_model,
+        reference_speaker,
+        reference_audio=reference_audio,
+    )
+    yield mos_text, (sample_rate, audio_data)