Spaces:

amedcj
/

kmr_tts

Running

App Files Files Community

amedcj committed on Jun 16

Commit

f70b4f1

verified ·

1 Parent(s): f91b0a2

Update app.py

Browse files

Updated app.py

Files changed (1) hide show

app.py +170 -107

app.py CHANGED Viewed

@@ -1,107 +1,170 @@
-from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
-import gradio as gr
-import numpy as np
-import scipy.io.wavfile
-import tempfile
-import scipy
-from transformers import VitsModel, AutoTokenizer
-import torch
-import scipy.io.wavfile
-import gradio as gr
-import tempfile
-# Load punctuation restoration pipeline
-punctuation_model_id = "oliverguhr/fullstop-punctuation-multilang-large"
-punct_tokenizer = AutoTokenizer.from_pretrained(punctuation_model_id)
-punct_model = AutoModelForTokenClassification.from_pretrained(punctuation_model_id)
-punct_pipe = pipeline("token-classification", model=punct_model, tokenizer=punct_tokenizer, aggregation_strategy="simple")
-# Load the model and tokenizer once
-model = VitsModel.from_pretrained("facebook/mms-tts-kmr-script_latin")
-tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kmr-script_latin")
-# Simple number-to-Kurmanji-word mapping
-num2word = {
-    "0": "sifir",
-    "1": "yek",
-    "2": "du",
-    "3": "sê",
-    "4": "çar",
-    "5": "pênc",
-    "6": "şeş",
-    "7": "heft",
-    "8": "heşt",
-    "9": "neh",
-    "10": "deh",
-    "11": "yanzdeh",
-    "12": "dûwanzdeh",
-    "13": "sêzdeh",
-    "14": "çardeh",
-    "15": "panzdeh",
-    "16": "şanzdeh",
-    "17": "hevzdeh",
-    "18": "hejdeh",
-    "19": "nozdeh",
-    "20": "bîst",
-    "30": "sî",
-    "40": "çil",
-    "50": "pêncî",
-    "60": "şêst",
-    "70": "heftê",
-    "80": "heştê",
-    "90": "nod",
-    "100": "sed",
-    # You can expand this...
-}
-import re
-def replace_numbers_with_words(text):
-    def repl(match):
-        num = match.group()
-        return num2word.get(num, num)  # fallback to number if unknown
-    return re.sub(r'\b\d+\b', repl, text)
-def text_to_speech(text):
-    # Convert text to input format
-    text = restore_punctuation(text)
-    text = replace_numbers_with_words(text)
-    inputs = tokenizer(text, return_tensors="pt")
-    with torch.no_grad():
-        output = model(**inputs).waveform
-    # Save the waveform to a temporary .wav file
-    tmp_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
-    scipy.io.wavfile.write(tmp_path, rate=model.config.sampling_rate, data=output.squeeze().numpy())
-    return tmp_path
-def restore_punctuation(text):
-    results = punct_pipe(text)
-    punctuated = ""
-    for token in results:
-        word = token['word']
-        punct = token.get('entity_group', '')
-        # Simple heuristic to add punctuation after words when predicted
-        if punct == "PERIOD":
-            punctuated += word + ". "
-        elif punct == "COMMA":
-            punctuated += word + ", "
-        else:
-            punctuated += word + "  "
-    return punctuated.strip()
-# Gradio UI
-interface = gr.Interface(
-    fn=text_to_speech,
-    inputs=gr.Textbox(label="Enter Kurmanji Text"),
-    outputs=gr.Audio(label="Generated Speech"),
-    title="Kurmanji Text-to-Speech",
-    description="Type Kurmanji Kurdish (Latin script) text and hear it spoken."
-)
-interface.launch()

+from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
+import gradio as gr
+import numpy as np
+import scipy.io.wavfile
+import tempfile
+import os
+from transformers import VitsModel, AutoTokenizer
+import torch
+import re
+# Load punctuation restoration pipeline.
+# Loading is best-effort: on any failure the error is printed and punct_pipe
+# is set to None, which restore_punctuation() checks before using it.
+try:
+    punctuation_model_id = "oliverguhr/fullstop-punctuation-multilang-large"
+    punct_tokenizer = AutoTokenizer.from_pretrained(punctuation_model_id)
+    punct_model = AutoModelForTokenClassification.from_pretrained(punctuation_model_id)
+    punct_pipe = pipeline("token-classification", model=punct_model, tokenizer=punct_tokenizer, aggregation_strategy="simple")
+    print("Punctuation model loaded successfully")
+except Exception as e:
+    print(f"Error loading punctuation model: {e}")
+    punct_pipe = None
+# Load the TTS model and tokenizer.
+# On failure both names are set to None; text_to_speech() checks for that
+# before attempting synthesis.
+try:
+    model = VitsModel.from_pretrained("facebook/mms-tts-kmr-script_latin")
+    tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-kmr-script_latin")
+    print("TTS model loaded successfully")
+except Exception as e:
+    print(f"Error loading TTS model: {e}")
+    model = None
+    tokenizer = None
+# Simple number-to-Kurmanji-word mapping.
+# Keys are decimal digit strings (the form matched in the input text), values
+# are the words to speak. Only 0-20, the tens up to 90, and 100 are listed.
+num2word = {
+    "0": "sifir",
+    "1": "yek",
+    "2": "du",
+    "3": "sê",
+    "4": "çar",
+    "5": "pênc",
+    "6": "şeş",
+    "7": "heft",
+    "8": "heşt",
+    "9": "neh",
+    "10": "deh",
+    "11": "yanzdeh",
+    "12": "dûwanzdeh",
+    "13": "sêzdeh",
+    "14": "çardeh",
+    "15": "panzdeh",
+    "16": "şanzdeh",
+    "17": "hevzdeh",
+    "18": "hejdeh",
+    "19": "nozdeh",
+    "20": "bîst",
+    "30": "sî",
+    "40": "çil",
+    "50": "pêncî",
+    "60": "şêst",
+    "70": "heftê",
+    "80": "heştê",
+    "90": "nod",
+    "100": "sed",
+}
+def replace_numbers_with_words(text):
+    def repl(match):
+        num = match.group()
+        return num2word.get(num, num)  # fallback to number if unknown
+    return re.sub(r'\b\d+\b', repl, text)
+def restore_punctuation(text):
+    if punct_pipe is None:
+        return text  # Return original text if punctuation model failed to load
+    try:
+        results = punct_pipe(text)
+        punctuated = ""
+        for token in results:
+            word = token['word']
+            punct = token.get('entity_group', '')
+            # Simple heuristic to add punctuation after words when predicted
+            if punct == "PERIOD":
+                punctuated += word + ". "
+            elif punct == "COMMA":
+                punctuated += word + ", "
+            else:
+                punctuated += word + " "
+        return punctuated.strip()
+    except Exception as e:
+        print(f"Error in punctuation restoration: {e}")
+        return text  # Return original text if punctuation fails
+def text_to_speech(text):
+    try:
+        # Check if models are loaded
+        if model is None or tokenizer is None:
+            return None, "Error: TTS model failed to load"
+        if not text or text.strip() == "":
+            return None, "Please enter some text"
+        print(f"Processing text: {text}")
+        # Convert text to input format
+        processed_text = restore_punctuation(text)
+        processed_text = replace_numbers_with_words(processed_text)
+        print(f"Processed text: {processed_text}")
+        # Tokenize input
+        inputs = tokenizer(processed_text, return_tensors="pt")
+        # Generate audio
+        with torch.no_grad():
+            output = model(**inputs).waveform
+        # Convert to numpy array
+        waveform = output.squeeze().numpy()
+        # Create temporary file with proper cleanup
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+            tmp_path = tmp_file.name
+        # Write audio file
+        scipy.io.wavfile.write(
+            tmp_path,
+            rate=model.config.sampling_rate,
+            data=waveform
+        )
+        print(f"Audio saved to: {tmp_path}")
+        return tmp_path
+    except Exception as e:
+        error_msg = f"Error in text_to_speech: {str(e)}"
+        print(error_msg)
+        return None
+# Create Gradio interface with better error handling
+def create_interface():
+    interface = gr.Interface(
+        fn=text_to_speech,
+        inputs=gr.Textbox(
+            label="Enter Kurmanji Text",
+            placeholder="Type your Kurmanji Kurdish text here...",
+            lines=3
+        ),
+        outputs=gr.Audio(label="Generated Speech"),
+        title="Kurmanji Text-to-Speech",
+        description="Type Kurmanji Kurdish (Latin script) text and hear it spoken.",
+        examples=[
+            ["Silav! Ez bi xêr im."],
+            ["Tu çawa yî?"],
+            ["Navê min Kurdî ye."]
+        ],
+        cache_examples=False
+    )
+    return interface
+if __name__ == "__main__":
+    # Check if we're running on Hugging Face Spaces
+    if "SPACE_ID" in os.environ:
+        print("Running on Hugging Face Spaces")
+    interface = create_interface()
+    interface.launch(
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860
+    )