# Based on example code of https://huggingface.co/facebook/m2m100_1.2B
# and https://github.com/wannaphong/ttsmms
# See also https://github.com/facebookresearch/fairseq/blob/main/examples/mms/README.md
import gradio as gr
import os
import re
import soundfile as sf
import json
import nltk
from underthesea import sent_tokenize as vie_sent_tokenize  # Vietnamese NLP toolkit
from underthesea import text_normalize as vie_text_normalize
from nltk import sent_tokenize as nltk_sent_tokenize
from ttsmms import download
from ttsmms import TTS
from collections import OrderedDict
import uuid
import datetime
import shutil
from num2words import num2words
this_description = """Text To Speech for [1000+ languages](https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html) - using [fairseq MMS TTS](https://github.com/facebookresearch/fairseq/blob/main/examples/mms/README.md) and [ttsmms](https://github.com/wannaphong/ttsmms) wrapper. | |
Please note that for some languages, it may not pronounce all words correctly (yet). | |
""" | |
nltk.download("punkt") | |
# Pre-download some languages | |
tts_models = {} | |
eng_path = download("eng", "./data") | |
tts_models["eng"] = eng_path | |
vie_path = download("vie", "./data") | |
tts_models["vie"] = vie_path | |
mya_path = download("mya", "./data") | |
tts_models["mya"] = mya_path | |
lang_codes = OrderedDict()
language_names = list(lang_codes.keys())
with open("lang_code.txt", "r") as file:
    for line in file:
        line = line.strip()
        if line.startswith("----"):
            continue
        iso, lang = line.split("\t", 1)
        lang_codes[lang + " (" + iso + ")"] = iso
language_names = list(lang_codes.keys())
# Load num2words_lang_map
with open("num2words_lang_map.json") as f:
    num2words_lang_map = json.load(f, object_pairs_hook=OrderedDict)
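# Assumed shape of num2words_lang_map (the JSON file is not shown here):
# {"eng": ["en"], "vie": ["vi"], ...}, i.e. an ISO 639-3 code mapped to a list whose
# first entry is the language code that num2words understands.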
def convert_numbers_to_words_num2words(text, lang):
    # Find all numbers in the text using regex
    numbers = re.findall(r"\d+", text)
    # Sort numbers in descending order of length so that longer numbers are
    # replaced before any shorter number that is a substring of them
    sorted_numbers = sorted(numbers, key=len, reverse=True)
    print(sorted_numbers)
    # Replace numbers with their word equivalents
    for number in sorted_numbers:
        number_word = num2words(int(number), lang=num2words_lang_map[lang][0])
        text = text.replace(number, number_word)
    return text
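# For example, assuming num2words_lang_map maps "eng" to "en":
# convert_numbers_to_words_num2words("I have 23 cats", "eng") -> "I have twenty-three cats"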
def convert_mya_numbers_to_words(text):
    from mm_num2word import mm_num2word, extract_num

    numbers = extract_num(text)
    sorted_numbers = sorted(numbers, key=len, reverse=True)
    print(sorted_numbers)
    for n in sorted_numbers:
        text = text.replace(n, mm_num2word(n))
    return text
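# mm_num2word is assumed to be a helper module shipped alongside this script:
# extract_num returns the number strings found in the Burmese text and
# mm_num2word spells each of them out in Burmese words before substitution.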
def prepare_sentences(text, lang="mya"):
    sentences = []
    # Pre-process the text for some languages
    if lang.lower() == "mya":
        text = convert_mya_numbers_to_words(text)
        # Map Myanmar section marks (U+104A, U+104B) to comma and full stop
        text = text.replace("\u104A", ",").replace("\u104B", ".")
    if lang in num2words_lang_map:
        print("num2words supports this lang", lang)
        text = convert_numbers_to_words_num2words(text, lang)
        print("Processed text", text)
    # Not sure why, but lowercasing fixes the unclear pronunciation of the first word for vie
    text = text.lower()
    paragraphs = [paragraph for paragraph in text.split("\n") if paragraph.strip()]
    if lang.lower() == "vie":
        for paragraph in paragraphs:
            sentences_raw = vie_sent_tokenize(paragraph)
            sentences.extend(
                [
                    vie_text_normalize(sentence)
                    for sentence in sentences_raw
                    if sentence.strip()
                ]
            )
    else:
        sentences = [
            sentence
            for paragraph in paragraphs
            for sentence in nltk_sent_tokenize(paragraph)
            if sentence.strip()
        ]
    return sentences
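# Rough example for a non-Vietnamese language (digits omitted so the num2words
# branch does not matter):
# prepare_sentences("Hello there. How are you?\nSee you soon.", "eng")
# should return something like ["hello there.", "how are you?", "see you soon."]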
def list_dir(lang):
    # Get the current directory
    current_dir = os.getcwd()
    print(current_dir)
    # List all files in the current directory
    files = os.listdir(current_dir)
    # Filter the list to include only WAV files
    wav_files = [file for file in files if file.endswith(".wav")]
    print("Total wav files:", len(wav_files))
    # Print the last WAV file
    sorted_list = sorted(wav_files)
    print(lang, sorted_list[-1])
def combine_wav(source_dir, stamp, lang):
    # Get a list of all WAV files in the folder
    wav_files = [file for file in os.listdir(source_dir) if file.endswith(".wav")]
    # Sort the files alphabetically to ensure the correct order of combination
    wav_files.sort()
    # Combine the WAV files
    combined_data = []
    for file in wav_files:
        file_path = os.path.join(source_dir, file)
        data, sr = sf.read(file_path)
        combined_data.extend(data)
    # Save the combined audio to a new WAV file
    combined_file_path = f"{stamp}_{lang}.wav"
    sf.write(combined_file_path, combined_data, sr)
    shutil.rmtree(source_dir)
    list_dir(lang)
    # Return the path so the Hugging Face Space app can display the combined audio
    return combined_file_path
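# Note: this simply concatenates the raw sample arrays; it assumes every per-sentence
# chunk was synthesised by the same model and therefore shares the same sample rate (sr).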
def mms_tts(Input_Text, lang_name="Burmese (mya)"):
    # lang_code = lang_codes[lang_name]
    try:
        lang_code = lang_codes[lang_name]
    except KeyError:
        lang_code = "mya"
    user_model = download(lang_code, "./data")
    tts = TTS(user_model)
    sentences = prepare_sentences(Input_Text, lang_code)
    # output_dir = f"out_{lang_code}"
    current_datetime = datetime.datetime.now()
    timestamp = current_datetime.strftime("%Y%m%d%H%M%S%f")
    user_dir = f"u_{timestamp}"
    if os.path.exists(user_dir):
        session_id = str(uuid.uuid4())  # Generate a random session ID
        user_dir = f"u_{session_id}_{timestamp}"
    os.makedirs(user_dir, exist_ok=True)
    print("New user directory", user_dir)
    for i, sentence in enumerate(sentences):
        tts.synthesis(sentence, wav_path=f"{user_dir}/s_{str(i).zfill(10)}.wav")
    combined_file_path = combine_wav(user_dir, timestamp, lang_code)
    return combined_file_path
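# Example call, assuming "English (eng)" is one of the names built from lang_code.txt:
# mms_tts("Hello world. This is a test.", "English (eng)") downloads the English MMS
# model, writes one WAV per sentence into a temporary folder and returns the path of
# the combined file, e.g. "20240101123000000000_eng.wav".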
# common_languages = ["eng", "mya", "vie"] # List of common language codes | |
iface = gr.Interface( | |
fn=mms_tts, | |
title="Massively Multilingual Speech (MMS) - Text To Speech", | |
description=this_description, | |
inputs=[ | |
gr.Textbox(lines=5, placeholder="Enter text (unlimited sentences)", label="Input text (unlimited sentences)"), | |
gr.Dropdown( | |
choices=language_names, | |
label="Select language 1,000+", | |
value="Burmese (mya)", | |
), | |
], | |
outputs="audio", | |
) | |
# outputs=[ | |
# "audio", | |
# gr.File(label="Download", type="file", download_to="done.wav") | |
# ]) | |
iface.launch() | |