"""Gradio app that turns text into a short sound effect using a Keras model.

The text is split into words, each word is looked up in the CMU pronouncing
dictionary to count phonemes, a feature tensor with one time step per phoneme
is fed to the model, and the flattened prediction is tiled to the requested
duration and written to a WAV file.
"""

import functools

import gradio as gr
import nltk
import numpy as np
import tensorflow as tf
from nltk.corpus import cmudict
from scipy.io.wavfile import write

# Define sample_rate as a global constant
SAMPLE_RATE = 22050

# Download required NLTK data
nltk.download('averaged_perceptron_tagger', quiet=True)
nltk.download('cmudict', quiet=True)

if __name__ == "__main__":
    # Hide GPUs so inference runs on CPU. TensorFlow only allows changing the
    # visible devices before its runtime is initialised, so this has to happen
    # before the model is loaded below (loading it first makes this call raise
    # RuntimeError on machines that do have a GPU).
    tf.config.set_visible_devices([], 'GPU')

# Load your model from the root directory
model = tf.keras.models.load_model('audio_model.h5', compile=False)


@functools.lru_cache(maxsize=1)
def _pronouncing_dict():
    """Return the CMU pronouncing dictionary, parsed once per process."""
    return cmudict.dict()


# Preprocess input text
def preprocess_text(text):
    """Build the model input tensor for *text*.

    Each whitespace-separated, lower-cased word is replaced by the phonemes
    of its first CMUdict pronunciation; a word missing from the dictionary
    contributes a single ``'UNKNOWN'`` placeholder.

    Args:
        text: Free-form input text. ``None`` is treated like an empty string.

    Returns:
        A float array of shape ``(1, n, 13)`` where ``n`` is the total number
        of phonemes. When the text yields no phonemes, an all-zero array of
        shape ``(1, 1, 13)`` is returned instead.

    Note:
        Only the phoneme *count* influences the result: the feature values
        are drawn from ``np.random.rand``, so the output differs between
        calls for the same text.
    """
    pronunciations = _pronouncing_dict()
    words = (text or "").lower().split()

    flattened_phonemes = []
    for word in words:
        if word in pronunciations:
            # Use the first listed pronunciation of the word.
            flattened_phonemes.extend(pronunciations[word][0])
        else:
            flattened_phonemes.append('UNKNOWN')

    num_features = 13
    sequence_length = len(flattened_phonemes)
    if sequence_length == 0:
        return np.zeros((1, 1, num_features))

    input_data = np.random.rand(sequence_length, num_features)
    # Add the leading batch dimension.
    return np.expand_dims(input_data, axis=0)


# Convert model output to an audio file
def convert_to_audio(model_output, filename="output.wav"):
    """Rescale *model_output* to [-1, 1] and write it as a float32 WAV file.

    Args:
        model_output: NumPy array of samples.
        filename: Path of the WAV file to write.

    Returns:
        ``filename`` once the file has been written, or ``None`` when
        ``model_output`` is empty (nothing is written in that case).
    """
    if model_output.size == 0:
        return None

    lowest = model_output.min()
    highest = model_output.max()
    if highest > lowest:
        normalized_output = np.interp(model_output, (lowest, highest), (-1, 1))
    else:
        # A constant signal has no range to stretch, and np.interp requires
        # increasing x-coordinates, so emit silence instead.
        normalized_output = np.zeros(model_output.shape)

    write(filename, SAMPLE_RATE, normalized_output.astype(np.float32))
    return filename


# Define function to generate sound effect
def generate_sfx(text, duration):
    """Generate a sound effect for *text* lasting *duration* seconds.

    Args:
        text: Text entered by the user.
        duration: Desired length in seconds (int or float).

    Returns:
        Path of the written WAV file, or ``None`` when there is nothing to
        write (empty prediction or a non-positive duration).
    """
    input_data = preprocess_text(text)
    # Defensive only: preprocess_text currently always returns at least one
    # time step, so this guard does not trigger.
    if input_data.shape[1] == 0:
        return None

    prediction = model.predict(input_data)
    flat_prediction = prediction.flatten()
    if flat_prediction.size == 0:
        return None

    # The slider may deliver a float; slice bounds and repeat counts must be
    # integers.
    total_samples = int(duration * SAMPLE_RATE)
    if total_samples <= 0:
        return None

    # Repeat the prediction often enough to cover the duration, then trim.
    num_repeats = total_samples // flat_prediction.size + 1
    audio_data = np.tile(flat_prediction, num_repeats)[:total_samples]

    return convert_to_audio(audio_data, filename="output.wav")


# Define the Gradio interface
interface = gr.Interface(
    fn=generate_sfx,
    inputs=[
        gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
        gr.Slider(minimum=1, maximum=20, value=3, step=1, label="Duration (seconds)"),
    ],
    outputs=gr.Audio(label="Generated SFX", type="filepath"),
    title="SFX Generator from Text",
    description="Enter a word or sentence, and the model will generate an SFX sound.",
)

# Run the interface
if __name__ == "__main__":
    # NOTE(review): an earlier comment here stated that a ValueError showed
    # share=True is required in the deployment environment, yet launch() is
    # called without it. Confirm which is intended before changing the call.
    interface.launch()