Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	| import gradio as gr | |
| import tensorflow as tf | |
| import numpy as np | |
| import nltk | |
| from nltk.corpus import cmudict | |
| from scipy.io.wavfile import write | |
# Sample rate passed to the WAV writer and used to turn a duration in
# seconds into a number of samples.
SAMPLE_RATE = 22050

# Fetch the NLTK resources this script asks for; quiet=True keeps the
# downloader from printing progress to the console.
for _resource in ('averaged_perceptron_tagger', 'cmudict'):
    nltk.download(_resource, quiet=True)

# Load the Keras model from a relative path (resolved against the current
# working directory). compile=False loads it for inference only.
model = tf.keras.models.load_model('audio_model.h5', compile=False)
# Preprocess input text
_CMU_PRONUNCIATIONS = None  # filled on first use by _pronunciations()


def _pronunciations():
    """Return the CMU pronouncing dictionary, building it only once."""
    global _CMU_PRONUNCIATIONS
    if _CMU_PRONUNCIATIONS is None:
        # cmudict.dict() materialises the whole corpus into a dict, so doing
        # it per request is wasteful; keep the result for later calls.
        _CMU_PRONUNCIATIONS = cmudict.dict()
    return _CMU_PRONUNCIATIONS


def preprocess_text(text):
    """Turn input text into a model input array of shape (1, n, 13).

    The text is lower-cased and split on whitespace. Each word is looked up
    in the CMU pronouncing dictionary and contributes the phonemes of its
    first listed pronunciation; a word that is not found contributes a single
    'UNKNOWN' token. ``n`` is the total number of phonemes collected.

    NOTE: the feature values themselves are drawn from ``np.random.rand`` --
    only the phoneme *count* influences the result, so two calls with the
    same text return different arrays of the same shape.

    Args:
        text: The user-supplied word or sentence. ``None`` is treated like
            an empty string.

    Returns:
        A float array of shape (1, n, 13), or an all-zero array of shape
        (1, 1, 13) when the text contains no words.
    """
    import string  # local import: the file's import header is outside this block

    num_features = 13
    words = (text or "").lower().split()
    if not words:
        # Nothing to look up, so skip loading the dictionary entirely.
        return np.zeros((1, 1, num_features))

    pronunciations = _pronunciations()
    flattened_phonemes = []
    for word in words:
        entry = pronunciations.get(word)
        if entry is None:
            # Retry without surrounding punctuation so "boom!" matches "boom".
            # The exact spelling is tried first because some dictionary keys
            # legitimately contain punctuation (e.g. apostrophes).
            entry = pronunciations.get(word.strip(string.punctuation))
        flattened_phonemes.extend(entry[0] if entry else ['UNKNOWN'])

    sequence_length = len(flattened_phonemes)
    if sequence_length == 0:
        return np.zeros((1, 1, num_features))

    input_data = np.random.rand(sequence_length, num_features)
    # Add the leading batch axis expected by model.predict.
    return np.expand_dims(input_data, axis=0)
# Convert model output to an audio file
def convert_to_audio(model_output, filename="output.wav", sample_rate=None):
    """Rescale an array to the range [-1, 1] and write it as a float32 WAV file.

    Args:
        model_output: NumPy array of sample values.
        filename: Path of the WAV file to write.
        sample_rate: Sample rate written to the file; defaults to the
            module-level SAMPLE_RATE when omitted.

    Returns:
        ``filename`` after the file has been written, or ``None`` when
        ``model_output`` is empty (no file is written in that case).
    """
    if model_output.size == 0:
        return None
    if sample_rate is None:
        sample_rate = SAMPLE_RATE

    lowest, highest = model_output.min(), model_output.max()
    if not highest > lowest:
        # A constant signal (or NaN extrema, for which every comparison is
        # False) has no range to rescale. np.interp requires increasing
        # x-coordinates and would otherwise be handed two equal ones, so
        # write silence instead of relying on undefined output.
        normalized_output = np.zeros_like(model_output, dtype=np.float32)
    else:
        normalized_output = np.interp(model_output, (lowest, highest), (-1, 1))

    write(filename, sample_rate, normalized_output.astype(np.float32))
    return filename
# Define function to generate sound effect
def generate_sfx(text, duration):
    """Generate a WAV file of roughly ``duration`` seconds for ``text``.

    The text is converted to a model input with preprocess_text, the model's
    prediction is flattened to one dimension, and that sequence is repeated
    and trimmed to ``duration * SAMPLE_RATE`` samples before being written by
    convert_to_audio.

    NOTE: every call writes to the same "output.wav" path.

    Args:
        text: Word or sentence entered by the user.
        duration: Requested length in seconds; may be an int or a float.

    Returns:
        The path of the written WAV file, or ``None`` when there is nothing
        to write (empty input, empty prediction, or a non-positive duration).
    """
    input_data = preprocess_text(text)
    if input_data.shape[1] == 0:
        return None

    flat_prediction = model.predict(input_data).flatten()
    if flat_prediction.size == 0:
        return None

    # The UI slider may hand over a float. np.tile and slice bounds need
    # integers, so settle the sample count as a non-negative int up front.
    num_samples = max(int(round(duration * SAMPLE_RATE)), 0)

    # One extra repeat guarantees the tiled signal covers num_samples before
    # it is trimmed to the exact length.
    num_repeats = num_samples // flat_prediction.size + 1
    audio_data = np.tile(flat_prediction, num_repeats)[:num_samples]

    return convert_to_audio(audio_data, filename="output.wav")
# Build the Gradio UI: a text box and a duration slider feed generate_sfx,
# and the file path it returns is shown in an audio player.
_text_input = gr.Textbox(
    label="Enter a Word",
    placeholder="Write a Word To Convert it into SFX Sound",
)
_duration_input = gr.Slider(
    minimum=1,
    maximum=20,
    value=3,
    step=1,
    label="Duration (seconds)",
)
_audio_output = gr.Audio(label="Generated SFX", type="filepath")

interface = gr.Interface(
    fn=generate_sfx,
    inputs=[_text_input, _duration_input],
    outputs=_audio_output,
    title="SFX Generator from Text",
    description="Enter a word or sentence, and the model will generate an SFX sound.",
)
# Run the interface
if __name__ == "__main__":
    # Hide GPUs so inference runs on the CPU. TensorFlow only lets the
    # visible-device list change before its runtime is initialised, and this
    # script loads the model at import time, so on a machine that has a GPU
    # the call raises RuntimeError. CPU pinning is best-effort: report the
    # failure and still start the app instead of crashing before launch.
    try:
        tf.config.set_visible_devices([], 'GPU')
    except RuntimeError as exc:
        print(f"Could not hide GPU devices ({exc}); continuing with default device placement.")

    # share=True is kept from the original script, whose author noted that
    # launching without it raised a ValueError in their environment.
    interface.launch(share=True)