Spaces:

szili2011
/

FNaF-Audio-Generation

Runtime error

App Files Files Community

szili2011 committed on Sep 24, 2024

Commit

c3f5e81

verified ·

1 Parent(s): 809a47f

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -21

app.py CHANGED Viewed

@@ -25,7 +25,6 @@ def preprocess_text(text):
         if word in d:
             phonemes.append(d[word][0])
         else:
-            # Use a placeholder for words not found in cmudict
             phonemes.append(['UNKNOWN'])
     # Flatten the list of phonemes
@@ -42,7 +41,7 @@ def preprocess_text(text):
     return input_data
 # Convert model output to an audio file
-def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
     """
     Convert the model output into a .wav file.
     """
@@ -52,40 +51,60 @@ def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
     # Normalize the audio output
     normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
-    # Write the audio data to a file
-    write(filename, sample_rate, normalized_output)
-    return filename
-# Generate sound effect
-def generate_sfx(text):
     """
     Takes input text, preprocesses it, runs it through the model,
-    and generates a downloadable audio file.
     """
     input_data = preprocess_text(text)
-    # Generate prediction
-    prediction = model.predict(input_data)
-    # Ensure prediction shape is correct
-    if prediction.ndim == 2 and prediction.shape[1] > 1:
-        prediction = prediction.flatten()  # Flatten if necessary
-    # Convert the prediction to an audio file
-    audio_file = convert_to_audio(prediction, filename="output.wav")
-    return audio_file
 # Define the Gradio interface
 interface = gr.Interface(
     fn=generate_sfx,
-    inputs=gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
     outputs=gr.Audio(label="Generated SFX", type="filepath"),
     live=False,
     title="SFX Generator from Text",
-    description="Enter a word or sentence, and the model will generate an SFX sound.",
 )
 # Run the interface

         if word in d:
             phonemes.append(d[word][0])
         else:
             phonemes.append(['UNKNOWN'])
     # Flatten the list of phonemes
     return input_data
 # Convert model output to an audio file
+def convert_to_audio(model_output, sample_rate=22050):
     """
     Convert the model output into a .wav file.
     """
     # Normalize the audio output
     normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
+    # Return normalized output for further processing
+    return normalized_output
+# Generate sound effect with specified duration
+def generate_sfx(text, duration=30):
     """
     Takes input text, preprocesses it, runs it through the model,
+    and generates a downloadable audio file for a specified duration.
     """
     input_data = preprocess_text(text)
+    # Initialize an empty list to hold audio segments
+    audio_segments = []
+    total_samples = duration * 22050  # Total samples for the requested duration at 22050 Hz
+    generated_samples = 0
+    while generated_samples < total_samples:
+        # Generate prediction
+        prediction = model.predict(input_data)
+        # Ensure prediction shape is correct
+        if prediction.ndim == 2 and prediction.shape[1] > 1:
+            prediction = prediction.flatten()  # Flatten if necessary
+        # Convert the prediction to audio data
+        audio_segment = convert_to_audio(prediction)
+        # Append the generated segment to the list
+        audio_segments.append(audio_segment)
+        # Increment the total samples generated
+        generated_samples += len(audio_segment)
+    # Concatenate all segments to form the final audio output
+    final_audio = np.concatenate(audio_segments)[:total_samples]  # Ensure we cut to the correct length
+    # Write the audio data to a file
+    output_filename = "output.wav"
+    write(output_filename, 22050, final_audio)
+    return output_filename
 # Define the Gradio interface
 interface = gr.Interface(
     fn=generate_sfx,
+    inputs=[
+        gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
+        gr.Slider(label="Duration (seconds)", minimum=1, maximum=60, value=30)  # Added duration slider
+    ],
     outputs=gr.Audio(label="Generated SFX", type="filepath"),
     live=False,
     title="SFX Generator from Text",
+    description="Enter a word or sentence, and the model will generate an SFX sound for the specified duration.",
 )
 # Run the interface