szili2011 committed on
Commit
c3f5e81
·
verified ·
1 Parent(s): 809a47f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -21
app.py CHANGED
@@ -25,7 +25,6 @@ def preprocess_text(text):
25
  if word in d:
26
  phonemes.append(d[word][0])
27
  else:
28
- # Use a placeholder for words not found in cmudict
29
  phonemes.append(['UNKNOWN'])
30
 
31
  # Flatten the list of phonemes
@@ -42,7 +41,7 @@ def preprocess_text(text):
42
  return input_data
43
 
44
  # Convert model output to an audio file
45
- def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
46
  """
47
  Convert the model output into a .wav file.
48
  """
@@ -52,40 +51,60 @@ def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
52
 
53
  # Normalize the audio output
54
  normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
 
 
 
55
 
56
- # Write the audio data to a file
57
- write(filename, sample_rate, normalized_output)
58
-
59
- return filename
60
-
61
- # Generate sound effect
62
- def generate_sfx(text):
63
  """
64
  Takes input text, preprocesses it, runs it through the model,
65
- and generates a downloadable audio file.
66
  """
67
  input_data = preprocess_text(text)
68
 
69
- # Generate prediction
70
- prediction = model.predict(input_data)
 
 
71
 
72
- # Ensure prediction shape is correct
73
- if prediction.ndim == 2 and prediction.shape[1] > 1:
74
- prediction = prediction.flatten() # Flatten if necessary
75
 
76
- # Convert the prediction to an audio file
77
- audio_file = convert_to_audio(prediction, filename="output.wav")
78
-
79
- return audio_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  # Define the Gradio interface
82
  interface = gr.Interface(
83
  fn=generate_sfx,
84
- inputs=gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
 
 
 
85
  outputs=gr.Audio(label="Generated SFX", type="filepath"),
86
  live=False,
87
  title="SFX Generator from Text",
88
- description="Enter a word or sentence, and the model will generate an SFX sound.",
89
  )
90
 
91
  # Run the interface
 
25
  if word in d:
26
  phonemes.append(d[word][0])
27
  else:
 
28
  phonemes.append(['UNKNOWN'])
29
 
30
  # Flatten the list of phonemes
 
41
  return input_data
42
 
43
  # Convert model output to an audio file
44
+ def convert_to_audio(model_output, sample_rate=22050):
45
  """
46
  Convert the model output into a .wav file.
47
  """
 
51
 
52
  # Normalize the audio output
53
  normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
54
+
55
+ # Return normalized output for further processing
56
+ return normalized_output
57
 
58
+ # Generate sound effect with specified duration
59
def generate_sfx(text, duration=30):
    """
    Takes input text, preprocesses it, runs it through the model,
    and generates a downloadable audio file for a specified duration.

    Args:
        text: Input word or sentence; handed to preprocess_text().
        duration: Requested clip length in seconds. May be fractional,
            since a UI slider can deliver float values.

    Returns:
        The path of the written audio file ("output.wav").

    Raises:
        ValueError: If the duration yields no samples, or if the model
            produces an empty segment (which would otherwise make the
            generation loop spin forever).
    """
    # One sample rate drives both the length computation and the file header.
    sample_rate = 22050

    # Slice bounds must be integers; a fractional duration would otherwise
    # produce a float here and fail when trimming the final audio.
    total_samples = int(round(duration * sample_rate))
    if total_samples <= 0:
        raise ValueError("duration must be greater than zero")

    input_data = preprocess_text(text)

    # Collect normalized segments until enough samples are available.
    audio_segments = []
    generated_samples = 0

    while generated_samples < total_samples:
        # Generate prediction
        prediction = model.predict(input_data)

        # Collapse a 2-D (rows, >1 columns) prediction into a 1-D signal.
        if prediction.ndim == 2 and prediction.shape[1] > 1:
            prediction = prediction.flatten()

        # Convert the prediction to normalized audio data
        audio_segment = convert_to_audio(prediction)

        # Without progress the loop condition can never become false.
        if len(audio_segment) == 0:
            raise ValueError("model produced an empty audio segment")

        audio_segments.append(audio_segment)
        generated_samples += len(audio_segment)

    # Concatenate all segments and trim to exactly the requested length.
    final_audio = np.concatenate(audio_segments)[:total_samples]

    # NOTE(review): `write` is presumably scipy.io.wavfile.write (import not
    # visible here) -- confirm. Cast to float32 so the file is a 32-bit float
    # WAV rather than 64-bit float, which many players cannot decode.
    output_filename = "output.wav"
    write(output_filename, sample_rate, final_audio.astype(np.float32))

    return output_filename
96
 
97
  # Define the Gradio interface
98
  interface = gr.Interface(
99
  fn=generate_sfx,
100
+ inputs=[
101
+ gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
102
+ gr.Slider(label="Duration (seconds)", minimum=1, maximum=60, value=30) # Added duration slider
103
+ ],
104
  outputs=gr.Audio(label="Generated SFX", type="filepath"),
105
  live=False,
106
  title="SFX Generator from Text",
107
+ description="Enter a word or sentence, and the model will generate an SFX sound for the specified duration.",
108
  )
109
 
110
  # Run the interface