szili2011 committed on
Commit
90e00da
·
verified ·
1 Parent(s): 85d2702

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -5,6 +5,9 @@ import nltk
5
  from nltk.corpus import cmudict
6
  from scipy.io.wavfile import write
7
 
 
 
 
8
  # Download required NLTK data
9
  nltk.download('averaged_perceptron_tagger')
10
  nltk.download('cmudict')
@@ -37,18 +40,20 @@ def preprocess_text(text):
37
  return input_data
38
 
39
  # Convert model output to an audio file
40
- def convert_to_audio(model_output, filename="output.wav", sample_rate=22050):
 
41
  normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
42
- write(filename, sample_rate, normalized_output.astype(np.float32))
43
  return filename
44
 
45
  # Define function to generate sound effect
46
- def generate_sfx(text, duration=30):
47
  input_data = preprocess_text(text)
48
  prediction = model.predict(input_data)
49
 
50
  # Generate longer output by repeating or padding
51
- audio_data = np.tile(prediction.flatten(), (duration * sample_rate // len(prediction.flatten()) + 1))[:duration * sample_rate]
 
52
 
53
  audio_file = convert_to_audio(audio_data, filename="output.wav")
54
 
@@ -59,7 +64,8 @@ interface = gr.Interface(
59
  fn=generate_sfx,
60
  inputs=[
61
  gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
62
- gr.Slider(minimum=2, maximum=20, label="Duration (seconds)", value=30)
 
63
  ],
64
  outputs=gr.Audio(label="Generated SFX", type="filepath"),
65
  title="SFX Generator from Text",
@@ -69,4 +75,4 @@ interface = gr.Interface(
69
  # Run the interface
70
  if __name__ == "__main__":
71
  tf.config.set_visible_devices([], 'GPU') # Disable GPU
72
- interface.launch()
 
5
  from nltk.corpus import cmudict
6
  from scipy.io.wavfile import write
7
 
8
+ # --- FIX 1: Define sample_rate as a global constant ---
9
+ SAMPLE_RATE = 22050
10
+
11
  # Download required NLTK data
12
  nltk.download('averaged_perceptron_tagger')
13
  nltk.download('cmudict')
 
40
  return input_data
41
 
42
  # Convert model output to an audio file
43
+ def convert_to_audio(model_output, filename="output.wav"):
44
+ # Now uses the global SAMPLE_RATE constant
45
  normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
46
+ write(filename, SAMPLE_RATE, normalized_output.astype(np.float32))
47
  return filename
48
 
49
  # Define function to generate sound effect
50
+ def generate_sfx(text, duration): # duration no longer needs a default here
51
  input_data = preprocess_text(text)
52
  prediction = model.predict(input_data)
53
 
54
  # Generate longer output by repeating or padding
55
+ # This line now works because SAMPLE_RATE is defined globally
56
+ audio_data = np.tile(prediction.flatten(), (duration * SAMPLE_RATE // len(prediction.flatten()) + 1))[:duration * SAMPLE_RATE]
57
 
58
  audio_file = convert_to_audio(audio_data, filename="output.wav")
59
 
 
64
  fn=generate_sfx,
65
  inputs=[
66
  gr.Textbox(label="Enter a Word", placeholder="Write a Word To Convert it into SFX Sound"),
67
+ # --- FIX 2: Corrected the default slider value to be within the min/max range ---
68
+ gr.Slider(minimum=2, maximum=20, value=5, label="Duration (seconds)")
69
  ],
70
  outputs=gr.Audio(label="Generated SFX", type="filepath"),
71
  title="SFX Generator from Text",
 
75
  # Run the interface
76
  if __name__ == "__main__":
77
  tf.config.set_visible_devices([], 'GPU') # Disable GPU
78
+ interface.launch()