szili2011 committed (verified)
Commit 012a8ba · Parent(s): d5a7fa2

Update app.py

Files changed (1)
  1. app.py +8 -16
app.py CHANGED
@@ -13,8 +13,6 @@ nltk.download('averaged_perceptron_tagger', quiet=True)
 nltk.download('cmudict', quiet=True)
 
 # Load your model from the root directory
-# Add compile=False as it's often needed for inference-only models
-# and can resolve some loading warnings.
 model = tf.keras.models.load_model('audio_model.h5', compile=False)
 
 # Preprocess input text
@@ -31,24 +29,20 @@ def preprocess_text(text):
 
     flattened_phonemes = [p for sublist in phonemes for p in sublist]
 
-    # Create dummy 13-feature vectors for each phoneme (implement your own feature extraction)
     num_features = 13
     sequence_length = len(flattened_phonemes)
-    if sequence_length == 0: # Handle empty input
+    if sequence_length == 0:
         return np.zeros((1, 1, num_features))
 
     input_data = np.random.rand(sequence_length, num_features)
-
-    # Add batch dimension
-    input_data = np.expand_dims(input_data, axis=0) # Shape (1, sequence_length, 13)
+    input_data = np.expand_dims(input_data, axis=0)
 
     return input_data
 
 # Convert model output to an audio file
 def convert_to_audio(model_output, filename="output.wav"):
-    if model_output.size == 0: # Handle empty output
+    if model_output.size == 0:
         return None
-    # Normalize audio to be between -1 and 1
     normalized_output = np.interp(model_output, (model_output.min(), model_output.max()), (-1, 1))
     write(filename, SAMPLE_RATE, normalized_output.astype(np.float32))
     return filename
@@ -57,17 +51,15 @@ def convert_to_audio(model_output, filename="output.wav"):
 def generate_sfx(text, duration):
     input_data = preprocess_text(text)
 
-    # Check for empty input after preprocessing
     if input_data.shape[1] == 0:
-        return None # Return None to clear the audio component
+        return None
 
     prediction = model.predict(input_data)
-
     flat_prediction = prediction.flatten()
+
     if len(flat_prediction) == 0:
         return None
 
-    # Generate longer output by repeating or padding
    num_repeats = (duration * SAMPLE_RATE // len(flat_prediction)) + 1
    audio_data = np.tile(flat_prediction, num_repeats)[:duration * SAMPLE_RATE]
 
@@ -89,6 +81,6 @@ interface = gr.Interface(
 
 # Run the interface
 if __name__ == "__main__":
-    tf.config.set_visible_devices([], 'GPU') # Disable GPU
-    # --- FIX: Remove share=True for Hugging Face Spaces ---
-    interface.launch()
+    tf.config.set_visible_devices([], 'GPU')
+    # The ValueError shows that share=True IS required for your environment.
+    interface.launch(share=True)