Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +8 -28
  3. requirements.txt +1 -1
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 👂
4
  colorFrom: green
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.44.1
8
  app_file: app.py
9
  pinned: false
10
  short_description: Generates audio environment from an image
 
4
  colorFrom: green
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 5.0.1
8
  app_file: app.py
9
  pinned: false
10
  short_description: Generates audio environment from an image
app.py CHANGED
@@ -26,7 +26,6 @@ def extract_audio(video_in):
26
  return 'audio.wav'
27
 
28
  def get_caption_from_kosmos(image_in):
29
- gr.Info("Generating image caption with Kosmos2...")
30
  kosmos2_client = Client("fffiloni/Kosmos-2-API", hf_token=hf_token)
31
  kosmos2_result = kosmos2_client.predict(
32
  image_input=handle_file(image_in),
@@ -87,7 +86,6 @@ def get_magnet(prompt):
87
  raise gr.Error("MAGNet space API is not ready, please try again in few minutes ")
88
 
89
  def get_audioldm(prompt):
90
- gr.Info("Now calling AudioLDM2 for SFX ...")
91
  try:
92
  client = Client("fffiloni/audioldm2-text2audio-text2music-API", hf_token=hf_token)
93
  seed = random.randint(0, MAX_SEED)
@@ -107,21 +105,18 @@ def get_audioldm(prompt):
107
  raise gr.Error("AudioLDM space API is not ready, please try again in few minutes ")
108
 
109
  def get_audiogen(prompt):
110
- gr.Info("Now calling AudioGen for SFX ...")
111
  try:
112
- client = Client("fffiloni/audiogen", hf_token=hf_token)
113
  result = client.predict(
114
- prompt=prompt,
115
- duration=10,
116
- api_name="/infer"
117
  )
118
- print(result)
119
  return result
120
  except:
121
  raise gr.Error("AudioGen space API is not ready, please try again in few minutes ")
122
 
123
  def get_tango(prompt):
124
- gr.Info("Now calling AudioGen for SFX ...")
125
  try:
126
  client = Client("fffiloni/tango", hf_token=hf_token)
127
  result = client.predict(
@@ -155,7 +150,6 @@ def get_tango2(prompt):
155
 
156
 
157
  def get_stable_audio_open(prompt):
158
- gr.Info("Now calling Stable-Audio for SFX ...")
159
  try:
160
  client = Client("fffiloni/Stable-Audio-Open-A10", hf_token=hf_token)
161
  result = client.predict(
@@ -190,20 +184,6 @@ def get_ezaudio(prompt):
190
  raise gr.Error("EzAudio space API is not ready, please try again in few minutes ")
191
 
192
  def infer(image_in, chosen_model):
193
- """
194
- Generate an audio clip (sound effect) from an input image using the selected generative model.
195
-
196
- This function first generates a caption from the provided image using a vision-language model.
197
- The caption is then used as a text prompt for various audio generation models.
198
-
199
- Args:
200
- image_in (str): File path to the input image. The image will be processed to generate a descriptive caption.
201
- chosen_model (str): The name of the audio generation model to use. Supported options include: "AudioLDM-2", "Tango", "Stable Audio Open".
202
-
203
- Returns:
204
- str | dict: The path or result object of the generated audio clip, depending on the model used.
205
-
206
- """
207
  caption = get_caption_from_kosmos(image_in)
208
  if chosen_model == "MAGNet" :
209
  magnet_result = get_magnet(caption)
@@ -251,11 +231,11 @@ with gr.Blocks(css=css) as demo:
251
  chosen_model = gr.Dropdown(label="Choose a model", choices=[
252
  #"MAGNet",
253
  "AudioLDM-2",
254
- "AudioGen",
255
  "Tango",
256
- #"Tango 2",
257
  "Stable Audio Open",
258
- #"EzAudio"
259
  ], value="AudioLDM-2")
260
  submit_btn = gr.Button("Submit")
261
  with gr.Column():
@@ -272,4 +252,4 @@ with gr.Blocks(css=css) as demo:
272
  outputs=[audio_o],
273
  )
274
 
275
- demo.queue(max_size=10).launch(debug=True, show_error=True, ssr_mode=False, mcp_server=True)
 
26
  return 'audio.wav'
27
 
28
  def get_caption_from_kosmos(image_in):
 
29
  kosmos2_client = Client("fffiloni/Kosmos-2-API", hf_token=hf_token)
30
  kosmos2_result = kosmos2_client.predict(
31
  image_input=handle_file(image_in),
 
86
  raise gr.Error("MAGNet space API is not ready, please try again in few minutes ")
87
 
88
  def get_audioldm(prompt):
 
89
  try:
90
  client = Client("fffiloni/audioldm2-text2audio-text2music-API", hf_token=hf_token)
91
  seed = random.randint(0, MAX_SEED)
 
105
  raise gr.Error("AudioLDM space API is not ready, please try again in few minutes ")
106
 
107
  def get_audiogen(prompt):
 
108
  try:
109
+ client = Client("https://fffiloni-audiogen.hf.space/")
110
  result = client.predict(
111
+ prompt,
112
+ 10,
113
+ api_name="/infer"
114
  )
 
115
  return result
116
  except:
117
  raise gr.Error("AudioGen space API is not ready, please try again in few minutes ")
118
 
119
  def get_tango(prompt):
 
120
  try:
121
  client = Client("fffiloni/tango", hf_token=hf_token)
122
  result = client.predict(
 
150
 
151
 
152
  def get_stable_audio_open(prompt):
 
153
  try:
154
  client = Client("fffiloni/Stable-Audio-Open-A10", hf_token=hf_token)
155
  result = client.predict(
 
184
  raise gr.Error("EzAudio space API is not ready, please try again in few minutes ")
185
 
186
  def infer(image_in, chosen_model):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  caption = get_caption_from_kosmos(image_in)
188
  if chosen_model == "MAGNet" :
189
  magnet_result = get_magnet(caption)
 
231
  chosen_model = gr.Dropdown(label="Choose a model", choices=[
232
  #"MAGNet",
233
  "AudioLDM-2",
234
+ #"AudioGen",
235
  "Tango",
236
+ "Tango 2",
237
  "Stable Audio Open",
238
+ "EzAudio"
239
  ], value="AudioLDM-2")
240
  submit_btn = gr.Button("Submit")
241
  with gr.Column():
 
252
  outputs=[audio_o],
253
  )
254
 
255
+ demo.queue(max_size=10).launch(debug=True, show_error=True)
requirements.txt CHANGED
@@ -1 +1 @@
1
- moviepy<2
 
1
+ moviepy