aach456 committed
Commit fb68207 · verified · 1 Parent(s): 409ee88

Update app.py

Files changed (1):
  1. app.py +53 -7
app.py CHANGED
@@ -2,9 +2,12 @@ import gradio as gr
 import torch
 import numpy as np
 from diffusers import I2VGenXLPipeline
+from transformers import MusicgenForConditionalGeneration, AutoProcessor
 from PIL import Image
 from moviepy.editor import ImageSequenceClip
 import io
+import ffmpeg
+import scipy.io.wavfile
 
 def generate_video(image, prompt, negative_prompt, video_length):
     generator = torch.manual_seed(8888)
@@ -43,31 +46,74 @@ def generate_video(image, prompt, negative_prompt, video_length):
 
     return output_file
 
+def generate_music(prompt, unconditional=False):
+    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    model.to(device)
+
+    # Generate music
+    if unconditional:
+        unconditional_inputs = model.get_unconditional_inputs(num_samples=1)
+        audio_values = model.generate(**unconditional_inputs, do_sample=True, max_new_tokens=256)
+    else:
+        processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+        inputs = processor(
+            text=prompt,
+            padding=True,
+            return_tensors="pt",
+        )
+        audio_values = model.generate(**inputs.to(device), do_sample=True, guidance_scale=3, max_new_tokens=256)
+
+    sampling_rate = model.config.audio_encoder.sampling_rate
+    audio_file = "musicgen_out.wav"
+    # Save the generated audio ([0, 0] selects the first sample's mono waveform)
+    scipy.io.wavfile.write(audio_file, sampling_rate, audio_values[0, 0].cpu().numpy())
+
+    return audio_file
+
+def combine_audio_video(audio_file, video_file):
+    output_file = "combined_output.mp4"
+    audio = ffmpeg.input(audio_file)
+    video = ffmpeg.input(video_file)
+    output = ffmpeg.output(video, audio, output_file, vcodec='copy', acodec='aac')
+    ffmpeg.run(output)
+    return output_file
+
 # Gradio interface
-def interface(image, prompt, negative_prompt, video_length):
+def interface(image, prompt, negative_prompt, video_length, music_prompt, unconditional):
     # Open the uploaded image (gr.Image(type="filepath") passes a file path)
     image = Image.open(image)
 
     # Generate video and track progress
-    return generate_video(image, prompt, negative_prompt, video_length)
+    video_file = generate_video(image, prompt, negative_prompt, video_length)
+
+    # Generate music
+    audio_file = generate_music(music_prompt, unconditional)
+
+    # Combine audio and video
+    combined_file = combine_audio_video(audio_file, video_file)
+
+    return combined_file
 
 # Create Gradio Blocks
 with gr.Blocks() as demo:
-    gr.Markdown("# AI-Powered Video Generation")
+    gr.Markdown("# AI-Powered Video and Music Generation")
 
     with gr.Row():
         image_input = gr.Image(type="filepath", label="Upload Image")
-        prompt_input = gr.Textbox(label="Enter the Prompt")
+        prompt_input = gr.Textbox(label="Enter the Video Prompt")
         negative_prompt_input = gr.Textbox(label="Enter the Negative Prompt")
         video_length_input = gr.Number(label="Video Length (seconds)", value=10, precision=0)
+        music_prompt_input = gr.Textbox(label="Enter the Music Prompt")
+        unconditional_checkbox = gr.Checkbox(label="Generate Unconditional Music")
 
-    generate_button = gr.Button("Generate Video")
-    output_video = gr.Video(label="Output Video")
+    generate_button = gr.Button("Generate Video and Music")
+    output_video = gr.Video(label="Output Video with Sound")
 
     # Define the button action
     generate_button.click(
         interface,
-        inputs=[image_input, prompt_input, negative_prompt_input, video_length_input],
+        inputs=[image_input, prompt_input, negative_prompt_input, video_length_input, music_prompt_input, unconditional_checkbox],
        outputs=output_video,
        show_progress=True  # Show progress bar
    )
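
A note on durations: max_new_tokens=256 yields only about five seconds of music (MusicGen's codec emits on the order of 50 tokens per second), while the video runs for video_length seconds, so the muxed file can carry a silent tail or truncated audio. A minimal sketch of two ways to reconcile them; the helper names are illustrative rather than part of the commit, and the -shortest mapping assumes ffmpeg-python's convention that a None-valued keyword becomes a bare flag:

import ffmpeg

# Sketch: derive a MusicGen token budget from the requested video length.
# Assumption: the codec produces roughly 50 tokens per second of audio.
def music_tokens_for(seconds, tokens_per_second=50):
    return int(seconds * tokens_per_second)

# Sketch: mux the streams but stop at the shorter one (-shortest), so the
# output never outruns either track.
def combine_audio_video_trimmed(audio_file, video_file, output_file="combined_output.mp4"):
    video = ffmpeg.input(video_file)
    audio = ffmpeg.input(audio_file)
    stream = ffmpeg.output(video, audio, output_file,
                           vcodec="copy", acodec="aac", shortest=None)
    ffmpeg.run(stream, overwrite_output=True)
    return output_file

With the first helper, generate_music could take the video length and pass max_new_tokens=music_tokens_for(video_length) instead of the fixed 256.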
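
Separately, generate_music calls from_pretrained on every button press, so each request pays the full checkpoint load. A sketch of hoisting the model and processor to module scope instead, assuming the Space keeps them resident between requests (the _MUSICGEN_* names are illustrative):

import torch
from transformers import AutoProcessor, MusicgenForConditionalGeneration

# Sketch: load MusicGen once at import time rather than per request.
_MUSICGEN_DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
_MUSICGEN_MODEL = MusicgenForConditionalGeneration.from_pretrained(
    "facebook/musicgen-small"
).to(_MUSICGEN_DEVICE)
_MUSICGEN_PROCESSOR = AutoProcessor.from_pretrained("facebook/musicgen-small")

generate_music would then reference these module-level objects instead of rebuilding them on each call.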
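
The hunks shown here end at the generate_button.click(...) call, so this diff does not show whether the file ever calls demo.launch(). If no launch call exists below the visible region, the Space needs one at module level, for example:

if __name__ == "__main__":
    demo.launch()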
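
Finally, the new imports imply that the Space's requirements.txt (not touched by this commit) must list transformers, scipy, and ffmpeg-python, and that an ffmpeg binary is present in the runtime image; otherwise app.py fails at import time or when combine_audio_video shells out to ffmpeg.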