import streamlit as st
from PIL import Image
import torch
import numpy as np
from moviepy.editor import ImageSequenceClip
from transformers import MusicgenForConditionalGeneration, AutoProcessor
from scipy.io import wavfile
import ffmpeg
from diffusers import I2VGenXLPipeline
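# Launch locally with:
#   streamlit run app.py
# The first run downloads the ali-vilab/i2vgen-xl and facebook/musicgen-small
# checkpoints from the Hugging Face Hub, which can take several minutes.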
def generate_video(image, prompt, negative_prompt, video_length):
    generator = torch.manual_seed(8888)
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    print(f"Using device: {device}")
    pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float32)
    pipeline.to(device)

    frames = []
    total_frames = video_length * 20  # 20 frames per second of requested video

    # Generate one frame per pipeline call, updating a single progress bar
    # (calling st.progress() inside the loop would stack a new bar each step).
    progress_bar = st.progress(0)
    for i in range(total_frames):
        frame = pipeline(
            prompt=prompt,
            image=image,
            num_inference_steps=2,  # very few steps: fast, at the cost of quality
            negative_prompt=negative_prompt,
            guidance_scale=9.0,
            generator=generator,
            num_frames=1,
        ).frames[0][0]  # .frames is a batch of videos; take the single frame of the first
        frames.append(frame)
        progress_bar.progress((i + 1) / total_frames)
    return frames
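# Alternative sketch (not used above): I2VGen-XL can synthesize a whole clip in
# a single call, which amortizes the per-call pipeline overhead. Under that
# approach the loop in generate_video would reduce to roughly:
#
#   frames = pipeline(prompt=prompt, image=image, negative_prompt=negative_prompt,
#                     guidance_scale=9.0, generator=generator,
#                     num_frames=total_frames).frames[0]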
def export_frames_to_video(frames, output_file):
    frames_np = [np.array(frame) for frame in frames]
    # fps matches the 20 frames-per-second assumption used in generate_video
    clip = ImageSequenceClip(frames_np, fps=20)
    clip.write_videofile(output_file, codec='libx264', audio=False)
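# Optional pattern (an assumption, not wired into the functions above): caching
# the heavy models with st.cache_resource keeps Streamlit from reloading the
# weights on every rerun of the script, e.g.:
#
#   @st.cache_resource
#   def load_video_pipeline():
#       return I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl",
#                                               torch_dtype=torch.float32)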
def generate_music(prompt, unconditional=False):
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model.to(device)

    # MusicGen produces the clip in a single generate() call, so the progress
    # bar simply moves from empty to full around that call.
    progress_bar = st.progress(0)
    if unconditional:
        unconditional_inputs = model.get_unconditional_inputs(num_samples=1)
        audio_values = model.generate(**unconditional_inputs, do_sample=True, max_new_tokens=256)
    else:
        processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
        inputs = processor(
            text=prompt,
            padding=True,
            return_tensors="pt",
        )
        audio_values = model.generate(
            **inputs.to(device), do_sample=True, guidance_scale=3, max_new_tokens=256
        )
    progress_bar.progress(1.0)

    sampling_rate = model.config.audio_encoder.sampling_rate
    # audio_values has shape (batch, channels, samples); return the mono waveform
    return audio_values[0, 0].cpu().numpy(), sampling_rate
def combine_audio_video(audio_file, video_file, output_file):
    audio = ffmpeg.input(audio_file)
    video = ffmpeg.input(video_file)
    # Copy the video stream as-is and encode the audio track as AAC.
    output = ffmpeg.output(video, audio, output_file, vcodec='copy', acodec='aac')
    ffmpeg.run(output, overwrite_output=True)
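# The call above is roughly equivalent to the CLI invocation:
#   ffmpeg -i output_video.mp4 -i musicgen_out.wav -c:v copy -c:a aac combined_output.mp4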
# Streamlit UI
st.title("AI-Powered Video and Music Generation")
st.sidebar.title("Options")
st.sidebar.subheader("Video Generation")
image = st.sidebar.file_uploader("Upload an image", type=["jpg", "png"])
prompt = st.sidebar.text_input("Enter the prompt")
negative_prompt = st.sidebar.text_input("Enter the negative prompt")
video_length = st.sidebar.number_input("Enter the video length (seconds)", min_value=1, value=10)
st.sidebar.subheader("Music Generation")
music_prompt = st.sidebar.text_input("Enter the music prompt")
unconditional = st.sidebar.checkbox("Generate unconditional music")
if st.sidebar.button("Generate Video and Music"):
    if image is not None:
        image = Image.open(image)

        # Video generation with progress bar
        st.write("Generating video...")
        video_frames = generate_video(image, prompt, negative_prompt, video_length)
        export_frames_to_video(video_frames, "output_video.mp4")
        st.video("output_video.mp4")

        # Music generation with progress bar
        st.write("Generating music...")
        audio_values, sampling_rate = generate_music(music_prompt, unconditional)
        wavfile.write("musicgen_out.wav", sampling_rate, audio_values)
        st.audio("musicgen_out.wav")

        # Combine audio and video
        st.write("Combining audio and video...")
        combine_audio_video("musicgen_out.wav", "output_video.mp4", "combined_output.mp4")
        st.video("combined_output.mp4")
    else:
        st.error("Please upload an image before generating.")