import os
import tempfile

import moviepy.editor as mpe
import scipy.io.wavfile
import streamlit as st
from PIL import Image, ImageDraw, ImageFont
from transformers import pipeline

# Load the TTS pipeline once at import time so repeated "Generate" clicks
# don't re-download/re-initialize the model.
# NOTE(review): "espnet/kan-bayashi_ljspeech_fastspeech2" is an ESPnet
# checkpoint — confirm it is actually loadable via the transformers
# text-to-speech pipeline, or swap in a transformers-native TTS model.
tts_model = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_fastspeech2")


def generate_audio(text, output_path):
    """Synthesize *text* to a playable WAV file at *output_path*.

    The transformers TTS pipeline returns a dict with keys "audio"
    (float ndarray) and "sampling_rate". The original code wrote the raw
    ndarray bytes with no RIFF/WAV header, producing an unplayable file;
    scipy.io.wavfile.write emits a proper WAV container.
    """
    result = tts_model(text)
    scipy.io.wavfile.write(output_path, rate=result["sampling_rate"], data=result["audio"])


def generate_images(text, output_folder, num_images=5):
    """Render *text* as up to *num_images* simple white slide images.

    Frames are written as frame_000.png, frame_001.png, ... so a plain
    lexicographic sort recovers their order.
    """
    os.makedirs(output_folder, exist_ok=True)

    words = text.split() or [""]  # guarantee at least one frame for empty input
    # Ceiling division: floor division could produce up to ~2x num_images
    # chunks (e.g. 11 words / 5 images -> chunk_size 2 -> 6 chunks).
    chunk_size = max(1, -(-len(words) // num_images))
    chunks = [words[i:i + chunk_size] for i in range(0, len(words), chunk_size)]

    font = ImageFont.load_default()  # invariant: hoisted out of the loop
    for i, chunk in enumerate(chunks):
        image = Image.new("RGB", (1280, 720), color=(255, 255, 255))
        draw = ImageDraw.Draw(image)
        # Use a fresh name instead of rebinding the `text` parameter.
        caption = " ".join(chunk)
        draw.text((10, 360), caption, fill=(0, 0, 0), font=font)
        image.save(os.path.join(output_folder, f"frame_{i:03d}.png"))


def create_video(image_folder, audio_path, output_path):
    """Assemble the PNG frames in *image_folder* plus the narration at
    *audio_path* into an MP4 at *output_path*.

    Each slide is shown for audio_duration / frame_count seconds so the
    video and the narration end together (the original fixed 2 s per
    slide, which desynchronized video and audio lengths).
    """
    image_files = sorted(
        os.path.join(image_folder, name)
        for name in os.listdir(image_folder)
        if name.endswith(".png")
    )
    audio = mpe.AudioFileClip(audio_path)
    per_image = audio.duration / max(1, len(image_files))

    clips = [mpe.ImageClip(path).set_duration(per_image) for path in image_files]
    video = mpe.concatenate_videoclips(clips, method="compose").set_audio(audio)
    video.write_videofile(output_path, fps=24)


def main():
    """Streamlit entry point: text box -> TTS audio + slides -> MP4 preview."""
    st.title("Text to Video Generator")
    text_input = st.text_area("Enter the text to generate video:", "")

    if st.button("Generate Video"):
        if text_input:
            # TemporaryDirectory cleans up all intermediates (audio, frames,
            # video) once the bytes have been handed to st.video.
            with tempfile.TemporaryDirectory() as temp_dir:
                audio_path = os.path.join(temp_dir, "audio.wav")
                image_folder = os.path.join(temp_dir, "images")
                video_path = os.path.join(temp_dir, "video.mp4")

                generate_audio(text_input, audio_path)
                generate_images(text_input, image_folder)
                create_video(image_folder, audio_path, video_path)

                with open(video_path, "rb") as video_file:
                    st.video(video_file.read(), format="video/mp4")
        else:
            st.error("Please enter some text.")


if __name__ == "__main__":
    main()