"""Gradio app that turns a text prompt into a narrated stock-footage video.

Pipeline: synthesize speech with edge-tts, optionally mix in background
music with pydub, pick keywords from the text, fetch matching clips from
the Pexels API, and assemble everything with FFmpeg.
"""
import asyncio
import logging
import math
import os
import re
import shutil
import subprocess
import tempfile
from collections import Counter

import edge_tts
import gradio as gr
import requests
from pydub import AudioSegment

# Basic logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Pexels API key (configure it in the Hugging Face Space secrets)
PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY", "YOUR_API_KEY")

# Value PEXELS_API_KEY falls back to when the secret is missing; it must be
# treated as "not configured" rather than sent to the API.
_PLACEHOLDER_API_KEY = "YOUR_API_KEY"

# Common Spanish words that carry no search value.
_STOP_WORDS = frozenset(
    {"el", "la", "los", "las", "de", "en", "y", "a", "que", "es", "por", "un", "una", "con"}
)


def _remove_quietly(path):
    """Delete *path* if it exists, ignoring filesystem errors."""
    try:
        os.remove(path)
    except OSError:
        pass


# --- Functions optimized for Spaces ---

def extract_keywords(text, max_keywords=3):
    """Return the most frequent meaningful words of *text*.

    Punctuation is stripped and the text lower-cased; words of three
    characters or fewer and Spanish stop words are ignored. Ties keep the
    order in which the words first appear.

    Args:
        text: Free-form input text. ``None`` or empty yields ``[]``.
        max_keywords: Maximum number of keywords to return.

    Returns:
        A list of keywords ordered by descending frequency.
    """
    if not text:
        return []

    cleaned = re.sub(r'[^\w\s]', '', text.lower())
    counts = Counter(
        word for word in cleaned.split()
        if len(word) > 3 and word not in _STOP_WORDS
    )
    # most_common() is a stable descending sort, matching first-seen order on ties.
    return [word for word, _ in counts.most_common()[:max_keywords]]


def _best_video_link(video_files):
    """Return the download link of the highest-resolution entry, or ``None``.

    Entries without a link are skipped, and a missing or null width/height
    counts as 0 so one odd entry cannot break the comparison.
    """
    candidates = [f for f in video_files if f.get("link")]
    if not candidates:
        return None
    best = max(
        candidates,
        key=lambda f: (f.get("width") or 0) * (f.get("height") or 0),
    )
    return best["link"]


def search_pexels_videos(keywords, per_query=2):
    """Search the Pexels video API once per keyword.

    Args:
        keywords: Iterable of search terms.
        per_query: Number of results requested per keyword.

    Returns:
        A list of direct video URLs (best resolution per result). Empty when
        the API key is not configured or nothing was found. Failures for one
        keyword are logged and do not abort the remaining keywords.
    """
    # The module default is a truthy placeholder, so check for it explicitly.
    if not PEXELS_API_KEY or PEXELS_API_KEY == _PLACEHOLDER_API_KEY:
        logger.error("API_KEY de Pexels no configurada")
        return []

    headers = {"Authorization": PEXELS_API_KEY}
    video_urls = []

    for query in keywords:
        params = {
            "query": query,
            "per_page": per_query,
            "orientation": "landscape",
            "size": "medium",
        }
        try:
            response = requests.get(
                "https://api.pexels.com/videos/search",
                headers=headers,
                params=params,
                timeout=15,
            )
            if response.status_code != 200:
                logger.warning(
                    "Pexels respondió con estado %s para la consulta '%s'",
                    response.status_code,
                    query,
                )
                continue

            for video in response.json().get("videos", []):
                link = _best_video_link(video.get("video_files") or [])
                if link:
                    video_urls.append(link)
        except Exception as e:
            # Best-effort per keyword: log and move on to the next one.
            logger.error("Error buscando videos: %s", e)

    return video_urls


async def generate_tts(text, output_path, voice="es-ES-ElviraNeural"):
    """Synthesize *text* to *output_path* with edge-tts.

    Returns:
        ``True`` on success, ``False`` if synthesis failed (error is logged).
    """
    try:
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(output_path)
        return True
    except Exception as e:
        logger.error("Error en TTS: %s", e)
        return False


def download_video(url, temp_dir):
    """Download *url* into a uniquely named ``.mp4`` file inside *temp_dir*.

    Each call gets its own file name so several downloads in the same
    process do not overwrite each other.

    Returns:
        The path of the downloaded file, or ``None`` on failure (the error
        is logged and any partial file is removed).
    """
    filepath = None
    try:
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with tempfile.NamedTemporaryFile(
                dir=temp_dir, prefix="video_", suffix=".mp4", delete=False
            ) as f:
                filepath = f.name
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return filepath
    except Exception as e:
        logger.error("Error descargando video: %s", e)
        if filepath:
            _remove_quietly(filepath)
        return None


def create_video(audio_path, video_paths, output_path):
    """Concatenate *video_paths* and mux *audio_path* over them with FFmpeg.

    The video streams are copied without re-encoding, the audio is encoded
    as AAC, and the output stops at the shorter of the two inputs.

    Args:
        audio_path: Narration (or mixed) audio file.
        video_paths: Clips to concatenate, in order.
        output_path: Destination file; overwritten if it exists.

    Returns:
        ``True`` on success, ``False`` otherwise (the error is logged).
    """
    if not video_paths:
        logger.error("Error creando video: %s", "no hay videos de entrada")
        return False

    list_file = None
    try:
        # Write the concat list next to the clips using absolute paths, so
        # the process working directory never has to change.
        work_dir = os.path.dirname(os.path.abspath(video_paths[0]))
        with tempfile.NamedTemporaryFile(
            "w", dir=work_dir, prefix="concat_", suffix=".txt",
            delete=False, encoding="utf-8",
        ) as f:
            list_file = f.name
            for path in video_paths:
                # Inside single quotes the concat demuxer needs ' written as '\''.
                escaped = os.path.abspath(path).replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        cmd = [
            "ffmpeg",
            "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", list_file,
            "-i", audio_path,
            "-c:v", "copy",
            "-c:a", "aac",
            "-shortest",
            output_path,
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except subprocess.CalledProcessError as e:
        # Surface the tail of ffmpeg's stderr; that is where the cause is.
        detail = (e.stderr or b"").decode("utf-8", errors="replace")[-2000:]
        logger.error("Error creando video: %s\n%s", e, detail)
        return False
    except Exception as e:
        logger.error("Error creando video: %s", e)
        return False
    finally:
        if list_file:
            _remove_quietly(list_file)


def add_background_music(audio_path, music_path):
    """Overlay *music_path* (attenuated by 20 dB) under the speech track.

    The music is looped when shorter than the speech and trimmed to the
    speech length.

    Returns:
        Path to a new temporary ``.mp3`` with the mix, or *audio_path*
        unchanged if mixing failed (the error is logged).
    """
    mixed_path = None
    try:
        speech = AudioSegment.from_file(audio_path)
        background = AudioSegment.from_file(music_path) - 20  # lower the volume

        # Loop the music if it is shorter than the narration.
        if len(background) < len(speech):
            if len(background) == 0:
                raise ValueError("la música de fondo está vacía")
            loops = math.ceil(len(speech) / len(background))
            background = background * loops

        combined = speech.overlay(background[:len(speech)])

        # Reserve the name and close the handle before exporting, so the
        # export does not write to a file that is still open.
        fd, mixed_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        combined.export(mixed_path, format="mp3")
        return mixed_path
    except Exception as e:
        logger.error("Error mezclando audio: %s", e)
        if mixed_path:
            _remove_quietly(mixed_path)
        return audio_path


async def generate_video(text, music_file=None):
    """Build the final video for *text*; entry point used by the UI.

    Blocking steps (HTTP requests, pydub, FFmpeg) run in the default
    executor so the event loop stays responsive.

    Args:
        text: Script to narrate; also the source of search keywords.
        music_file: Optional path to a background-music file.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is ``None``
        when generation failed.
    """
    if not text or not text.strip():
        return None, "Error: el texto está vacío"

    loop = asyncio.get_running_loop()
    temp_dir = tempfile.mkdtemp()
    mixed_audio = None
    succeeded = False

    try:
        # 1. Generate the TTS audio
        tts_path = os.path.join(temp_dir, "audio.mp3")
        if not await generate_tts(text, tts_path):
            return None, "Error generando voz"

        # 2. Add background music when provided
        final_audio = tts_path
        if music_file:
            candidate = await loop.run_in_executor(
                None, add_background_music, tts_path, music_file
            )
            if candidate != tts_path:
                mixed_audio = final_audio = candidate

        # 3. Extract keywords
        keywords = extract_keywords(text)
        logger.info("Palabras clave identificadas: %s", keywords)

        if not keywords:
            return None, "No se pudieron extraer palabras clave del texto"

        # 4. Search for and download the clips
        video_urls = await loop.run_in_executor(None, search_pexels_videos, keywords)
        if not video_urls:
            return None, "No se encontraron videos para las palabras clave"

        video_paths = []
        for url in video_urls:
            path = await loop.run_in_executor(None, download_video, url, temp_dir)
            if path:
                video_paths.append(path)

        if not video_paths:
            return None, "Error descargando videos"

        # 5. Assemble the final video
        output_path = os.path.join(temp_dir, "final_video.mp4")
        created = await loop.run_in_executor(
            None, create_video, final_audio, video_paths, output_path
        )
        if created:
            succeeded = True
            return output_path, "Video creado exitosamente"
        return None, "Error en la creación del video"

    except Exception as e:
        logger.exception("Error inesperado")
        return None, f"Error: {str(e)}"

    finally:
        # The mixed track lives outside temp_dir and is no longer needed
        # once FFmpeg has finished (or the run has failed).
        if mixed_audio:
            _remove_quietly(mixed_audio)
        # On success temp_dir holds the returned video, so it is kept and
        # left to the hosting platform; on failure nothing in it is used.
        if not succeeded:
            shutil.rmtree(temp_dir, ignore_errors=True)


# --- Gradio interface ---
with gr.Blocks(title="Generador Automático de Videos con IA", theme="soft") as demo:
    gr.Markdown("# 🎬 Generador Automático de Videos con IA")
    gr.Markdown("Transforma texto en videos usando contenido de Pexels y voz sintetizada")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Texto para el video",
                placeholder="Describe el contenido que quieres en el video...",
                lines=5
            )
            music_input = gr.Audio(
                label="Música de fondo (opcional)",
                type="filepath"
            )
            generate_btn = gr.Button("Generar Video", variant="primary")

        with gr.Column():
            video_output = gr.Video(label="Video Generado", interactive=False)
            status_output = gr.Textbox(label="Estado", interactive=False)

    # Show a "processing" status immediately, then run the real pipeline.
    generate_btn.click(
        fn=lambda: (None, "Procesando... (esto puede tomar 1-2 minutos)"),
        outputs=[video_output, status_output],
        queue=False
    ).then(
        fn=generate_video,
        inputs=[text_input, music_input],
        outputs=[video_output, status_output]
    )

    gr.Markdown("### Características:")
    gr.Markdown("""
    - **Extracción inteligente de palabras clave** del texto
    - **Búsqueda automática de videos** en Pexels
    - **Generación de voz** con Edge TTS
    - **Música de fondo opcional**
    - **Procesamiento eficiente** con FFmpeg
    """)

# Entry point for Hugging Face Spaces
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)