aura-mind-glow / story_generator.py
surfiniaburger's picture
vid gen
032080e
import time
import os
from google import genai
from google.genai import types
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEndpoint
from PIL import Image
from utils import retry_with_exponential_backoff
# Instructions and a worked example sent to the LLM ahead of the per-request text.
_STORY_SYSTEM_PROMPT = """You are a creative assistant for a farmer. Your task is to read the summary of a document and a user's desired tone, and then write a short, visually descriptive prompt for a video generation model (like Google Veo). The prompt should tell a story about a farmer dealing with this paperwork, capturing the user's desired tone. Describe the scene, camera shots, and the farmer's actions.
Example:
- Document Summary: "Invoice for tractor parts, total $2,500. Delivery next week."
- User Tone: "A feeling of progress and investment in the future."
- Generated Prompt: "Close up on a farmer's weathered hands circling a date on a calendar in a rustic office. The camera pulls back to reveal invoices on the desk. The farmer looks out the window at the fields, a determined smile on their face. Golden morning light fills the room. Cinematic, hopeful, 4k."
"""

# Per-request part of the prompt; filled in with the PDF text and the user's tone.
_STORY_HUMAN_TEMPLATE = """
Document Summary: "{summary}"
User Tone: "{tone}"
Generate a creative video prompt based on the summary and tone.
"""


def create_story_prompt_from_pdf(pdf_path: str, user_prompt: str, llm: HuggingFaceEndpoint) -> str:
    """Create a creative video-generation prompt from a PDF and a desired tone.

    The PDF is loaded with ``PyPDFLoader``; the text of the first three
    documents returned by ``load_and_split()`` is joined and truncated to
    1500 characters to keep the LLM prompt small. That text and the user's
    tone are appended to a fixed set of instructions and sent to ``llm``.

    Args:
        pdf_path: Path of the PDF file to read.
        user_prompt: The tone or feeling the user wants the video to convey.
        llm: Language model whose ``invoke`` method accepts a string prompt.

    Returns:
        The text produced by ``llm``. If any step raises, the exception is
        printed and a string starting with ``"Error processing PDF:"`` is
        returned instead.
    """
    try:
        # 1. Load the PDF and keep only the leading documents to limit token count.
        loader = PyPDFLoader(pdf_path)
        pages = loader.load_and_split()
        pdf_content = " ".join(page.page_content for page in pages[:3])

        # 2. Build one plain-text prompt. The instructions must be part of the
        #    prompt string itself: the ``config`` argument of ``invoke`` carries
        #    run settings (callbacks, tags, ...), not prompt content, so passing
        #    the system prompt there meant the model never saw it.
        human_prompt = _STORY_HUMAN_TEMPLATE.format(
            summary=pdf_content[:1500],
            tone=user_prompt,
        )
        creative_prompt = llm.invoke(_STORY_SYSTEM_PROMPT + human_prompt)

        print(f"Generated creative prompt: {creative_prompt}")
        return creative_prompt
    except Exception as e:
        # Best-effort boundary: callers receive an error string, not an exception.
        print(f"Error creating story from PDF: {e}")
        return f"Error processing PDF: {e}"
@retry_with_exponential_backoff
def generate_video_from_prompt(prompt: str, image_path: str = None) -> str:
    """Generate a video with the Veo API and save it to ``generated_story.mp4``.

    Starts a video-generation operation from ``prompt`` (optionally seeded
    with the image at ``image_path``), polls every 10 seconds until the
    operation reports completion, then downloads and saves the first
    generated video.

    Args:
        prompt: Text description of the video to generate.
        image_path: Optional path of an image to use as the starting frame.

    Returns:
        The path of the saved video file on success. If anything raises, the
        exception is printed and a string starting with
        ``"Error during video generation:"`` is returned instead.
    """
    # NOTE(review): every Exception raised below is caught and converted into
    # a returned string, so the retry decorator presumably never sees a failure
    # to retry - confirm against utils.retry_with_exponential_backoff.
    video_file_name = "generated_story.mp4"
    try:
        client = genai.Client()
        request = {"model": "veo-3.0-generate-preview", "prompt": prompt}

        if image_path:
            print(f"Generating video with initial image: {image_path}")
            # Close the image file once the request has been submitted.
            with Image.open(image_path) as img:
                operation = client.models.generate_videos(image=img, **request)
        else:
            print("Generating video from text prompt only.")
            operation = client.models.generate_videos(**request)

        print("Video generation started. Polling for completion...")
        while not operation.done:
            print("Waiting for video generation to complete...")
            time.sleep(10)
            operation = client.operations.get(operation)

        # A finished operation can still have failed or produced no videos;
        # report that explicitly instead of an opaque AttributeError/IndexError.
        operation_error = getattr(operation, "error", None)
        if operation_error:
            raise RuntimeError(f"Video generation operation failed: {operation_error}")
        generated_videos = getattr(operation.response, "generated_videos", None)
        if not generated_videos:
            raise RuntimeError("Video generation finished without returning a video.")

        generated_video = generated_videos[0]
        # Fetch the video bytes from the service before writing them to disk.
        client.files.download(file=generated_video.video)
        generated_video.video.save(video_file_name)
        print(f"Generated video saved to {video_file_name}")
        return video_file_name
    except Exception as e:
        # Best-effort boundary: callers receive an error string, not an exception.
        print(f"Error generating video: {e}")
        return f"Error during video generation: {e}"