Spaces:
Sleeping
Sleeping
import mimetypes
import os
import time
from typing import Optional

from google import genai
from google.genai import types
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEndpoint
from PIL import Image

from utils import retry_with_exponential_backoff
# Instructions (plus one worked example) that tell the LLM how to turn a
# document summary and a desired tone into a video-generation prompt.
_STORY_SYSTEM_PROMPT = """You are a creative assistant for a farmer. Your task is to read the summary of a document and a user's desired tone, and then write a short, visually descriptive prompt for a video generation model (like Google Veo). The prompt should tell a story about a farmer dealing with this paperwork, capturing the user's desired tone. Describe the scene, camera shots, and the farmer's actions.
Example:
- Document Summary: "Invoice for tractor parts, total $2,500. Delivery next week."
- User Tone: "A feeling of progress and investment in the future."
- Generated Prompt: "Close up on a farmer's weathered hands circling a date on a calendar in a rustic office. The camera pulls back to reveal invoices on the desk. The farmer looks out the window at the fields, a determined smile on their face. Golden morning light fills the room. Cinematic, hopeful, 4k."
"""


def create_story_prompt_from_pdf(pdf_path: str, user_prompt: str, llm: HuggingFaceEndpoint) -> str:
    """Create a creative video-generation prompt from the start of a PDF.

    The text of the first three pages is joined, truncated to 1500
    characters, and sent to ``llm`` together with the instruction text and
    the user's desired tone.

    Args:
        pdf_path: Path of the PDF file to read.
        user_prompt: The tone or feeling the user wants the video to convey.
        llm: Text-generation endpoint; it is invoked with one string prompt.

    Returns:
        The text produced by ``llm``. If any step fails, no exception is
        raised; instead a string starting with ``"Error processing PDF:"``
        is returned, so callers must check for that prefix.
    """
    try:
        # 1. Load the PDF. ``load()`` yields one document per page, which is
        #    what the three-page limit below is meant to count;
        #    ``load_and_split()`` would return text-splitter chunks instead.
        loader = PyPDFLoader(pdf_path)
        pages = loader.load()

        # Limit to the first 3 pages for brevity and to manage token count.
        pdf_content = " ".join(page.page_content for page in pages[:3])

        # 2. Build the request for the LLM.
        human_prompt = (
            "\n"
            f'Document Summary: "{pdf_content[:1500]}"\n'
            f'User Tone: "{user_prompt}"\n'
            "Generate a creative video prompt based on the summary and tone.\n"
        )

        # The endpoint takes a single string prompt. The ``config`` argument
        # of ``invoke`` carries run settings (callbacks, tags, ...), not
        # prompt text, so the instructions must be part of the prompt itself
        # to reach the model.
        full_prompt = f"{_STORY_SYSTEM_PROMPT}\n{human_prompt}"
        creative_prompt = llm.invoke(full_prompt)

        print(f"Generated creative prompt: {creative_prompt}")
        return creative_prompt
    except Exception as e:
        # Deliberate catch-all: failures are reported through the return
        # value rather than by raising.
        print(f"Error creating story from PDF: {e}")
        return f"Error processing PDF: {e}"
def generate_video_from_prompt(
    prompt: str,
    image_path: Optional[str] = None,
    output_path: str = "generated_story.mp4",
) -> str:
    """Generate a video with the Veo API and save it to disk.

    Args:
        prompt: Text prompt describing the video.
        image_path: Optional path of an image to use as the starting frame.
        output_path: Where the resulting video is written. Defaults to
            ``"generated_story.mp4"``.

    Returns:
        The path of the saved video file. If anything fails, no exception is
        raised; instead a string starting with
        ``"Error during video generation:"`` is returned, so callers must
        check for that prefix.
    """
    # NOTE(review): an earlier comment said this function is wrapped with a
    # retry decorator, but none is applied in the visible source. Because
    # every exception below is turned into a returned error string, a
    # wrapper that retries on exceptions would not see failures - confirm
    # the intended behavior.
    try:
        client = genai.Client()
        request = {"model": "veo-3.0-generate-preview", "prompt": prompt}

        if image_path:
            print(f"Generating video with initial image: {image_path}")
            # The SDK expects its own image type (raw bytes plus MIME type),
            # not a PIL image object.
            mime_type, _ = mimetypes.guess_type(image_path)
            if mime_type is None:
                raise ValueError(f"Cannot determine image type of {image_path!r}")
            with open(image_path, "rb") as image_file:
                image_bytes = image_file.read()
            request["image"] = types.Image(image_bytes=image_bytes, mime_type=mime_type)
        else:
            print("Generating video from text prompt only.")

        operation = client.models.generate_videos(**request)

        print("Video generation started. Polling for completion...")
        while not operation.done:
            print("Waiting for video generation to complete...")
            time.sleep(10)
            operation = client.operations.get(operation)

        # A finished operation may carry an error or an empty result; report
        # that explicitly instead of failing on the indexing below.
        operation_error = getattr(operation, "error", None)
        if operation_error:
            raise RuntimeError(f"Video generation failed: {operation_error}")
        response = operation.response
        generated_videos = response.generated_videos if response else None
        if not generated_videos:
            raise RuntimeError("Video generation finished without returning a video")

        generated_video = generated_videos[0]
        video_file_name = output_path
        # Fetch the video bytes first; ``save`` then writes them to disk.
        client.files.download(file=generated_video.video)
        generated_video.video.save(video_file_name)
        print(f"Generated video saved to {video_file_name}")
        return video_file_name
    except Exception as e:
        # Deliberate catch-all: failures are reported through the return
        # value rather than by raising.
        print(f"Error generating video: {e}")
        return f"Error during video generation: {e}"