import io
import time
import os
from google import genai
from google.genai import types
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEndpoint
from PIL import Image
from utils import retry_with_exponential_backoff
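
# NOTE: `retry_with_exponential_backoff` lives in utils.py, which is not shown here.
# A minimal sketch of the assumed interface (parameter names and defaults are
# assumptions, kept as a comment so it does not shadow the real implementation):
#
#     def retry_with_exponential_backoff(func, max_retries=5, base_delay=2):
#         def wrapper(*args, **kwargs):
#             for attempt in range(max_retries):
#                 try:
#                     return func(*args, **kwargs)
#                 except Exception:
#                     if attempt == max_retries - 1:
#                         raise
#                     time.sleep(base_delay * 2 ** attempt)
#         return wrapper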


def create_story_prompt_from_pdf(pdf_path: str, user_prompt: str, llm: HuggingFaceEndpoint) -> str:
    """
    Reads a PDF, summarizes its content, and creates a creative prompt for video generation.
    """
    try:
        # 1. Load and read the PDF
        loader = PyPDFLoader(pdf_path)
        pages = loader.load_and_split()

        # Limit to the first 3 pages for brevity and to manage token count
        pdf_content = " ".join(page.page_content for page in pages[:3])
        # 2. Use an LLM to generate a creative prompt
        system_prompt = """You are a creative assistant for a farmer. Your task is to read the summary of a document and a user's desired tone, and then write a short, visually descriptive prompt for a video generation model (like Google Veo). The prompt should tell a story about a farmer dealing with this paperwork, capturing the user's desired tone. Describe the scene, camera shots, and the farmer's actions.

Example:
- Document Summary: "Invoice for tractor parts, total $2,500. Delivery next week."
- User Tone: "A feeling of progress and investment in the future."
- Generated Prompt: "Close up on a farmer's weathered hands circling a date on a calendar in a rustic office. The camera pulls back to reveal invoices on the desk. The farmer looks out the window at the fields, a determined smile on their face. Golden morning light fills the room. Cinematic, hopeful, 4k."
"""
| human_prompt = f""" | |
| Document Summary: "{pdf_content[:1500]}" | |
| User Tone: "{user_prompt}" | |
| Generate a creative video prompt based on the summary and tone. | |
| """ | |
        # HuggingFaceEndpoint is a plain text-completion LLM: it expects a single string
        # prompt and has no separate system-prompt channel, so the system instructions
        # are prepended to the user prompt.
        creative_prompt = llm.invoke(f"{system_prompt}\n{human_prompt}")

        print(f"Generated creative prompt: {creative_prompt}")
        return creative_prompt
    except Exception as e:
        print(f"Error creating story from PDF: {e}")
        return f"Error processing PDF: {e}"


# The retry decorator from utils handles transient API failures with exponential
# backoff; the try/except inside is still useful for catching non-retriable errors.
@retry_with_exponential_backoff
def generate_video_from_prompt(prompt: str, image_path: str = None) -> str:
    """
    Generates a video using the Veo API from a text prompt and an optional starting image.
    Returns the path to the saved video file.
    """
    try:
        client = genai.Client()

        if image_path:
            print(f"Generating video with initial image: {image_path}")
            # Veo's `image` parameter takes a google.genai.types.Image, so the PIL
            # image is re-encoded to PNG bytes and wrapped accordingly.
            img = Image.open(image_path)
            buffer = io.BytesIO()
            img.save(buffer, format="PNG")
            start_image = types.Image(image_bytes=buffer.getvalue(), mime_type="image/png")
            operation = client.models.generate_videos(
                model="veo-3.0-generate-preview",
                prompt=prompt,
                image=start_image,
            )
        else:
            print("Generating video from text prompt only.")
            operation = client.models.generate_videos(
                model="veo-3.0-generate-preview",
                prompt=prompt,
            )

        print("Video generation started. Polling for completion...")
        while not operation.done:
            print("Waiting for video generation to complete...")
            time.sleep(10)
            operation = client.operations.get(operation)
        # Download the first generated video and save it locally.
        generated_video = operation.response.generated_videos[0]
        video_file_name = "generated_story.mp4"
        client.files.download(file=generated_video.video)
        generated_video.video.save(video_file_name)

        print(f"Generated video saved to {video_file_name}")
        return video_file_name
    except Exception as e:
        print(f"Error generating video: {e}")
        return f"Error during video generation: {e}"