Spaces:

surfiniaburger
/

aura-mind-glow

Sleeping

App Files Files Community

aura-mind-glow / story_generator.py

surfiniaburger

vid gen

032080e 3 months ago

raw

history blame

3.99 kB


	import time
	import os
	from google import genai
	from google.genai import types
	from langchain_community.document_loaders import PyPDFLoader
	from langchain_huggingface import HuggingFaceEndpoint
	from PIL import Image
	from utils import retry_with_exponential_backoff

	def create_story_prompt_from_pdf(pdf_path: str, user_prompt: str, llm: HuggingFaceEndpoint) -> str:
	"""
	Reads a PDF, summarizes its content, and creates a creative prompt for video generation.
	"""
	try:
	# 1. Load and read the PDF
	loader = PyPDFLoader(pdf_path)
	pages = loader.load_and_split()
	# Limit to first 3 pages for brevity and to manage token count
	pdf_content = " ".join(page.page_content for page in pages[:3])

	# 2. Use an LLM to generate a creative prompt
	system_prompt = """You are a creative assistant for a farmer. Your task is to read the summary of a document and a user's desired tone, and then write a short, visually descriptive prompt for a video generation model (like Google Veo). The prompt should tell a story about a farmer dealing with this paperwork, capturing the user's desired tone. Describe the scene, camera shots, and the farmer's actions.

	Example:
	- Document Summary: "Invoice for tractor parts, total $2,500. Delivery next week."
	- User Tone: "A feeling of progress and investment in the future."
	- Generated Prompt: "Close up on a farmer's weathered hands circling a date on a calendar in a rustic office. The camera pulls back to reveal invoices on the desk. The farmer looks out the window at the fields, a determined smile on their face. Golden morning light fills the room. Cinematic, hopeful, 4k."
	"""

	human_prompt = f"""
	Document Summary: "{pdf_content[:1500]}"
	User Tone: "{user_prompt}"

	Generate a creative video prompt based on the summary and tone.
	"""

	# The llm object from HuggingFaceEndpoint expects a string prompt
	creative_prompt = llm.invoke(human_prompt, config={"system_prompt": system_prompt})

	print(f"Generated creative prompt: {creative_prompt}")
	return creative_prompt

	except Exception as e:
	print(f"Error creating story from PDF: {e}")
	return f"Error processing PDF: {e}"

	@retry_with_exponential_backoff
	def generate_video_from_prompt(prompt: str, image_path: str = None) -> str:
	"""
	Generates a video using the Veo API from a text prompt and an optional starting image.
	Returns the path to the saved video file.
	"""
	# This function is now wrapped with the retry decorator.
	# The try/except block is still useful for catching non-retriable errors.
	try:
	client = genai.Client()

	if image_path:
	print(f"Generating video with initial image: {image_path}")
	img = Image.open(image_path)
	operation = client.models.generate_videos(
	model="veo-3.0-generate-preview",
	prompt=prompt,
	image=img,
	)
	else:
	print("Generating video from text prompt only.")
	operation = client.models.generate_videos(
	model="veo-3.0-generate-preview",
	prompt=prompt,
	)

	print("Video generation started. Polling for completion...")
	while not operation.done:
	print("Waiting for video generation to complete...")
	time.sleep(10)
	operation = client.operations.get(operation)

	generated_video = operation.response.generated_videos[0]

	video_file_name = "generated_story.mp4"
	client.files.download(file=generated_video.video)
	generated_video.video.save(video_file_name)

	print(f"Generated video saved to {video_file_name}")
	return video_file_name

	except Exception as e:
	print(f"Error generating video: {e}")
	return f"Error during video generation: {e}"