Spaces:
Runtime error
Runtime error
import json | |
import os | |
from typing import Optional, List, Dict, Any | |
from ppt import SlideDeck | |
from gemini import GeminiModel, LangchainGemini | |
from prompts import get_ppt_prompt | |
from utils import parse_page_ranges | |
# Define available models | |
MODELS: Dict[str, Any] = { | |
"gemini_flash_l": LangchainGemini, | |
"gemini_flash": GeminiModel | |
} | |
class GenPPT: | |
""" | |
A class to generate PowerPoint presentations from text or PDF sources using AI models. | |
""" | |
def __init__( | |
self, | |
source: str = "", | |
text: str = "", | |
agenda: str = "Generic", | |
model_name: str = "gemini_flash_l", | |
llm_api_key: Optional[str] = None, | |
pages: Optional[str] = None, | |
max_pages: int = 20, | |
): | |
""" | |
Initialize the GenPPT object. | |
Args: | |
source (str): Path to the source PDF file. | |
text (str): Input text for presentation generation. | |
agenda (str): Agenda type for the presentation. | |
model_name (str): Name of the AI model to use. | |
llm_api_key (Optional[str]): API key for the language model. | |
pages (Optional[str]): Page range to extract from PDF. | |
max_pages (int): Maximum number of pages to process. | |
""" | |
self.source: Optional[str] = source.strip() or None | |
self.text: Optional[str] = text.strip() or None | |
self.agenda: str = agenda.strip() or "Generic" | |
self.max_pages: int = max_pages | |
self.pages=pages | |
self.model_name: str = model_name.strip() | |
try: | |
self.llm = MODELS[self.model_name](API_KEY=llm_api_key) | |
except KeyError: | |
print(f"Warning: Model '{self.model_name}' not found. Using default model.") | |
self.llm = MODELS["gemini_flash_l"](API_KEY=llm_api_key) | |
def run(self) -> Optional[Any]: | |
""" | |
Run the presentation generation process. | |
Returns: | |
Optional[Any]: Generated presentation or None if an error occurs. | |
""" | |
try: | |
if self.text is None: | |
if self.source is not None: | |
self.text = self.extract_markdown_from_pdf() | |
else: | |
raise ValueError("Both source and text cannot be None.") | |
slides = self.generate_slides() | |
return self.generate_presentation(slides) | |
except Exception as e: | |
print(f"An error occurred during presentation generation: {e}") | |
return None | |
def extract_markdown_from_pdf(self) -> str: | |
""" | |
Extract markdown content from the source PDF. | |
Returns: | |
str: Extracted markdown text. | |
Raises: | |
ImportError: If pymupdf4llm is not installed. | |
FileNotFoundError: If the source PDF file is not found. | |
""" | |
try: | |
import pymupdf4llm | |
except ImportError: | |
raise ImportError("pymupdf4llm is required for PDF extraction. Please install it.") | |
if not os.path.exists(self.source): | |
raise FileNotFoundError(f"Source PDF file not found: {self.source}") | |
print(f"Extracting {len(self.pages)} pages") | |
return pymupdf4llm.to_markdown(self.source, pages=self.pages) | |
def generate_slides(self) -> List[Dict[str, Any]]: | |
""" | |
Generate slide content using the AI model. | |
Returns: | |
List[Dict[str, Any]]: List of slide data dictionaries. | |
Raises: | |
ValueError: If the model response is invalid. | |
""" | |
prompt = f"{get_ppt_prompt()}\nAgenda: {self.agenda}\nContent: {self.text}" | |
resp = self.llm.execute(prompt) | |
try: | |
return json.loads(resp.content.strip("```").replace("json", "").strip()) | |
except json.JSONDecodeError as e: | |
raise ValueError(f"Invalid model response: {e}") | |
def generate_presentation(self, content: List[Dict[str, Any]]) -> Any: | |
""" | |
Create the final presentation using the generated slide content. | |
Args: | |
content (List[Dict[str, Any]]): List of slide data dictionaries. | |
Returns: | |
Any: The generated presentation object. | |
""" | |
deck = SlideDeck() | |
title_slide_data, *slides_data = content | |
return deck.create_presentation(title_slide_data, slides_data) |