# ppt / app.py
# (repository-page residue kept for reference: "sarim's picture", commit "reduce", 185bc0f)
import asyncio
import re
from pydantic_ai.result import ResultData, RunResult
import streamlit as st
from pydantic_ai import Agent,RunContext, Tool
from pydantic_ai.models.groq import GroqModel
import nest_asyncio
from pydantic_ai.messages import ModelMessage
import pdfplumber
from transformers import pipeline
import torch
import os
import presentation as customClass
from streamlit_pdf_viewer import pdf_viewer
from dataclasses import dataclass
# Groq API key, read from the API_KEY environment variable (None when unset).
api_key = os.getenv("API_KEY")
# Text of each PDF page; filled by extract_data() and read by return_data().
data = []
# NOTE(review): not referenced anywhere else in the visible code.
last_message = ''
# NOTE(review): declared for generated presentations, but nothing visible appends to it.
result_data:list[customClass.PPT] = []
# to generate ppt
model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key = api_key)
# to summarize
# NOTE(review): built at import time, yet only referenced from commented-out
# code in ai_ppt() in the visible source.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Alternative summarization models that were tried and left disabled:
#summarizer = pipeline('text2text-generation', model='describeai/gemini')
#nlpaueb/legal-bert-base-uncased
def split_into_token_chunks(text: str, max_tokens: int = 300) -> list[str]:
    """
    Split a long string into chunks of at most ``max_tokens`` tokens.

    A token is a whitespace-separated word; runs of whitespace (including
    newlines) are collapsed to a single space inside each chunk.

    :param text: The input string to split.
    :param max_tokens: The maximum number of tokens (words) per chunk; must be >= 1.
    :return: A list of strings, each containing up to ``max_tokens`` tokens.
        Empty or whitespace-only input yields an empty list.
    :raises ValueError: If ``max_tokens`` is smaller than 1.
    """
    # A negative size would make range() yield nothing and silently drop the
    # whole text; zero would fail with an unrelated range() error. Reject both.
    if max_tokens < 1:
        raise ValueError(f"max_tokens must be a positive integer, got {max_tokens}")

    tokens = text.split()
    return [
        ' '.join(tokens[start:start + max_tokens])
        for start in range(0, len(tokens), max_tokens)
    ]
def return_data() -> str:
    # Concatenates every entry of the module-level `data` list, without a
    # separator, and returns the result as one string.
    # Documented with comments on purpose: this function is registered as an
    # agent tool, and a docstring may be surfaced to the model as the tool
    # description (TODO confirm against the agent framework).
    combined_text = "".join(data)
    return combined_text
@dataclass
class SupportDependencies:
    """Dependency container holding a single string.

    NOTE(review): in the visible code this class is only referenced from
    commented-out lines in ``ppt_content``.
    """

    # Presumably the joined PDF text (the commented-out usage passes
    # "".join(data)) -- confirm before re-enabling.
    db: str
async def ppt_content(data, max_chunks: int = 3):
    """Ask the Groq-backed agent to build presentation content from PDF text.

    The fragments in ``data`` are joined, split into word chunks with
    ``split_into_token_chunks`` and sent to the agent one chunk per request.
    Every request after the first carries the message history of the previous
    run, so the model sees the earlier chunks as context.

    :param data: Iterable of text fragments (one per PDF page) to present.
    :param max_chunks: Upper bound on the number of leading chunks that are
        sent. Defaults to 3, the number of requests this function used to
        hard-code; documents with fewer chunks no longer raise IndexError.
    :return: The ``data`` of the last agent run (validated against
        ``customClass.PPT``), or ``None`` when there is no text to process.
    """
    agent = Agent(
        model,
        result_type=customClass.PPT,
        tools=[
            return_data,
        ],
        system_prompt=(
            "You are an expert in making power-point perssentation",
            "Create 5 sliders",
            "Title Slide: short into about the presentation",
            "Methodology Slide: Summarize the methodology in detail",
            "Results Slide: Present key findings in detail in simple text and bullet points.",
            "Discussion Slide: Summarize the implications and limitations.",
            "Conclusion Slide: State the overall conclusion.",
            "Each slide should be seperate",
            # NOTE(review): there is no comma after the next literal, so it is
            # concatenated with "1. Title ..." into a single prompt entry. It
            # also announces 4 parts while only 3 are listed (an image
            # suggestion part was disabled). Left unchanged because editing
            # the prompt changes model behaviour -- confirm intent.
            "Each slide should have 4 parts :"
            "1. Title : title of the slide ",
            "2. Text: he presise and short description or narrative content of the slide. This should include key information, explanations, or supporting arguments. Keep it concise yet informative to avoid overwhelming the audience.",
            "3. Bullet point: A list of bullet points summarizing key information on the slide. Each bullet point should be short, presise, and highlight a specific aspect of the slide's topic. ideally, limit to 3-5 points.",
        ),
    )

    # Clamp so that a negative limit means "no chunks" instead of slicing
    # from the end of the list.
    chunks = split_into_token_chunks("".join(data))[:max(max_chunks, 0)]
    if not chunks:
        # Nothing extracted (empty or image-only PDF): skip the model calls.
        return None

    message_history: list[ModelMessage] = []
    result = None
    for chunk in chunks:
        # Await the async API directly; the blocking run_sync() wrapper is
        # meant for synchronous callers, not for use inside a coroutine.
        result = await agent.run(
            user_prompt=f"Create me a powerpoint presentation from {chunk}",
            message_history=message_history,
        )
        # Thread the conversation through so the next chunk extends it.
        message_history = result.all_messages()

    print(result.data)
    return result.data
def ai_ppt(data):
    """Run the asynchronous presentation generator to completion.

    :param data: Extracted page texts, forwarded unchanged to ``ppt_content``.
    :return: None.
    """
    # A summarization pre-pass (chunk the text, run each chunk through
    # `summarizer`, collect the `summary_text` values) exists in the original
    # only as commented-out code and is not executed.
    generation = ppt_content(data=data)
    asyncio.run(generation)
def extract_data(feed):
    """Append the text of every page of a PDF to the module-level ``data`` list.

    :param feed: PDF source handed straight to ``pdfplumber.open``; ``main``
        passes the Streamlit upload object.
    :return: None. Page texts are appended to ``data`` in page order.
    """
    with pdfplumber.open(feed) as pdf:
        # extract_text() may return None for a page without extractable text
        # (behaviour differs between pdfplumber versions); store "" instead so
        # the later "".join(data) calls cannot fail with a TypeError.
        data.extend(page.extract_text() or "" for page in pdf.pages)
    return None
# if data is not None:
# st.caption(data)
# ai_ppt(data=data)
def main():
    """Render the Streamlit page: PDF upload, "Make PPT" button and PDF preview."""
    uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
    if uploaded_file is None:
        # Nothing uploaded yet: only the uploader widget is shown.
        return

    # Pull the page texts into the module-level `data` list.
    extract_data(uploaded_file)

    make_ppt_clicked = st.button("Make PPT")
    if make_ppt_clicked:
        ai_ppt(data)

    # Show the uploaded document whether or not the button was pressed.
    pdf_bytes = uploaded_file.getvalue()
    pdf_viewer(input=pdf_bytes, width=700)
if __name__ == '__main__':
    # asyncio is already imported at the top of the file, so it is not
    # re-imported here.
    # Patch asyncio so an event loop can be re-entered before the UI triggers
    # any agent call.
    nest_asyncio.apply()
    main()