import asyncio
import re
from pydantic_ai.result import ResultData, RunResult
import streamlit as st
from pydantic_ai import Agent, RunContext, Tool
from pydantic_ai.models.groq import GroqModel
import nest_asyncio
from pydantic_ai.messages import ModelMessage
import pdfplumber
from transformers import pipeline
import torch
import os
import presentation as customClass
from streamlit_pdf_viewer import pdf_viewer
from dataclasses import dataclass
api_key = os.getenv("API_KEY")

# Shared module-level state: extracted PDF text, last agent message, and structured slide data.
data = []
last_message = ''
result_data: list[customClass.PPT] = []

# Groq-hosted model used to generate the PPT content.
model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key=api_key)

# Summarization pipeline (only used by the commented-out code in ai_ppt below).
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# summarizer = pipeline('text2text-generation', model='describeai/gemini')
# nlpaueb/legal-bert-base-uncased
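
# Illustrative usage of the summarizer (a sketch, assuming `text` holds one chunk of
# extracted PDF text):
#   summarizer(text, max_length=150, min_length=40, do_sample=False)
# returns a list like [{"summary_text": "..."}]; the commented-out code in ai_ppt()
# below follows the same pattern.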
def split_into_token_chunks(text: str, max_tokens: int = 300) -> list:
    """
    Splits a long string into chunks of a specified maximum number of tokens (words).

    :param text: The input string to split.
    :param max_tokens: The maximum number of tokens (words) per chunk.
    :return: A list of strings, each containing up to `max_tokens` tokens.
    """
    # Split the text into words (tokens).
    tokens = text.split()
    # Group the words into chunks of at most `max_tokens` words each.
    chunks = [' '.join(tokens[i:i + max_tokens]) for i in range(0, len(tokens), max_tokens)]
    return chunks
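
# Illustrative example: split_into_token_chunks("a b c d e", max_tokens=2)
# returns ["a b", "c d", "e"]. Chunks are counted in whitespace-delimited words,
# so they only approximate true model tokens.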
def return_data() -> str:
    """Tool: return the full extracted PDF text as a single string."""
    return "".join(data)
@dataclass
class SupportDependencies:
    db: str
async def ppt_content(data):
    # Agent that turns extracted PDF text into structured slide content.
    agent = Agent(
        model,
        result_type=customClass.PPT,
        # deps_type=SupportDependencies,
        tools=[
            return_data,
        ],
        system_prompt=(
            "You are an expert in making PowerPoint presentations",
            "Create 5 slides",
            "Title Slide: short intro about the presentation",
            "Methodology Slide: Summarize the methodology in detail",
            "Results Slide: Present key findings in detail in simple text and bullet points.",
            "Discussion Slide: Summarize the implications and limitations.",
            "Conclusion Slide: State the overall conclusion.",
            "Each slide should be separate",
            "Each slide should have 3 parts:",
            "1. Title: title of the slide",
            "2. Text: the precise and short description or narrative content of the slide. This should include key information, explanations, or supporting arguments. Keep it concise yet informative to avoid overwhelming the audience.",
            "3. Bullet points: A list of bullet points summarizing key information on the slide. Each bullet point should be short, precise, and highlight a specific aspect of the slide's topic. Ideally, limit to 3-5 points.",
            # "4. Image Suggestion: A prompt for generating an image to complement the slide content. Describe the desired visual in detail, including elements, style, and relevance to the topic. Ensure the prompt is actionable for AI tools.",
        ),
    )

    # deps: SupportDependencies = SupportDependencies(db="".join(data))
    listOfString = split_into_token_chunks("".join(data))
    # print(len(listOfString))
    message_history: list[ModelMessage] = []
    # for i, chunk in enumerate(listOfString):
    #     print(f"Chunk {i}:\n{chunk}\n")

    # @agent.tool
    # async def agentTooled(ctx: RunContext) -> str:
    #     """
    #     This is all the text from a pdf file that user has uploaded
    #     """
    #     return listOfString[0]

    # Run the agent over the first three chunks, threading the message history
    # forward so each call builds on the previous one (assumes at least 3 chunks).
    result = agent.run_sync(
        user_prompt=f"Create me a powerpoint presentation from {listOfString[0]}",
        message_history=message_history,
    )
    result_1 = agent.run_sync(
        user_prompt=f"Create me a powerpoint presentation from {listOfString[1]}",
        message_history=result.all_messages(),
    )
    result_2 = agent.run_sync(
        user_prompt=f"Create me a powerpoint presentation from {listOfString[2]}",
        message_history=result_1.all_messages(),
    )
    print(result_2.data)

    # while len(listOfString) > 0:
    #     result_1 = agent.run_sync(user_prompt="Create me a powerpoint presentation", message_history=message_history)
    #     # print(result_1.data)
    #     message_history = result_1.all_messages()
    #     print(result_1)
def ai_ppt(data):
    # # Optionally summarize the PDF text chunk-by-chunk before building slides.
    # summary_texts = []
    # listOfString = split_into_token_chunks("".join(data))
    # for x in listOfString:
    #     print(x)
    #     summary = summarizer(x, max_length=500, min_length=120, truncation=True, do_sample=False)
    #     summary_texts.append([item['summary_text'] for item in summary])
    # print(summary_texts)
    # # summary_texts = [item['generated_text'] for item in summary]
    asyncio.run(ppt_content(data=data))
def extract_data(feed):
    # Pull the text of every page into the module-level `data` list.
    with pdfplumber.open(feed) as pdf:
        pages = pdf.pages
        for p in pages:
            # extract_text() can return None for image-only pages; fall back to "".
            data.append(p.extract_text() or "")
    return None
    # if data is not None:
    #     st.caption(data)
    #     ai_ppt(data=data)
def main():
    uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
    if uploaded_file is not None:
        extract_data(uploaded_file)
        if st.button("Make PPT"):
            ai_ppt(data)
        # Render the uploaded PDF alongside the generated content.
        binary_data = uploaded_file.getvalue()
        pdf_viewer(input=binary_data, width=700)
if __name__ == '__main__':
    # nest_asyncio allows asyncio.run() to be called from Streamlit's running event loop.
    nest_asyncio.apply()
    main()
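
# To run the app locally (assuming this file is saved as app.py and API_KEY is set
# in the environment):
#   streamlit run app.py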