File size: 5,487 Bytes
5d14cc6
f78e807
50d1ce2
4ba3023
299c4e4
ec1c0d9
 
c52e882
ec1c0d9
6993c74
 
ec1c0d9
d553fab
612c0bf
fae7389
ec1c0d9
a8e9d4c
ec1c0d9
 
03b4c19
9489fce
ec1c0d9
 
a8e9d4c
0e1f166
0c2d51b
ec1c0d9
0e1f166
 
6993c74
a98a46a
649e865
0e1f166
 
299c4e4
f2fb591
185bc0f
f2fb591
 
 
 
 
 
 
 
 
 
 
 
04f5bb7
 
2afa0ec
299c4e4
 
 
fae7389
 
67e3d05
fae7389
2afa0ec
5d14cc6
d553fab
299c4e4
72393c5
7526f2d
 
 
d553fab
8cd3882
7cd5037
8cd3882
 
 
 
 
 
 
 
5ef6425
 
 
5d14cc6
72393c5
20dbf3f
72393c5
c52e882
41dd208
 
63bc27b
 
 
 
f78e807
63bc27b
 
c276872
6993c74
29e2a1d
6993c74
2211ca4
 
 
 
 
 
 
6993c74
2211ca4
8028338
b8dc120
766236b
c52e882
 
 
 
 
 
60e8e5d
5d14cc6
 
72393c5
 
 
 
 
 
 
 
2a30065
72393c5
 
5d14cc6
 
ec1c0d9
 
86551a1
ec1c0d9
 
4f86a6f
612c0bf
0042245
ec1c0d9
 
a91875b
 
 
 
 
 
 
 
0042245
a91875b
 
612c0bf
0042245
 
 
612c0bf
 
 
c7f6dea
a91875b
 
 
 
a5f868b
ec1c0d9
 
4ba3023
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import asyncio
import re
from pydantic_ai.result import ResultData, RunResult
import streamlit as st
from pydantic_ai import Agent,RunContext, Tool
from pydantic_ai.models.groq import GroqModel
import nest_asyncio
from pydantic_ai.messages import ModelMessage
import pdfplumber
from transformers import pipeline
import torch
import os
import presentation as customClass
from streamlit_pdf_viewer import pdf_viewer
from dataclasses import dataclass


# API key for the Groq model, read from the API_KEY environment variable
# (os.getenv returns None when the variable is unset).
api_key = os.getenv("API_KEY")
# Text of the uploaded PDF, one entry per page: extract_data() appends to it
# and return_data() joins it.
data = []
# Not referenced by any active code visible in this file.
last_message = ''
# Annotated as a list of presentation.PPT objects; no active code visible in
# this file writes to it.
result_data:list[customClass.PPT] = []



# Groq-hosted model handed to the Agent built in ppt_content() to generate the
# presentation content.
model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key = api_key)


# Hugging Face summarization pipeline, constructed at import time.
# NOTE(review): no active code visible in this file calls it — confirm it is
# still needed before paying the model-loading cost on every start-up.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
#summarizer = pipeline('text2text-generation', model='describeai/gemini')
#nlpaueb/legal-bert-base-uncased




def split_into_token_chunks(text: str, max_tokens: int = 300) -> list[str]:
    """
    Split a long string into chunks of at most ``max_tokens`` words.

    Tokens are whitespace-separated words (``str.split()``), so runs of
    whitespace and newlines in the input are collapsed to single spaces in
    the returned chunks.

    :param text: The input string to split.
    :param max_tokens: The maximum number of tokens (words) per chunk; must be >= 1.
    :return: A list of strings, each containing up to ``max_tokens`` tokens.
        Empty or whitespace-only input yields an empty list.
    :raises ValueError: If ``max_tokens`` is smaller than 1.
    """
    # A non-positive step would either crash inside range() with an unrelated
    # message (0) or silently discard the whole text (negative values).
    if max_tokens < 1:
        raise ValueError(f"max_tokens must be a positive integer, got {max_tokens}")

    words = text.split()
    return [
        " ".join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]

def return_data() -> str:
    """Return the full text extracted from the uploaded PDF as one string.

    The page texts stored in the module-level ``data`` list are concatenated
    in the order they were appended, with no separator between pages.

    This function is registered as a tool on the agent built in
    ``ppt_content``; pydantic-ai uses a tool function's docstring as the
    description sent to the model, so keep the first line meaningful.

    :return: The concatenated page texts; an empty string when nothing has
        been extracted yet.
    """
    # Pages without extractable text may be stored as None; treat them as
    # empty so the join cannot fail with a TypeError.
    return "".join(page or "" for page in data)

@dataclass
class SupportDependencies:
    """Dependency container holding a single string field, ``db``.

    No active code visible in this file instantiates it.
    """

    # Declared as a plain string; the code shown here does not establish what
    # the string is expected to contain.
    db: str


async def ppt_content(data, max_chunks=3):
    """Generate presentation content from extracted PDF text with the agent.

    The page texts in ``data`` are joined, split into word chunks with
    ``split_into_token_chunks`` and sent to the agent one chunk per run. Each
    run is given the complete message history of the previous run, so later
    chunks are processed with the earlier exchanges as context.

    :param data: Iterable of page-text strings; ``None`` entries are treated
        as empty text.
    :param max_chunks: Maximum number of chunks to send to the agent. The
        default of 3 matches the number of runs this function has always
        performed; pass ``None`` to process every chunk.
    :return: The ``data`` attribute of the last agent run (also printed to
        stdout), or ``None`` when there is no text to process.
    :raises ValueError: If ``max_chunks`` is given and is smaller than 1.
    """
    if max_chunks is not None and max_chunks < 1:
        raise ValueError(f"max_chunks must be a positive integer or None, got {max_chunks}")

    chunks = split_into_token_chunks("".join(page or "" for page in data))
    if max_chunks is not None:
        chunks = chunks[:max_chunks]
    if not chunks:
        # Nothing was extracted (e.g. a PDF without a text layer): there is no
        # prompt to send, so skip the agent entirely instead of failing.
        return None

    agent = Agent(
        model,
        result_type=customClass.PPT,
        tools=[
            return_data,
        ],
        # NOTE(review): the prompt text is reproduced verbatim, including its
        # typos and the implicit concatenation of the "4 parts" line with the
        # "1. Title" line (there is no comma between them). Only three parts
        # are listed because the fourth is commented out — confirm whether the
        # wording should be updated.
        system_prompt=(
            "You are an expert in making power-point perssentation",
            "Create 5 sliders",
            "Title Slide: short into about the presentation",
            "Methodology Slide: Summarize the methodology in detail",
            "Results Slide: Present key findings in detail in simple text and bullet points.",
            "Discussion Slide: Summarize the implications and limitations.",
            "Conclusion Slide: State the overall conclusion.",
            "Each slide should be seperate",
            "Each slide should have 4 parts :"
            "1. Title : title of the slide ",
            "2. Text: he presise and short description or narrative content of the slide. This should include key information, explanations, or supporting arguments. Keep it concise yet informative to avoid overwhelming the audience.",
            "3. Bullet point: A list of bullet points summarizing key information on the slide. Each bullet point should be short, presise, and highlight a specific aspect of the slide's topic. ideally, limit to 3-5 points.",
            #"4. Image Suggestion: A prompt for generating an image to complement the slide content. Describe the desired visual in detail, including elements, style, and relevance to the topic. Ensure the prompt is actionable for AI tools.", 
        ),
    )

    message_history: list[ModelMessage] = []
    result = None
    for chunk in chunks:
        # Await the async API directly: this coroutine is already driven by an
        # event loop, so the blocking run_sync() wrapper is not appropriate here.
        result = await agent.run(
            user_prompt=f"Create me a powerpoint presentation from {chunk}",
            message_history=message_history,
        )
        # Carry the whole conversation forward into the next chunk's run.
        message_history = result.all_messages()

    print(result.data)
    return result.data
    

def ai_ppt(data):
    """Run the asynchronous ``ppt_content`` pipeline to completion.

    :param data: Extracted page texts, forwarded unchanged to ``ppt_content``.
    :return: ``None``.
    """
    # NOTE: a summarization pre-pass (chunk the text and run every chunk
    # through ``summarizer``) was previously sketched here as commented-out
    # code; it is disabled, so the raw text is forwarded as-is.
    ppt_job = ppt_content(data=data)
    asyncio.run(ppt_job)


def extract_data(feed):
    """Extract the text of every page of a PDF and append it to ``data``.

    :param feed: The PDF source handed to ``pdfplumber.open`` (``main`` passes
        the Streamlit uploaded-file object).
    :return: ``None``; the page texts are accumulated in the module-level
        ``data`` list, one entry per page, in page order.
    """
    with pdfplumber.open(feed) as pdf:
        # A page with no extractable text may yield None (this depends on the
        # pdfplumber version); store "" instead so later "".join(data) calls
        # cannot fail with a TypeError.
        data.extend(page.extract_text() or "" for page in pdf.pages)



# if data is not None:
#     st.caption(data)
#     ai_ppt(data=data)

def main():
    """Render the Streamlit page: PDF upload, "Make PPT" button and PDF preview.

    Nothing beyond the uploader is drawn until a file has been chosen. Once a
    file is present its text is extracted on every script run, the button
    triggers the presentation generation, and the PDF itself is shown below.
    """
    pdf_upload = st.file_uploader('Choose your .pdf file', type="pdf")
    if pdf_upload is None:
        # No file selected yet: only the uploader widget is shown.
        return

    extract_data(pdf_upload)

    make_ppt_clicked = st.button("Make PPT")
    if make_ppt_clicked:
        ai_ppt(data)

    # Preview the raw uploaded bytes underneath the button.
    pdf_viewer(input=pdf_upload.getvalue(), width=700)
    

if __name__ == '__main__':
    # nest_asyncio patches asyncio so that an event loop can be re-entered,
    # e.g. when a synchronous agent call is made while asyncio.run() is
    # already driving a loop. It must be applied before main() starts any
    # asyncio work. (asyncio itself is already imported at the top of the
    # file, so it is not re-imported here.)
    nest_asyncio.apply()
    main()