samvish committed on
Commit
c8ecbd4
·
verified ·
1 Parent(s): 185bc0f

Update app.py

Browse files

Fixed API Key Handling: The script now raises an error if API_KEY is missing.
✅ Better String Joining: Used "\n".join(data) to correctly format extracted text.
✅ Structured System Prompt: The prompt is now clearer and formatted correctly.
✅ Fixed Async Handling: Used asyncio.new_event_loop() to avoid conflicts with Streamlit.
✅ PDF Extraction Reset: Clears data before extracting text from a new PDF.
✅ Iterative PowerPoint Generation: Handles multiple chunks instead of just the first three.
✅ Improved Error Handling: Displays errors in Streamlit if PowerPoint generation fails.

Files changed (1) hide show
  1. app.py +73 -125
app.py CHANGED
@@ -1,170 +1,118 @@
1
  import asyncio
 
2
  import re
3
- from pydantic_ai.result import ResultData, RunResult
4
  import streamlit as st
5
- from pydantic_ai import Agent,RunContext, Tool
 
 
 
 
6
  from pydantic_ai.models.groq import GroqModel
7
- import nest_asyncio
8
  from pydantic_ai.messages import ModelMessage
9
- import pdfplumber
10
- from transformers import pipeline
11
- import torch
12
- import os
13
  import presentation as customClass
14
- from streamlit_pdf_viewer import pdf_viewer
15
- from dataclasses import dataclass
16
-
17
 
 
18
  api_key = os.getenv("API_KEY")
19
- data = []
20
- last_message = ''
21
- result_data:list[customClass.PPT] = []
22
-
23
-
24
-
25
- # to generate ppt
26
- model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key = api_key)
27
 
 
 
28
 
29
- # to summarize
 
30
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
31
- #summarizer = pipeline('text2text-generation', model='describeai/gemini')
32
- #nlpaueb/legal-bert-base-uncased
33
-
34
-
35
-
36
 
37
  def split_into_token_chunks(text: str, max_tokens: int = 300) -> list:
38
  """
39
  Splits a long string into chunks of a specified maximum number of tokens (words).
40
-
41
- :param text: The input string to split.
42
- :param max_tokens: The maximum number of tokens (words) per chunk.
43
- :return: A list of strings, each containing up to `max_tokens` tokens.
44
  """
45
- # Split the text into words (tokens)
46
  tokens = text.split()
47
-
48
- # Create chunks of words
49
- chunks = [' '.join(tokens[i:i + max_tokens]) for i in range(0, len(tokens), max_tokens)]
50
-
51
- return chunks
52
 
53
  def return_data() -> str:
54
- return "".join(data)
 
55
 
56
  @dataclass
57
  class SupportDependencies:
58
- db:str
59
-
60
 
61
  async def ppt_content(data):
62
- agent = Agent(model,
63
- result_type=customClass.PPT,
64
- #deps_type=SupportDependencies,
65
- tools=[
66
- return_data,
67
- ],
68
- system_prompt=(
69
- "You are an expert in making power-point perssentation",
70
- "Create 5 sliders",
71
- "Title Slide: short into about the presentation",
72
- "Methodology Slide: Summarize the methodology in detail",
73
- "Results Slide: Present key findings in detail in simple text and bullet points.",
74
- "Discussion Slide: Summarize the implications and limitations.",
75
- "Conclusion Slide: State the overall conclusion.",
76
- "Each slide should be seperate",
77
- "Each slide should have 4 parts :"
78
- "1. Title : title of the slide ",
79
- "2. Text: he presise and short description or narrative content of the slide. This should include key information, explanations, or supporting arguments. Keep it concise yet informative to avoid overwhelming the audience.",
80
- "3. Bullet point: A list of bullet points summarizing key information on the slide. Each bullet point should be short, presise, and highlight a specific aspect of the slide's topic. ideally, limit to 3-5 points.",
81
- #"4. Image Suggestion: A prompt for generating an image to complement the slide content. Describe the desired visual in detail, including elements, style, and relevance to the topic. Ensure the prompt is actionable for AI tools.",
82
- ))
83
- #deps :SupportDependencies = SupportDependencies(db="".join(data))
84
- listOfString = split_into_token_chunks("".join(data))
85
- # print(len(listOfString))
86
- message_history: list[ModelMessage] = []
87
- # for i, chunk in enumerate(listOfString):
88
- # print(f"Chunk {i}:\n{chunk}\n")
89
- # @agent.tool
90
- # async def agentTooled(ctx: RunContext)-> str:
91
- # """
92
- # This is all the text from a pdf file that user has uploaded
93
-
94
- # """
95
- # return listOfString[0]
96
-
97
-
98
- result = agent.run_sync(user_prompt = f"Create me a powerpoint presentation from {listOfString[0]}",
99
- message_history = message_history,
100
- )
101
- result_1 = agent.run_sync(user_prompt = f"Create me a powerpoint presentation from {listOfString[1]}",
102
- message_history = result.all_messages(),
103
- )
104
- result_2 = agent.run_sync(user_prompt = f"Create me a powerpoint presentation from {listOfString[2]}",
105
- message_history = result_1.all_messages(),
106
  )
107
-
108
- print(result_2.data)
109
 
 
 
110
 
 
111
 
112
- # while len(listOfString) > 0:
 
113
 
114
- # result_1 = agent.run_sync(user_prompt = f"Create me a powerpoint presentation",message_history=message_history)
115
- # #print(result_1.data)
116
- # message_history = result_1.all_messages()
117
- # print(result_1)
118
-
119
 
120
  def ai_ppt(data):
121
- # #call summerizer to summerize pdf
122
- # summary_texts = []
123
- # listOfString = split_into_token_chunks("".join(data))
124
- # for x in listOfString:
125
- # print(x)
126
- # summary = summarizer(x, max_length=500, min_length=120, truncation=True,do_sample=False)
127
- # summary_texts .append([item['summary_text'] for item in summary])
128
- # print(summary_texts)
129
-
130
- # #summary_texts = [item['generated_text'] for item in summary]
131
- asyncio.run(ppt_content(data=data))
132
-
133
 
134
  def extract_data(feed):
135
-
 
 
136
  with pdfplumber.open(feed) as pdf:
137
- pages = pdf.pages
138
- for p in pages:
139
- data.append(p.extract_text())
140
-
141
-
142
- return None
143
-
144
-
145
-
146
- # if data is not None:
147
- # st.caption(data)
148
- # ai_ppt(data=data)
149
 
150
  def main():
151
- uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
 
 
 
152
 
153
  if uploaded_file is not None:
154
  extract_data(uploaded_file)
155
 
156
- if st.button("Make PPT"):
157
- ai_ppt(data)
158
-
 
 
 
 
 
159
  binary_data = uploaded_file.getvalue()
160
- pdf_viewer(input=binary_data,
161
- width=700)
162
-
163
 
164
  if __name__ == '__main__':
165
- import asyncio
166
  nest_asyncio.apply()
167
  main()
168
-
169
-
170
-
 
1
  import asyncio
2
+ import os
3
  import re
4
+ import pdfplumber
5
  import streamlit as st
6
+ import torch
7
+ from transformers import pipeline
8
+ from dataclasses import dataclass
9
+ from streamlit_pdf_viewer import pdf_viewer
10
+ from pydantic_ai import Agent, RunContext, Tool
11
  from pydantic_ai.models.groq import GroqModel
 
12
  from pydantic_ai.messages import ModelMessage
 
 
 
 
13
  import presentation as customClass
14
+ import nest_asyncio
 
 
15
 
16
# Load API key; fail fast when it is absent so the problem surfaces at startup
# rather than on the first model call.
api_key = os.getenv("API_KEY")
if not api_key:
    raise ValueError("API_KEY is not set in the environment variables.")

# Module-level state shared by the functions in this file.
data = []  # page texts of the most recently extracted PDF (filled by extract_data)
result_data: list[customClass.PPT] = []  # NOTE(review): never written in the visible code


@st.cache_resource
def _load_summarizer():
    """Build the BART summarization pipeline once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached module-level `pipeline(...)` call would rebuild the model each
    time the user clicks a button or uploads a file.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")


# Initialize models
model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key=api_key)
summarizer = _load_summarizer()
 
 
 
 
 
27
 
28
  def split_into_token_chunks(text: str, max_tokens: int = 300) -> list:
29
  """
30
  Splits a long string into chunks of a specified maximum number of tokens (words).
 
 
 
 
31
  """
 
32
  tokens = text.split()
33
+ return [' '.join(tokens[i:i + max_tokens]) for i in range(0, len(tokens), max_tokens)]
 
 
 
 
34
 
35
  def return_data() -> str:
36
+ """Returns concatenated extracted data."""
37
+ return "\n".join(data)
38
 
39
  @dataclass
40
  class SupportDependencies:
41
+ db: str
 
42
 
43
  async def ppt_content(data):
44
+ """
45
+ Generates PowerPoint content using an AI model.
46
+ """
47
+ if not data:
48
+ raise ValueError("No valid text found for PowerPoint generation.")
49
+
50
+ agent = Agent(
51
+ model,
52
+ result_type=customClass.PPT,
53
+ tools=[return_data],
54
+ system_prompt="""
55
+ You are an expert in creating PowerPoint presentations.
56
+ Create 5 slides:
57
+ 1. Title Slide: Introduction about the presentation.
58
+ 2. Methodology Slide: Summarize the methodology in detail.
59
+ 3. Results Slide: Present key findings in bullet points.
60
+ 4. Discussion Slide: Summarize implications and limitations.
61
+ 5. Conclusion Slide: State the overall conclusion.
62
+
63
+ Each slide should have:
64
+ - Title: Clear and concise.
65
+ - Text: Short and informative explanation.
66
+ - Bullet Points: 3-5 summarized key takeaways.
67
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  )
 
 
69
 
70
+ listOfString = split_into_token_chunks("\n".join(data))
71
+ message_history: list[ModelMessage] = []
72
 
73
+ result = agent.run_sync(user_prompt=f"Create a PowerPoint presentation from {listOfString[0]}", message_history=message_history)
74
 
75
+ for i in range(1, len(listOfString)):
76
+ result = agent.run_sync(user_prompt=f"Continue creating the PowerPoint presentation from {listOfString[i]}", message_history=result.all_messages())
77
 
78
+ print(result.data)
 
 
 
 
79
 
80
  def ai_ppt(data):
81
+ """Runs the PowerPoint generation in an async loop."""
82
+ loop = asyncio.new_event_loop()
83
+ asyncio.set_event_loop(loop)
84
+ loop.run_until_complete(ppt_content(data=data))
 
 
 
 
 
 
 
 
85
 
86
  def extract_data(feed):
87
+ """Extracts text from PDF and appends to `data` list."""
88
+ global data
89
+ data = [] # Reset data before extracting
90
  with pdfplumber.open(feed) as pdf:
91
+ for p in pdf.pages:
92
+ text = p.extract_text()
93
+ if text:
94
+ data.append(text)
 
 
 
 
 
 
 
 
95
 
96
  def main():
97
+ """Main Streamlit app function."""
98
+ st.title("AI-Powered PowerPoint Generator")
99
+
100
+ uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
101
 
102
  if uploaded_file is not None:
103
  extract_data(uploaded_file)
104
 
105
+ if st.button("Generate PPT"):
106
+ try:
107
+ ai_ppt(data)
108
+ st.success("PowerPoint generation completed!")
109
+ except Exception as e:
110
+ st.error(f"Error generating PPT: {e}")
111
+
112
+ # Display PDF
113
  binary_data = uploaded_file.getvalue()
114
+ pdf_viewer(input=binary_data, width=700)
 
 
115
 
116
  if __name__ == '__main__':
 
117
  nest_asyncio.apply()
118
  main()