sarim committed on
Commit
6993c74
·
1 Parent(s): aa7149f

summarize text

Browse files
Files changed (1) hide show
  1. app.py +19 -18
app.py CHANGED
@@ -6,8 +6,8 @@ from pydantic_ai.models.groq import GroqModel
6
  import nest_asyncio
7
  from pydantic_ai.messages import ModelMessage
8
  import pdfplumber
9
- #from transformers import pipeline
10
- #import torch
11
  import os
12
  import presentation as customClass
13
  from streamlit_pdf_viewer import pdf_viewer
@@ -26,14 +26,14 @@ model = GroqModel('llama-3.1-70b-versatile', api_key = api_key)
26
 
27
 
28
  # to summarize
29
- #summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
30
  #summarizer = pipeline('text2text-generation', model='describeai/gemini')
31
  #nlpaueb/legal-bert-base-uncased
32
 
33
 
34
 
35
 
36
- def split_into_token_chunks(text: str, max_tokens: int = 900) -> list:
37
  """
38
  Splits a long string into chunks of a specified maximum number of tokens (words).
39
 
@@ -87,16 +87,13 @@ async def ppt_content(data):
87
  # for i, chunk in enumerate(listOfString):
88
  # print(f"Chunk {i}:\n{chunk}\n")
89
 
90
- for x in listOfString:
91
- result = agent.run_sync(user_prompt = f"Create me a powerpoint presentation {x}",message_history = message_history,deps=deps)
92
- if(len(message_history) > 5):
93
- message_history.pop()
94
- message_history.pop()
95
- message_history.pop()
96
- message_history.pop()
97
- message_history = result.all_messages()
98
- result_data.append(result.data)
99
- print(result_data[-1])
100
 
101
 
102
 
@@ -109,12 +106,16 @@ async def ppt_content(data):
109
 
110
 
111
  def ai_ppt(data):
112
- #call summerizer to summerize pdf
113
- # summary = summarizer("".join(data), max_length=400, min_length=100, truncation=True,do_sample=False)
 
 
 
 
 
114
 
115
- # summary_texts = [item['summary_text'] for item in summary]
116
  #summary_texts = [item['generated_text'] for item in summary]
117
- asyncio.run(ppt_content(data=data))
118
 
119
 
120
  def extract_data(feed):
 
6
  import nest_asyncio
7
  from pydantic_ai.messages import ModelMessage
8
  import pdfplumber
9
+ from transformers import pipeline
10
+ import torch
11
  import os
12
  import presentation as customClass
13
  from streamlit_pdf_viewer import pdf_viewer
 
26
 
27
 
28
  # to summarize
29
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
30
  #summarizer = pipeline('text2text-generation', model='describeai/gemini')
31
  #nlpaueb/legal-bert-base-uncased
32
 
33
 
34
 
35
 
36
+ def split_into_token_chunks(text: str, max_tokens: int = 5000) -> list:
37
  """
38
  Splits a long string into chunks of a specified maximum number of tokens (words).
39
 
 
87
  # for i, chunk in enumerate(listOfString):
88
  # print(f"Chunk {i}:\n{chunk}\n")
89
 
90
+
91
+ result = agent.run_sync(user_prompt = f"Create me a powerpoint presentation {data}",
92
+ message_history = message_history,
93
+ deps=deps,
94
+ )
95
+
96
+ print(result.data)
 
 
 
97
 
98
 
99
 
 
106
 
107
 
108
def ai_ppt(data):
    """Summarize extracted PDF text and generate presentation content from it.

    Parameters
    ----------
    data : iterable of str
        Text segments (e.g. per-page text) extracted from the PDF.

    Side effects: prints the collected chunk summaries, then runs the async
    ``ppt_content`` pipeline on the summarized text.
    """
    summary_texts = []
    # Split the concatenated document into token-bounded chunks so each
    # piece fits within the summarizer model's input limit.
    chunks = split_into_token_chunks("".join(data))
    for chunk in chunks:
        # Summarize THIS chunk. (Previously the whole joined document was
        # passed on every iteration, which ignored the chunking entirely
        # and re-summarized identical text once per chunk.)
        summary = summarizer(
            chunk,
            max_length=400,
            min_length=100,
            truncation=True,
            do_sample=False,
        )
        summary_texts.append([item['summary_text'] for item in summary])
    print(summary_texts)
    asyncio.run(ppt_content(data=summary_texts))
119
 
120
 
121
  def extract_data(feed):