Spaces:
Running
Running
summarize text
Browse files
app.py
CHANGED
@@ -6,8 +6,8 @@ from pydantic_ai.models.groq import GroqModel
|
|
6 |
import nest_asyncio
|
7 |
from pydantic_ai.messages import ModelMessage
|
8 |
import pdfplumber
|
9 |
-
|
10 |
-
|
11 |
import os
|
12 |
import presentation as customClass
|
13 |
from streamlit_pdf_viewer import pdf_viewer
|
@@ -26,14 +26,14 @@ model = GroqModel('llama-3.1-70b-versatile', api_key = api_key)
|
|
26 |
|
27 |
|
28 |
# to summarize
|
29 |
-
|
30 |
#summarizer = pipeline('text2text-generation', model='describeai/gemini')
|
31 |
#nlpaueb/legal-bert-base-uncased
|
32 |
|
33 |
|
34 |
|
35 |
|
36 |
-
def split_into_token_chunks(text: str, max_tokens: int =
|
37 |
"""
|
38 |
Splits a long string into chunks of a specified maximum number of tokens (words).
|
39 |
|
@@ -87,16 +87,13 @@ async def ppt_content(data):
|
|
87 |
# for i, chunk in enumerate(listOfString):
|
88 |
# print(f"Chunk {i}:\n{chunk}\n")
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
message_history = result.all_messages()
|
98 |
-
result_data.append(result.data)
|
99 |
-
print(result_data[-1])
|
100 |
|
101 |
|
102 |
|
@@ -109,12 +106,16 @@ async def ppt_content(data):
|
|
109 |
|
110 |
|
111 |
def ai_ppt(data):
|
112 |
-
#call summerizer to summerize pdf
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
-
# summary_texts = [item['summary_text'] for item in summary]
|
116 |
#summary_texts = [item['generated_text'] for item in summary]
|
117 |
-
asyncio.run(ppt_content(data=
|
118 |
|
119 |
|
120 |
def extract_data(feed):
|
|
|
6 |
import nest_asyncio
|
7 |
from pydantic_ai.messages import ModelMessage
|
8 |
import pdfplumber
|
9 |
+
from transformers import pipeline
|
10 |
+
import torch
|
11 |
import os
|
12 |
import presentation as customClass
|
13 |
from streamlit_pdf_viewer import pdf_viewer
|
|
|
26 |
|
27 |
|
28 |
# to summarize
|
29 |
+
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
30 |
#summarizer = pipeline('text2text-generation', model='describeai/gemini')
|
31 |
#nlpaueb/legal-bert-base-uncased
|
32 |
|
33 |
|
34 |
|
35 |
|
36 |
+
def split_into_token_chunks(text: str, max_tokens: int = 5000) -> list:
|
37 |
"""
|
38 |
Splits a long string into chunks of a specified maximum number of tokens (words).
|
39 |
|
|
|
87 |
# for i, chunk in enumerate(listOfString):
|
88 |
# print(f"Chunk {i}:\n{chunk}\n")
|
89 |
|
90 |
+
|
91 |
+
result = agent.run_sync(user_prompt = f"Create me a powerpoint presentation {data}",
|
92 |
+
message_history = message_history,
|
93 |
+
deps=deps,
|
94 |
+
)
|
95 |
+
|
96 |
+
print(result.data)
|
|
|
|
|
|
|
97 |
|
98 |
|
99 |
|
|
|
106 |
|
107 |
|
108 |
def ai_ppt(data):
    """Summarize the extracted PDF text chunk by chunk and build the slides.

    The pieces of ``data`` are joined into one string, split with
    ``split_into_token_chunks``, and each chunk is passed to the
    module-level ``summarizer`` pipeline. The collected summaries are then
    handed to ``ppt_content``, which is run to completion with
    ``asyncio.run``.

    Args:
        data: Iterable of strings that are concatenated with ``"".join``
            (presumably the per-page text of the uploaded PDF -- confirm
            against the caller).

    Returns:
        None. The result is whatever ``ppt_content`` produces as a side
        effect.
    """
    # Call the summarizer to summarize the PDF, one chunk at a time.
    summary_texts = []
    chunks = split_into_token_chunks("".join(data))
    for chunk in chunks:
        # Summarize the current chunk. The previous code passed the whole
        # joined document here on every iteration, so with truncation=True
        # each pass re-summarized the same leading part of the document and
        # the later chunks were never looked at.
        # NOTE(review): chunks may still be longer than the model's input
        # limit, in which case truncation=True drops the tail of each chunk
        # -- consider a smaller max_tokens when splitting.
        summary = summarizer(
            chunk,
            max_length=400,
            min_length=100,
            truncation=True,
            do_sample=False,
        )
        # Keep one inner list per chunk so ppt_content receives the same
        # list-of-lists shape as before.
        summary_texts.append([item['summary_text'] for item in summary])
        print(summary_texts)

    #summary_texts = [item['generated_text'] for item in summary]
    asyncio.run(ppt_content(data=summary_texts))
|
119 |
|
120 |
|
121 |
def extract_data(feed):
|