|
import os |
|
import gradio as gr |
|
from langchain_groq import ChatGroq |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.chains.summarize import load_summarize_chain |
|
from langchain.docstore.document import Document |
|
import PyPDF2 |
|
from langchain.prompts import PromptTemplate |
|
|
|
|
|
groq_api_key = os.environ.get('GROQ_API_KEY') |
|
|
|
|
|
llm = ChatGroq(temperature=0, model_name='llama-3.1-8b-instant', groq_api_key=groq_api_key) |
|
|
|
def extract_text_from_pdf(pdf_file): |
|
pdf_reader = PyPDF2.PdfReader(pdf_file) |
|
text = "" |
|
for page in pdf_reader.pages: |
|
text += page.extract_text() |
|
return text |
|
|
|
def chunk_text(text): |
|
text_splitter = RecursiveCharacterTextSplitter( |
|
chunk_size=4000, |
|
chunk_overlap=400, |
|
length_function=len |
|
) |
|
chunks = text_splitter.split_text(text) |
|
return [Document(page_content=chunk) for chunk in chunks] |
|
|
|
def summarize_chunks(chunks, conciseness): |
|
|
|
map_prompt_template = f"""Write a {'very concise' if conciseness > 0.7 else 'detailed'} summary of the following text, focusing on the {'most crucial' if conciseness > 0.7 else 'key'} points: |
|
"{{text}}" |
|
{'CONCISE' if conciseness > 0.7 else 'DETAILED'} SUMMARY:""" |
|
|
|
combine_prompt_template = f"""Write a {'highly condensed' if conciseness > 0.7 else 'comprehensive'} summary of the following text, capturing the {'essential' if conciseness > 0.7 else 'key'} points and main ideas: |
|
"{{text}}" |
|
{'CONDENSED' if conciseness > 0.7 else 'COMPREHENSIVE'} SUMMARY:""" |
|
|
|
map_prompt = PromptTemplate(template=map_prompt_template, input_variables=["text"]) |
|
combine_prompt = PromptTemplate(template=combine_prompt_template, input_variables=["text"]) |
|
|
|
|
|
total_length = sum(len(chunk.page_content) for chunk in chunks) |
|
if total_length < 10000 or conciseness > 0.8: |
|
chain = load_summarize_chain( |
|
llm, |
|
chain_type="stuff", |
|
prompt=combine_prompt |
|
) |
|
else: |
|
chain = load_summarize_chain( |
|
llm, |
|
chain_type="map_reduce", |
|
map_prompt=map_prompt, |
|
combine_prompt=combine_prompt, |
|
verbose=True |
|
) |
|
|
|
summary = chain.run(chunks) |
|
return summary |
|
|
|
def summarize_content(pdf_file, text_input, conciseness): |
|
if pdf_file is None and not text_input: |
|
return "Please upload a PDF file or enter text to summarize." |
|
|
|
if pdf_file is not None: |
|
|
|
text = extract_text_from_pdf(pdf_file) |
|
else: |
|
|
|
text = text_input |
|
|
|
|
|
chunks = chunk_text(text) |
|
|
|
|
|
final_summary = summarize_chunks(chunks, conciseness) |
|
return final_summary |
|
|
|
FOOTER_TEXT = """ |
|
<footer> |
|
<p>If you enjoyed the functionality of the app, please leave a like!<br> |
|
Check out more on |
|
<a href="https://www.linkedin.com/in/girish-wangikar/" target="_blank">LinkedIn</a> | |
|
<a href="https://girishwangikar.github.io/Girish_Wangikar_Portfolio.github.io/" target="_blank">Portfolio</a> |
|
</p> |
|
</footer> |
|
""" |
|
|
|
with gr.Blocks(theme=gr.themes.Soft()) as iface: |
|
|
|
gr.Markdown( |
|
""" |
|
<style> |
|
.title { |
|
text-align: center; |
|
} |
|
.description { |
|
text-align: center; |
|
} |
|
footer { |
|
text-align: center; |
|
padding: 10px; |
|
width: 100%; |
|
background-color: rgba(240, 240, 240, 0.8); |
|
z-index: 1000; |
|
position: relative; |
|
margin-top: 10px; |
|
color: black; |
|
} |
|
/* Optional: Adjust link styles in footer */ |
|
footer a { |
|
color: #1a0dab; |
|
text-decoration: none; |
|
} |
|
footer a:hover { |
|
text-decoration: underline; |
|
} |
|
</style> |
|
""" |
|
) |
|
|
|
|
|
gr.Markdown( |
|
""" |
|
<div class="title"> |
|
# PDF And Text Summarizer |
|
</div> |
|
<div class="description"> |
|
### Advanced PDF and Text Summarization with Conciseness Control |
|
- Upload your PDF document or enter text directly, adjust the conciseness level, and let AI generate a summary. |
|
</div> |
|
""" |
|
) |
|
|
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
input_pdf = gr.File(label="Upload PDF (optional)", file_types=[".pdf"]) |
|
input_text = gr.Textbox( |
|
label="Or enter text here", |
|
lines=5, |
|
placeholder="Paste or type your text here..." |
|
) |
|
conciseness_slider = gr.Slider( |
|
minimum=0, |
|
maximum=1, |
|
value=0.5, |
|
step=0.1, |
|
label="Conciseness Level" |
|
) |
|
submit_btn = gr.Button("Generate Summary", variant="primary") |
|
|
|
with gr.Column(scale=2): |
|
output = gr.Textbox(label="Generated Summary", lines=10) |
|
|
|
gr.Markdown( |
|
""" |
|
### How it works |
|
1. **Upload a PDF file or enter text directly** |
|
2. **Adjust the conciseness level:** |
|
- 0 (Most detailed) to 1 (Most concise) |
|
3. **Click "Generate Summary"** |
|
4. **Wait for the AI to process and summarize your content** |
|
5. **Review the generated summary** |
|
*Powered by LLAMA 3.1 8B model and LangChain* |
|
""" |
|
) |
|
|
|
|
|
gr.Markdown(FOOTER_TEXT) |
|
|
|
|
|
submit_btn.click( |
|
summarize_content, |
|
inputs=[input_pdf, input_text, conciseness_slider], |
|
outputs=output |
|
) |
|
|
|
iface.launch() |
|
|