girishwangikar committed on
Commit
a0f19e1
·
verified ·
1 Parent(s): 3600dcc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -0
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from google.colab import userdata
3
+ import gradio as gr
4
+ from langchain_groq import ChatGroq
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.chains.summarize import load_summarize_chain
7
+ from langchain.docstore.document import Document
8
+ import PyPDF2
9
+ from langchain.prompts import PromptTemplate
10
+
11
# Secrets are read from the Colab user-data store. Both lookups happen before
# anything is exported, so a failed lookup leaves the environment untouched.
hf_api_key, groq_api_key = (
    userdata.get(secret_name) for secret_name in ('HF_TOKEN', 'GROQ_API_KEY')
)
os.environ.update({'HF_TOKEN': hf_api_key, 'GROQ_API_KEY': groq_api_key})

# Shared chat model used by the summarization chains (temperature 0).
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name='llama-3.1-8b-instant',
    temperature=0,
)
19
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: The PDF source, passed unchanged to ``PyPDF2.PdfReader``.

    Returns:
        A single string holding the extracted text of all pages in page
        order, with no separator inserted between pages. A page whose
        extraction yields nothing contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # A page without a text layer can yield a falsy result (possibly None,
    # depending on the PyPDF2 release); `or ""` keeps the join from raising
    # TypeError in that case. join also avoids repeated string reallocation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
25
+
26
def chunk_text(text):
    """Split *text* into overlapping pieces wrapped as ``Document`` objects.

    Pieces are at most 4000 characters long (measured with ``len``) and
    consecutive pieces overlap by up to 400 characters.

    Args:
        text: The full text to split.

    Returns:
        A list of ``Document`` instances, one per piece, in order.
    """
    splitter = RecursiveCharacterTextSplitter(
        length_function=len,
        chunk_overlap=400,
        chunk_size=4000,
    )
    documents = []
    for piece in splitter.split_text(text):
        documents.append(Document(page_content=piece))
    return documents
34
+
35
def summarize_chunks(chunks):
    """Summarize a list of documents using the module-level ``llm``.

    When the combined character count of all chunks is below 10000, the
    documents are summarized in a single "stuff" pass with the combine
    prompt. Otherwise a "map_reduce" chain is used: each chunk is summarized
    with the map prompt and the partial summaries are merged with the
    combine prompt (this chain runs with ``verbose=True``).

    Args:
        chunks: Objects exposing a ``page_content`` string.

    Returns:
        The result of running the selected chain over ``chunks``.
    """
    # Per-chunk prompt (used only by the map step of map_reduce).
    map_prompt = PromptTemplate(
        input_variables=["text"],
        template=(
            'Write a detailed summary of the following text:\n'
            '"{text}"\n'
            'DETAILED SUMMARY:'
        ),
    )

    # Prompt that produces the final summary in both strategies.
    combine_prompt = PromptTemplate(
        input_variables=["text"],
        template=(
            'Write a comprehensive summary of the following text, '
            'capturing key points and main ideas:\n'
            '"{text}"\n'
            'COMPREHENSIVE SUMMARY:'
        ),
    )

    # Total size decides which summarization strategy is used.
    total_chars = 0
    for doc in chunks:
        total_chars += len(doc.page_content)

    if total_chars >= 10000:
        # Longer documents: summarize each chunk, then combine.
        chain_options = {
            "chain_type": "map_reduce",
            "map_prompt": map_prompt,
            "combine_prompt": combine_prompt,
            "verbose": True,
        }
    else:
        # Shorter documents: everything goes into one prompt.
        chain_options = {"chain_type": "stuff", "prompt": combine_prompt}

    chain = load_summarize_chain(llm, **chain_options)
    return chain.run(chunks)
68
+
69
def summarize_content(pdf_file, text_input):
    """Summarize an uploaded PDF or, when none is given, the typed text.

    Args:
        pdf_file: Uploaded PDF handed to ``extract_text_from_pdf``, or
            ``None`` when nothing was uploaded. Takes precedence over
            ``text_input`` when both are supplied.
        text_input: Text typed by the user; may be empty or ``None``.

    Returns:
        The summary produced by ``summarize_chunks``, or a short
        user-facing message when there is nothing to summarize (no input
        at all, whitespace-only text, or a PDF with no extractable text).
    """
    # Whitespace-only input counts as "nothing entered"; without this check
    # it would reach the summarization chain with no content.
    typed_text = (text_input or "").strip()
    if pdf_file is None and not typed_text:
        return "Please upload a PDF file or enter text to summarize."

    if pdf_file is not None:
        # Extract text from PDF
        text = extract_text_from_pdf(pdf_file)
        if not text or not text.strip():
            # Avoid asking the model to summarize an empty document.
            return "No text could be extracted from the uploaded PDF."
    else:
        # Use the input text exactly as entered
        text = text_input

    # Chunk the text, then summarize the chunks
    chunks = chunk_text(text)
    return summarize_chunks(chunks)
86
+
87
# Markdown shown above the input/output row.
_INTRO_MD = """
# PDF And Text Summarizer
### Advanced PDF and Text Summarization -

Upload your PDF document or enter text directly, and let AI generate a concise, informative summary.
"""

# Markdown shown below the input/output row.
_HOW_IT_WORKS_MD = """
### How it works
1. Upload a PDF file or enter text directly
2. Click "Generate Summary"
3. Wait for the AI to process and summarize your content
4. Review the generated summary

*Powered by LLAMA 3.1 8B model and LangChain*
"""

# NOTE(review): nesting below is reconstructed from a flattened source;
# confirm it matches the intended layout.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        # Narrow column: the two inputs and the trigger button.
        with gr.Column(scale=1):
            input_pdf = gr.File(
                label="Upload PDF (optional)",
                file_types=[".pdf"],
            )
            input_text = gr.Textbox(
                label="Or enter text here",
                lines=5,
                placeholder="Paste or type your text here...",
            )
            submit_btn = gr.Button("Generate Summary", variant="primary")

        # Wide column: the generated summary.
        with gr.Column(scale=2):
            output = gr.Textbox(label="Generated Summary", lines=10)

    gr.Markdown(_HOW_IT_WORKS_MD)

    # Clicking the button sends both inputs to summarize_content and writes
    # its return value into the summary box.
    submit_btn.click(
        fn=summarize_content,
        inputs=[input_pdf, input_text],
        outputs=output,
    )

iface.launch()