Spaces:

Prat0
/

LegalSummarizer

Sleeping

App Files Files Community

Prat0 committed on Aug 11, 2024

Commit

11173d1

verified ·

1 Parent(s): 2de5d14

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -11

app.py CHANGED Viewed

@@ -5,14 +5,16 @@ from llama_index.embeddings.gemini import GeminiEmbedding
 from llama_index.llms.gemini import Gemini
 import os
 import PyPDF2
 # Function to chunk text into smaller pieces
-def chunk_text(text, chunk_size=9000):
     """Split the text into chunks of specified size."""
     return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
 # Load and index the legal document data
-def load_data(uploaded_files):
     documents = []
     for uploaded_file in uploaded_files:
         document_text = ""
@@ -28,15 +30,19 @@ def load_data(uploaded_files):
         for chunk in chunks:
             documents.append(Document(text=chunk))
     Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
-    Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
     index = VectorStoreIndex.from_documents(documents)
     return index
-# Generate legal document summary
-def generate_summary(index, document_text):
     query_engine = index.as_query_engine()
-    response = query_engine.query(f"""
     You are a skilled legal analyst. Your task is to provide a comprehensive summary of the given legal document.
     Analyze the following legal document and summarize it:
     {document_text}
@@ -54,7 +60,7 @@ def generate_summary(index, document_text):
     return response.response
 # Streamlit app
-def main():
     st.title("Legal Document Summarizer")
     st.write("Upload legal documents, and let our AI summarize them!")
@@ -65,8 +71,12 @@ def main():
         st.write("Analyzing legal documents...")
         # Load data and generate summaries
-        index = load_data(uploaded_files)
         summaries = []
         for uploaded_file in uploaded_files:
             document_text = ""
@@ -80,8 +90,11 @@ def main():
             # Chunk the document text for summarization
             chunks = chunk_text(document_text)
             for chunk in chunks:
-                summary = generate_summary(index, chunk)
-                summaries.append(summary)
         st.write("## Legal Document Summaries")
         for i, summary in enumerate(summaries):
@@ -89,4 +102,6 @@ def main():
             st.write(summary)
 if __name__ == "__main__":
-    main()

 from llama_index.llms.gemini import Gemini
 import os
 import PyPDF2
+import asyncio
 # Function to chunk text into smaller pieces
+def chunk_text(text, chunk_size=1000):
     """Split the text into chunks of specified size."""
+    print(f"Chunking text into {chunk_size}-character chunks...")
     return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
 # Load and index the legal document data
+async def load_data(uploaded_files):
     documents = []
     for uploaded_file in uploaded_files:
         document_text = ""
         for chunk in chunks:
             documents.append(Document(text=chunk))
+    print("Setting up Gemini embedding and LLM...")
     Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
+    Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
+    print("Creating index from documents...")
     index = VectorStoreIndex.from_documents(documents)
     return index
+# Asynchronously generate legal document summary
+async def generate_summary(index, document_text):
+    print("Generating summary...")
     query_engine = index.as_query_engine()
+    response = await query_engine.query(f"""
     You are a skilled legal analyst. Your task is to provide a comprehensive summary of the given legal document.
     Analyze the following legal document and summarize it:
     {document_text}
     return response.response
 # Streamlit app
+async def main():
     st.title("Legal Document Summarizer")
     st.write("Upload legal documents, and let our AI summarize them!")
         st.write("Analyzing legal documents...")
         # Load data and generate summaries
+        print("Loading data and creating index...")
+        index = await load_data(uploaded_files)
         summaries = []
+        # Collect tasks for asynchronous execution
+        tasks = []
         for uploaded_file in uploaded_files:
             document_text = ""
             # Chunk the document text for summarization
             chunks = chunk_text(document_text)
             for chunk in chunks:
+                tasks.append(generate_summary(index, chunk))
+        # Await all summaries
+        print("Awaiting summaries...")
+        summaries = await asyncio.gather(*tasks)
         st.write("## Legal Document Summaries")
         for i, summary in enumerate(summaries):
             st.write(summary)
 if __name__ == "__main__":
+    print("Starting application...")
+    asyncio.run(main())
+    print("Application finished.")