Spaces:

Prat0
/

LegalSummarizer

Sleeping

App Files Community

Prat0 committed on Aug 12, 2024

Commit

bd4e82f

verified ·

1 Parent(s): af78cdc

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -74

app.py CHANGED Viewed

@@ -1,47 +1,38 @@
 import streamlit as st
 from llama_index.core import Settings
-from llama_index.core import VectorStoreIndex, Document
 from llama_index.embeddings.gemini import GeminiEmbedding
 from llama_index.llms.gemini import Gemini
 import os
 import PyPDF2
-import asyncio
-# Function to chunk text into smaller pieces
-def chunk_text(text, chunk_size=9000):
-    """Split the text into chunks of specified size."""
-    print(f"Chunking text into {chunk_size}-character chunks...")
-    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
-# Asynchronously load and convert text to documents
-async def load_documents(uploaded_files):
-    documents = []
-    tasks = []
-    for uploaded_file in uploaded_files:
-        tasks.append(convert_to_documents(uploaded_file, documents))
-    await asyncio.gather(*tasks)
     return documents
-# Convert uploaded file to documents
-async def convert_to_documents(uploaded_file, documents):
-    document_text = ""
-    if uploaded_file.type == "application/pdf":
-        pdf_reader = PyPDF2.PdfReader(uploaded_file)
-        for page in pdf_reader.pages:
-            document_text += page.extract_text()
-    else:
-        document_text = uploaded_file.getvalue().decode("utf-8")
-    # Chunk the document text
-    chunks = chunk_text(document_text)
-    for chunk in chunks:
-        documents.append(Document(text=chunk))
-# Asynchronously generate legal document summary
-async def generate_summary(index, document_text):
-    print("Generating summary...")
     query_engine = index.as_query_engine()
-    response = await query_engine.query(f"""
     You are a skilled legal analyst. Your task is to provide a comprehensive summary of the given legal document.
     Analyze the following legal document and summarize it:
     {document_text}
@@ -59,56 +50,37 @@ async def generate_summary(index, document_text):
     return response.response
 # Streamlit app
-async def main():
     st.title("Legal Document Summarizer")
-    st.write("Upload legal documents, and let our AI summarize them!")
     # File uploader
-    uploaded_files = st.file_uploader("Choose legal document files", type=["txt", "pdf"], accept_multiple_files=True)
-    if uploaded_files:
-        st.write("Analyzing legal documents...")
-        # Load data and convert to documents asynchronously
-        print("Loading data and converting to documents...")
-        documents = await load_documents(uploaded_files)
-        # Set up Gemini embedding and LLM
-        print("Setting up Gemini embedding and LLM...")
-        Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
-        Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
-        # Create index from documents
-        print("Creating index from documents...")
-        index = VectorStoreIndex.from_documents(documents)
-        # Generate summaries asynchronously
-        print("Generating summaries...")
-        tasks = []
-        for uploaded_file in uploaded_files:
-            document_text = ""
-            if uploaded_file.type == "application/pdf":
-                pdf_reader = PyPDF2.PdfReader(uploaded_file)
-                for page in pdf_reader.pages:
-                    document_text += page.extract_text()
-            else:
-                document_text = uploaded_file.getvalue().decode("utf-8")
-            # Chunk the document text for summarization
-            chunks = chunk_text(document_text)
-            for chunk in chunks:
-                tasks.append(generate_summary(index, chunk))
-        # Await all summaries
-        print("Awaiting summaries...")
-        summaries = await asyncio.gather(*tasks)
-        st.write("## Legal Document Summaries")
-        for i, summary in enumerate(summaries):
-            st.write(f"### Summary of Document {i + 1}")
-            st.write(summary)
 if __name__ == "__main__":
-    print("Starting application...")
-    asyncio.run(main())
-    print("Application finished.")

 import streamlit as st
 from llama_index.core import Settings
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
 from llama_index.embeddings.gemini import GeminiEmbedding
 from llama_index.llms.gemini import Gemini
+from llama_index.core import Document
+import google.generativeai as genai
 import os
 import PyPDF2
+from io import BytesIO
+# Set up Google API key
+os.environ["GOOGLE_API_KEY"] = "your_api_key_here"  # Replace with your actual API key
+# Configure Google Gemini
+Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
+Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
+def write_to_file(content, filename="test.pdf"):
+    with open(filename, "wb") as f:
+        f.write(content)
+def ingest_documents():
+    reader = SimpleDirectoryReader("./")
+    documents = reader.load_data()
     return documents
+def load_data(documents):
+    index = VectorStoreIndex.from_documents(documents)
+    return index
+# Generate legal document summary
+def generate_summary(index, document_text):
     query_engine = index.as_query_engine()
+    response = query_engine.query(f"""
     You are a skilled legal analyst. Your task is to provide a comprehensive summary of the given legal document.
     Analyze the following legal document and summarize it:
     {document_text}
     return response.response
 # Streamlit app
+def main():
     st.title("Legal Document Summarizer")
+    st.write("Upload a legal document, and let our AI summarize it!")
     # File uploader
+    uploaded_file = st.file_uploader("Choose a legal document file", type=["txt", "pdf"])
+    if uploaded_file is not None:
+        # Read file contents
+        if uploaded_file.type == "application/pdf":
+            pdf_reader = PyPDF2.PdfReader(BytesIO(uploaded_file.getvalue()))
+            document_text = ""
+            for page in pdf_reader.pages:
+                document_text += page.extract_text()
+        else:
+            document_text = uploaded_file.getvalue().decode("utf-8")
+        # Write content to file
+        write_to_file(uploaded_file.getvalue())
+        st.write("Analyzing legal document...")
+        # Ingest documents using SimpleDirectoryReader
+        documents = ingest_documents()
+        # Load data and generate summary
+        index = load_data(documents)
+        summary = generate_summary(index, document_text)
+        st.write("## Legal Document Summary")
+        st.write(summary)
 if __name__ == "__main__":
+    main()