# Spaces: Sleeping
import streamlit as st | |
from llama_index.core import Settings | |
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext | |
from llama_index.embeddings.gemini import GeminiEmbedding | |
from llama_index.llms.gemini import Gemini | |
from llama_index.core import Document | |
import google.generativeai as genai | |
import os | |
import PyPDF2 | |
from io import BytesIO | |
from llama_index.embeddings.fastembed import FastEmbedEmbedding | |
# Global LlamaIndex settings: embeddings come from FastEmbed
# (BAAI/bge-small-en-v1.5); the LLM is Google Gemini, keyed by the
# GOOGLE_API_KEY environment variable.
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.llm = Gemini(
    model_name="models/gemini-pro",
    temperature=0.1,
    api_key=os.getenv("GOOGLE_API_KEY"),
)
def write_to_file(content, filename="./files/test.pdf"):
    """Write raw bytes to *filename*, creating its parent directory if needed.

    Args:
        content: Bytes to store; the file is opened in binary mode.
        filename: Destination path. An existing file is overwritten.
    """
    parent_dir = os.path.dirname(filename)
    # open() does not create missing directories, so make sure the target
    # folder exists before writing (a bare filename has no parent to create).
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(content)
def ingest_documents(directory="./files/"):
    """Load every file found in *directory* with SimpleDirectoryReader.

    Args:
        directory: Folder to read from; defaults to the app's upload folder.

    Returns:
        The documents produced by ``SimpleDirectoryReader.load_data()``.
    """
    # The loaded documents are deliberately not printed: they hold the full
    # text of the uploaded file and would otherwise end up in stdout/logs.
    reader = SimpleDirectoryReader(directory)
    return reader.load_data()
def load_data(documents):
    """Build a VectorStoreIndex from *documents* and return it."""
    return VectorStoreIndex.from_documents(documents)
# Generate legal document summary
def generate_summary(index, document_text):
    """Query the index for a structured summary of a legal document.

    Args:
        index: Object exposing ``as_query_engine()``; the caller passes the
            index returned by ``load_data``.
        document_text: Extracted text of the document; it is embedded
            verbatim in the prompt.

    Returns:
        The ``response`` attribute of the query engine's result.
    """
    # The prompt is assembled from explicit lines so that only the intended
    # text (no stray markup or indentation) reaches the model.
    prompt_lines = [
        "You are a skilled legal analyst. Your task is to provide a comprehensive summary of the given legal document.",
        "Analyze the following document and summarize it:",
        f"{document_text}",
        "Please cover the following aspects:",
        "1. Document type and purpose",
        "2. Key parties involved",
        "3. Main clauses and provisions",
        "4. Important dates and deadlines",
        "5. Potential legal implications",
        "6. Any notable or unusual elements",
        "Provide a clear, concise, and professional summary",
    ]
    prompt = "\n" + "\n".join(prompt_lines) + "\n"

    query_engine = index.as_query_engine()
    response = query_engine.query(prompt)
    return response.response
def _extract_text(raw_bytes, mime_type):
    """Return the plain text of an upload given its bytes and MIME type."""
    if mime_type == "application/pdf":
        pdf_reader = PyPDF2.PdfReader(BytesIO(raw_bytes))
        # Guard against a falsy result from extract_text() (e.g. a page with
        # no text layer) so the concatenation cannot fail.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    return raw_bytes.decode("utf-8")


# Streamlit app
def main():
    """Render the upload page and show a summary of the uploaded document.

    Flow: read the upload, extract its text, save the raw bytes under
    ``./files/``, index that folder, then ask the LLM for a summary.
    """
    st.title("Legal Document Summarizer")
    st.write("Upload a legal document, and let our AI summarize it!")

    # File uploader
    uploaded_file = st.file_uploader("Choose a legal document file", type=["txt", "pdf"])
    if uploaded_file is None:
        return

    # Read the upload once and reuse the bytes for extraction and saving.
    raw_bytes = uploaded_file.getvalue()
    is_pdf = uploaded_file.type == "application/pdf"
    document_text = _extract_text(raw_bytes, uploaded_file.type)

    if not document_text.strip():
        # Nothing to summarise (e.g. a scanned PDF without a text layer).
        st.warning("No readable text could be extracted from the uploaded document.")
        return

    # Save the upload with an extension that matches its content so the
    # directory reader picks the right parser, and drop the file left by a
    # previous upload of the other type so only this document is indexed.
    if is_pdf:
        target_path, stale_path = "./files/test.pdf", "./files/test.txt"
    else:
        target_path, stale_path = "./files/test.txt", "./files/test.pdf"
    try:
        os.remove(stale_path)
    except FileNotFoundError:
        pass
    write_to_file(raw_bytes, target_path)

    st.write("Analyzing legal document...")

    # Ingest documents using SimpleDirectoryReader
    documents = ingest_documents()

    # Load data and generate summary
    index = load_data(documents)
    summary = generate_summary(index, document_text)

    st.write("## Legal Document Summary")
    st.write(summary)
# Script entry point: start the app only when this file is run directly.
if __name__ == "__main__":
    main()