Prat0 committed
Commit 47d3f08 · verified · 1 Parent(s): 9c21465

Update app.py

Files changed (1)
  1. app.py +49 -34
app.py CHANGED
@@ -1,24 +1,35 @@
  import streamlit as st
  from llama_index.core import Settings
- from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
+ from llama_index.core import VectorStoreIndex, Document
  from llama_index.embeddings.gemini import GeminiEmbedding
  from llama_index.llms.gemini import Gemini
- from llama_index.core import Document
- import google.generativeai as genai
- #import streamlit_analytics2 as streamlit_analytics
-
- # Set up Google API key
  import os
+ import PyPDF2

- # Configure Google Gemini
+ # Function to chunk text into smaller pieces
+ def chunk_text(text, chunk_size=1000):
+     """Split the text into chunks of specified size."""
+     return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

  # Load and index the legal document data
  def load_data(uploaded_files):
-     documents = [Document(text=t) for t in uploaded_files]
-     #documents = SimpleDirectoryReader(input_files=[uploaded_files]).load_data()
+     documents = []
+     for uploaded_file in uploaded_files:
+         document_text = ""
+         if uploaded_file.type == "application/pdf":
+             pdf_reader = PyPDF2.PdfReader(uploaded_file)
+             for page in pdf_reader.pages:
+                 document_text += page.extract_text()
+         else:
+             document_text = uploaded_file.getvalue().decode("utf-8")
+
+         # Chunk the document text
+         chunks = chunk_text(document_text)
+         for chunk in chunks:
+             documents.append(Document(text=chunk))
+
      Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
-     Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
-     llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
+     Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
      index = VectorStoreIndex.from_documents(documents)
      return index

@@ -45,33 +56,37 @@ def generate_summary(index, document_text):
  # Streamlit app
  def main():
      st.title("Legal Document Summarizer")
-     st.write("Upload a legal document, and let our AI summarize it!")
+     st.write("Upload legal documents, and let our AI summarize them!")

      # File uploader
-     uploaded_file = st.file_uploader("Choose a legal document file", type=["txt", "pdf"])
+     uploaded_files = st.file_uploader("Choose legal document files", type=["txt", "pdf"], accept_multiple_files=True)

-     if uploaded_file is not None:
-         # Read file contents
-         if uploaded_file.type == "application/pdf":
-             # You'll need to install PyPDF2 for this
-             import PyPDF2
-             pdf_reader = PyPDF2.PdfReader(uploaded_file)
-             document_text = ""
-             l = []
-             for page in pdf_reader.pages:
-                 document_text += page.extract_text()
-                 l.append(page.extract_text())
-         else:
-             document_text = uploaded_file.getvalue().decode("utf-8")
+     if uploaded_files:
+         st.write("Analyzing legal documents...")

-         st.write("Analyzing legal document...")
-
-         # Load data and generate summary
-         index = load_data(l)
-         summary = generate_summary(index, document_text)
+         # Load data and generate summaries
+         index = load_data(uploaded_files)
+         summaries = []
+
+         for uploaded_file in uploaded_files:
+             document_text = ""
+             if uploaded_file.type == "application/pdf":
+                 pdf_reader = PyPDF2.PdfReader(uploaded_file)
+                 for page in pdf_reader.pages:
+                     document_text += page.extract_text()
+             else:
+                 document_text = uploaded_file.getvalue().decode("utf-8")
+
+             # Chunk the document text for summarization
+             chunks = chunk_text(document_text)
+             for chunk in chunks:
+                 summary = generate_summary(index, chunk)
+                 summaries.append(summary)

-         st.write("## Legal Document Summary")
-         st.write(summary)
+         st.write("## Legal Document Summaries")
+         for i, summary in enumerate(summaries):
+             st.write(f"### Summary of Document {i + 1}")
+             st.write(summary)

  if __name__ == "__main__":
-     main()
+     main()
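
Note on the new chunking step: chunk_text splits on raw character count with no overlap, so a 2,500-character document becomes two 1,000-character chunks plus a 500-character remainder. A quick standalone check of that behavior (no API key needed):

# Illustrative check of chunk_text's fixed-width splitting (no overlap).
sample = "A" * 2500
pieces = chunk_text(sample, chunk_size=1000)
print([len(p) for p in pieces])  # -> [1000, 1000, 500]

Because the split is purely positional it can cut sentences mid-word; LlamaIndex's SentenceSplitter node parser respects sentence boundaries and could be swapped in if that matters.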
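
The second hunk header references a generate_summary(index, document_text) helper whose body lies outside this diff. For context, a minimal sketch of how such a helper is commonly written against LlamaIndex's query-engine API; the prompt wording and return handling below are assumptions for illustration, not the code actually in app.py:

# Hypothetical sketch only; the real generate_summary in app.py is not shown in this diff.
def generate_summary(index, document_text):
    # Build a query engine over the vector index produced by load_data().
    query_engine = index.as_query_engine()
    # Ask the Gemini-backed engine for a summary of the supplied chunk.
    response = query_engine.query(
        "Summarize the following legal text, noting the parties and key "
        "obligations:\n\n" + document_text
    )
    return str(response)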