Prat0 committed on
Commit
11173d1
·
verified ·
1 Parent(s): 2de5d14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -11
app.py CHANGED
@@ -5,14 +5,16 @@ from llama_index.embeddings.gemini import GeminiEmbedding
5
  from llama_index.llms.gemini import Gemini
6
  import os
7
  import PyPDF2
 
8
 
9
# Function to chunk text into smaller pieces
def chunk_text(text, chunk_size=9000):
    """Split the text into chunks of specified size."""
    pieces = []
    start = 0
    total = len(text)
    while start < total:
        pieces.append(text[start:start + chunk_size])
        start += chunk_size
    return pieces
13
 
14
  # Load and index the legal document data
15
- def load_data(uploaded_files):
16
  documents = []
17
  for uploaded_file in uploaded_files:
18
  document_text = ""
@@ -28,15 +30,19 @@ def load_data(uploaded_files):
28
  for chunk in chunks:
29
  documents.append(Document(text=chunk))
30
 
 
31
  Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
32
- Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
 
 
33
  index = VectorStoreIndex.from_documents(documents)
34
  return index
35
 
36
- # Generate legal document summary
37
- def generate_summary(index, document_text):
 
38
  query_engine = index.as_query_engine()
39
- response = query_engine.query(f"""
40
  You are a skilled legal analyst. Your task is to provide a comprehensive summary of the given legal document.
41
  Analyze the following legal document and summarize it:
42
  {document_text}
@@ -54,7 +60,7 @@ def generate_summary(index, document_text):
54
  return response.response
55
 
56
  # Streamlit app
57
- def main():
58
  st.title("Legal Document Summarizer")
59
  st.write("Upload legal documents, and let our AI summarize them!")
60
 
@@ -65,8 +71,12 @@ def main():
65
  st.write("Analyzing legal documents...")
66
 
67
  # Load data and generate summaries
68
- index = load_data(uploaded_files)
 
69
  summaries = []
 
 
 
70
 
71
  for uploaded_file in uploaded_files:
72
  document_text = ""
@@ -80,8 +90,11 @@ def main():
80
  # Chunk the document text for summarization
81
  chunks = chunk_text(document_text)
82
  for chunk in chunks:
83
- summary = generate_summary(index, chunk)
84
- summaries.append(summary)
 
 
 
85
 
86
  st.write("## Legal Document Summaries")
87
  for i, summary in enumerate(summaries):
@@ -89,4 +102,6 @@ def main():
89
  st.write(summary)
90
 
91
  if __name__ == "__main__":
92
- main()
 
 
 
5
  from llama_index.llms.gemini import Gemini
6
  import os
7
  import PyPDF2
8
+ import asyncio
9
 
10
# Function to chunk text into smaller pieces
def chunk_text(text, chunk_size=1000):
    """Split the text into chunks of specified size."""
    print(f"Chunking text into {chunk_size}-character chunks...")
    length = len(text)
    segments = []
    offset = 0
    while offset < length:
        segments.append(text[offset:offset + chunk_size])
        offset += chunk_size
    return segments
15
 
16
  # Load and index the legal document data
17
+ async def load_data(uploaded_files):
18
  documents = []
19
  for uploaded_file in uploaded_files:
20
  document_text = ""
 
30
  for chunk in chunks:
31
  documents.append(Document(text=chunk))
32
 
33
+ print("Setting up Gemini embedding and LLM...")
34
  Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
35
+ Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
36
+
37
+ print("Creating index from documents...")
38
  index = VectorStoreIndex.from_documents(documents)
39
  return index
40
 
41
+ # Asynchronously generate legal document summary
42
+ async def generate_summary(index, document_text):
43
+ print("Generating summary...")
44
  query_engine = index.as_query_engine()
45
+ response = await query_engine.query(f"""
46
  You are a skilled legal analyst. Your task is to provide a comprehensive summary of the given legal document.
47
  Analyze the following legal document and summarize it:
48
  {document_text}
 
60
  return response.response
61
 
62
  # Streamlit app
63
+ async def main():
64
  st.title("Legal Document Summarizer")
65
  st.write("Upload legal documents, and let our AI summarize them!")
66
 
 
71
  st.write("Analyzing legal documents...")
72
 
73
  # Load data and generate summaries
74
+ print("Loading data and creating index...")
75
+ index = await load_data(uploaded_files)
76
  summaries = []
77
+
78
+ # Collect tasks for asynchronous execution
79
+ tasks = []
80
 
81
  for uploaded_file in uploaded_files:
82
  document_text = ""
 
90
  # Chunk the document text for summarization
91
  chunks = chunk_text(document_text)
92
  for chunk in chunks:
93
+ tasks.append(generate_summary(index, chunk))
94
+
95
+ # Await all summaries
96
+ print("Awaiting summaries...")
97
+ summaries = await asyncio.gather(*tasks)
98
 
99
  st.write("## Legal Document Summaries")
100
  for i, summary in enumerate(summaries):
 
102
  st.write(summary)
103
 
104
# Script entry point: drive the async `main` coroutine to completion.
# NOTE(review): asyncio.run() creates and owns a fresh event loop; if this
# script is launched by a host that already runs an event loop in this thread,
# it raises RuntimeError — confirm how the app is actually started.
if __name__ == "__main__":
    print("Starting application...")
    asyncio.run(main())
    # Reached only after main() has finished and the event loop has shut down.
    print("Application finished.")