shallou committed on
Commit
0128aff
·
verified ·
1 Parent(s): 131ff8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -40
app.py CHANGED
@@ -1,11 +1,11 @@
1
- from dotenv import load_dotenv
2
- import streamlit as st
3
- import pickle
4
- from PyPDF2 import PdfReader
5
- from transformers import pipeline, AutoTokenizer, AutoModel
6
  import os
7
- import torch
8
  import numpy as np
 
 
 
 
 
9
 
10
  # Load environment variables from .env file
11
  load_dotenv()
@@ -15,24 +15,14 @@ def chunk_text(text, chunk_size=1000, chunk_overlap=200):
15
  chunks = []
16
  i = 0
17
  while i < len(text):
18
- # Ensure chunk size and overlap are handled properly
19
  chunks.append(text[i:i + chunk_size])
20
  i += chunk_size - chunk_overlap
21
  return chunks
22
 
23
- # Function to generate embeddings using transformers
24
- def generate_embeddings(text_chunks, model_name='sentence-transformers/all-MiniLM-L6-v2'):
25
- tokenizer = AutoTokenizer.from_pretrained(model_name)
26
- model = AutoModel.from_pretrained(model_name)
27
-
28
- embeddings = []
29
- for text in text_chunks:
30
- # Tokenize the text and generate embeddings
31
- inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
32
- with torch.no_grad():
33
- outputs = model(**inputs)
34
- # Mean pooling on the last hidden state
35
- embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().numpy())
36
  return embeddings
37
 
38
  # Function to find the most relevant chunk based on the cosine similarity
@@ -52,7 +42,6 @@ def main():
52
 
53
  if pdf is not None:
54
  pdf_reader = PdfReader(pdf)
55
-
56
  text = ""
57
  for page in pdf_reader.pages:
58
  text += page.extract_text()
@@ -89,8 +78,8 @@ def main():
89
  result = qa_pipeline(question=query, context=best_chunk)
90
  st.write(result['answer'])
91
 
92
- if __name__ == '__main__':
93
- main()
94
 
95
  def set_bg_from_url(url, opacity=1):
96
  footer = """
@@ -116,20 +105,5 @@ def set_bg_from_url(url, opacity=1):
116
  </footer>
117
  """
118
  st.markdown(footer, unsafe_allow_html=True)
119
-
120
- # Set background image using HTML and CSS
121
- st.markdown(
122
- f"""
123
- <style>
124
- body {{
125
- background: url('{url}') no-repeat center center fixed;
126
- background-size: cover;
127
- opacity: {opacity};
128
- }}
129
- </style>
130
- """,
131
- unsafe_allow_html=True
132
- )
133
-
134
- # Set background image from URL
135
- set_bg_from_url("https://www.1access.com/wp-content/uploads/2019/10/GettyImages-1180389186.jpg", opacity=0.875)
 
 
 
 
 
 
1
  import os
2
+ import pickle
3
  import numpy as np
4
+ from PyPDF2 import PdfReader
5
+ from transformers import pipeline
6
+ from sentence_transformers import SentenceTransformer
7
+ from dotenv import load_dotenv
8
+ import streamlit as st
9
 
10
  # Load environment variables from .env file
11
  load_dotenv()
 
15
  chunks = []
16
  i = 0
17
  while i < len(text):
 
18
  chunks.append(text[i:i + chunk_size])
19
  i += chunk_size - chunk_overlap
20
  return chunks
21
 
22
+ # Function to generate embeddings using sentence-transformers
23
+ def generate_embeddings(text_chunks, model_name='all-MiniLM-L6-v2'):
24
+ model = SentenceTransformer(model_name)
25
+ embeddings = model.encode(text_chunks, convert_to_tensor=False)
 
 
 
 
 
 
 
 
 
26
  return embeddings
27
 
28
  # Function to find the most relevant chunk based on the cosine similarity
 
42
 
43
  if pdf is not None:
44
  pdf_reader = PdfReader(pdf)
 
45
  text = ""
46
  for page in pdf_reader.pages:
47
  text += page.extract_text()
 
78
  result = qa_pipeline(question=query, context=best_chunk)
79
  st.write(result['answer'])
80
 
81
+ # Set background image from URL
82
+ set_bg_from_url("https://www.1access.com/wp-content/uploads/2019/10/GettyImages-1180389186.jpg", opacity=0.5)
83
 
84
  def set_bg_from_url(url, opacity=1):
85
  footer = """
 
105
  </footer>
106
  """
107
  st.markdown(footer, unsafe_allow_html=True)
108
+
109
+ # Set background image using