binqiangliu committed on
Commit
48daa24
·
1 Parent(s): 2ef277b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -30
app.py CHANGED
@@ -29,52 +29,31 @@ with open(css_file) as f:
29
 
30
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
31
 
32
- #documents=[]
33
-
34
  def generate_random_string(length):
35
  letters = string.ascii_lowercase
36
  return ''.join(random.choice(letters) for i in range(length))
37
 
38
- #random_string = generate_random_string(20)
39
- #directory_path=random_string
40
-
41
- #if "pdf_files" not in st.session_state:
42
- #st.session_state.pdf_files = None
43
-
44
- #if "documents" not in st.session_state:
45
- #st.session_state.documents = None
46
-
47
- if "new_index" not in st.session_state:
48
- st.session_state.new_index = None
49
-
50
- if "i_pdf_files" not in st.session_state:
51
- st.session_state.i_pdf_files = None
52
-
53
  if "query_engine" not in st.session_state:
54
  st.session_state.query_engine = None
55
 
56
  with st.sidebar:
57
  st.subheader("Upload your Documents Here: ")
58
- pdf_files = st.file_uploader("Choose your PDF Files and Press OK", type=['pdf'], accept_multiple_files=True)
59
- st.session_state.i_pdf_files = pdf_files
60
- if not st.session_state.i_pdf_files:
61
  st.warning("请上传文档文件")
62
  st.stop()
63
  else:
64
- if "uploadedfile_path" not in st.session_state:
65
- st.session_state.uploadedfile_path=generate_random_string(20)
66
- if not os.path.exists(st.session_state.uploadedfile_path):
67
- os.makedirs(st.session_state.uploadedfile_path)
68
- for pdf_file in st.session_state.i_pdf_files:
69
- file_path = os.path.join(st.session_state.uploadedfile_path, pdf_file.name)
70
  with open(file_path, 'wb') as f:
71
  f.write(pdf_file.read())
72
  st.success(f"File '{pdf_file.name}' saved successfully.")
 
73
  try:
74
  start_1 = timeit.default_timer() # Start timer
75
  st.write(f"QA文档加载开始:{start_1}")
76
- if "documents" not in st.session_state:
77
- st.session_state.documents = SimpleDirectoryReader(st.session_state.uploadedfile_path).load_data()
78
  end_1 = timeit.default_timer() # Start timer
79
  st.write(f"QA文档加载结束:{end_1}")
80
  st.write(f"QA文档加载耗时:{end_1 - start_1}")
@@ -95,13 +74,13 @@ with st.sidebar:
95
  st.session_state.service_context = ServiceContext.from_defaults(llm_predictor=st.session_state.llm_predictor, embed_model=st.session_state.embed_model)
96
  start_3 = timeit.default_timer() # Start timer
97
  st.write(f"向量库构建开始:{start_3}")
98
- st.session_state.new_index = VectorStoreIndex.from_documents(st.session_state.documents, service_context=st.session_state.service_context)
99
  end_3 = timeit.default_timer() # Start timer
100
  st.write(f"向量库构建结束:{end_3}")
101
  st.write(f"向量库构建耗时:{end_3 - start_3}")
102
  directory_path = generate_random_string(20)
103
  os.makedirs(directory_path)
104
- st.session_state.new_index.storage_context.persist("directory_path")
105
  storage_context = StorageContext.from_defaults(persist_dir="directory_path")
106
  start_4 = timeit.default_timer() # Start timer
107
  st.write(f"向量库装载开始:{start_4}")
 
29
 
30
  HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
31
 
 
 
32
  def generate_random_string(length):
33
  letters = string.ascii_lowercase
34
  return ''.join(random.choice(letters) for i in range(length))
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  if "query_engine" not in st.session_state:
37
  st.session_state.query_engine = None
38
 
39
  with st.sidebar:
40
  st.subheader("Upload your Documents Here: ")
41
+ pdf_files = st.file_uploader("Choose your PDF Files and Press OK", type=['pdf'], accept_multiple_files=True)
42
+ if not pdf_files:
 
43
  st.warning("请上传文档文件")
44
  st.stop()
45
  else:
46
+ uploadedfile_path=generate_random_string(20)
47
+ for pdf_file in pdf_files:
48
+ file_path = os.path.join(uploadedfile_path, pdf_file.name)
 
 
 
49
  with open(file_path, 'wb') as f:
50
  f.write(pdf_file.read())
51
  st.success(f"File '{pdf_file.name}' saved successfully.")
52
+ if st.button('Process for QA'):
53
  try:
54
  start_1 = timeit.default_timer() # Start timer
55
  st.write(f"QA文档加载开始:{start_1}")
56
+ documents = SimpleDirectoryReader(uploadedfile_path).load_data()
 
57
  end_1 = timeit.default_timer() # Start timer
58
  st.write(f"QA文档加载结束:{end_1}")
59
  st.write(f"QA文档加载耗时:{end_1 - start_1}")
 
74
  st.session_state.service_context = ServiceContext.from_defaults(llm_predictor=st.session_state.llm_predictor, embed_model=st.session_state.embed_model)
75
  start_3 = timeit.default_timer() # Start timer
76
  st.write(f"向量库构建开始:{start_3}")
77
+ new_index = VectorStoreIndex.from_documents(documents, service_context=st.session_state.service_context)
78
  end_3 = timeit.default_timer() # Start timer
79
  st.write(f"向量库构建结束:{end_3}")
80
  st.write(f"向量库构建耗时:{end_3 - start_3}")
81
  directory_path = generate_random_string(20)
82
  os.makedirs(directory_path)
83
+ new_index.storage_context.persist("directory_path")
84
  storage_context = StorageContext.from_defaults(persist_dir="directory_path")
85
  start_4 = timeit.default_timer() # Start timer
86
  st.write(f"向量库装载开始:{start_4}")