Spaces:
Sleeping
Sleeping
Commit
·
48daa24
1
Parent(s):
2ef277b
Update app.py
Browse files
app.py
CHANGED
@@ -29,52 +29,31 @@ with open(css_file) as f:
|
|
29 |
|
30 |
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
31 |
|
32 |
-
#documents=[]
|
33 |
-
|
34 |
def generate_random_string(length: int) -> str:
    """Return a string of ``length`` random lowercase ASCII letters.

    Each character is drawn independently from ``string.ascii_lowercase``
    with ``random.choice``. The ``random`` module is not cryptographically
    secure, so the result must not be treated as a secret.

    Args:
        length: Number of characters to generate. Zero or a negative value
            produces an empty string, because ``range`` is then empty.

    Returns:
        The generated string.
    """
    letters = string.ascii_lowercase
    # The loop index is never used; only the number of draws matters.
    return ''.join(random.choice(letters) for _ in range(length))
|
37 |
|
38 |
-
#random_string = generate_random_string(20)
|
39 |
-
#directory_path=random_string
|
40 |
-
|
41 |
-
#if "pdf_files" not in st.session_state:
|
42 |
-
#st.session_state.pdf_files = None
|
43 |
-
|
44 |
-
#if "documents" not in st.session_state:
|
45 |
-
#st.session_state.documents = None
|
46 |
-
|
47 |
-
if "new_index" not in st.session_state:
|
48 |
-
st.session_state.new_index = None
|
49 |
-
|
50 |
-
if "i_pdf_files" not in st.session_state:
|
51 |
-
st.session_state.i_pdf_files = None
|
52 |
-
|
53 |
# Streamlit re-runs the script on every interaction; seed the slot only once
# so an existing query engine is not overwritten on later runs.
if "query_engine" not in st.session_state:
    st.session_state["query_engine"] = None
|
55 |
|
56 |
with st.sidebar:
|
57 |
st.subheader("Upload your Documents Here: ")
|
58 |
-
pdf_files = st.file_uploader("Choose your PDF Files and Press OK", type=['pdf'], accept_multiple_files=True)
|
59 |
-
|
60 |
-
if not st.session_state.i_pdf_files:
|
61 |
st.warning("请上传文档文件")
|
62 |
st.stop()
|
63 |
else:
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
os.makedirs(st.session_state.uploadedfile_path)
|
68 |
-
for pdf_file in st.session_state.i_pdf_files:
|
69 |
-
file_path = os.path.join(st.session_state.uploadedfile_path, pdf_file.name)
|
70 |
with open(file_path, 'wb') as f:
|
71 |
f.write(pdf_file.read())
|
72 |
st.success(f"File '{pdf_file.name}' saved successfully.")
|
|
|
73 |
try:
|
74 |
start_1 = timeit.default_timer() # Start timer
|
75 |
st.write(f"QA文档加载开始:{start_1}")
|
76 |
-
|
77 |
-
st.session_state.documents = SimpleDirectoryReader(st.session_state.uploadedfile_path).load_data()
|
78 |
end_1 = timeit.default_timer() # Start timer
|
79 |
st.write(f"QA文档加载结束:{end_1}")
|
80 |
st.write(f"QA文档加载耗时:{end_1 - start_1}")
|
@@ -95,13 +74,13 @@ with st.sidebar:
|
|
95 |
st.session_state.service_context = ServiceContext.from_defaults(llm_predictor=st.session_state.llm_predictor, embed_model=st.session_state.embed_model)
|
96 |
start_3 = timeit.default_timer() # Start timer
|
97 |
st.write(f"向量库构建开始:{start_3}")
|
98 |
-
|
99 |
end_3 = timeit.default_timer() # Start timer
|
100 |
st.write(f"向量库构建结束:{end_3}")
|
101 |
st.write(f"向量库构建耗时:{end_3 - start_3}")
|
102 |
directory_path = generate_random_string(20)
|
103 |
os.makedirs(directory_path)
|
104 |
-
|
105 |
storage_context = StorageContext.from_defaults(persist_dir="directory_path")
|
106 |
start_4 = timeit.default_timer() # Start timer
|
107 |
st.write(f"向量库装载开始:{start_4}")
|
|
|
29 |
|
30 |
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
31 |
|
|
|
|
|
32 |
def generate_random_string(length: int) -> str:
    """Return a string of ``length`` random lowercase ASCII letters.

    Each character is drawn independently from ``string.ascii_lowercase``
    with ``random.choice``. The ``random`` module is not cryptographically
    secure, so the result must not be treated as a secret.

    Args:
        length: Number of characters to generate. Zero or a negative value
            produces an empty string, because ``range`` is then empty.

    Returns:
        The generated string.
    """
    letters = string.ascii_lowercase
    # The loop index is never used; only the number of draws matters.
    return ''.join(random.choice(letters) for _ in range(length))
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
# Streamlit re-runs the script on every interaction; seed the slot only once
# so an existing query engine is not overwritten on later runs.
if "query_engine" not in st.session_state:
    st.session_state["query_engine"] = None
|
38 |
|
39 |
with st.sidebar:
|
40 |
st.subheader("Upload your Documents Here: ")
|
41 |
+
pdf_files = st.file_uploader("Choose your PDF Files and Press OK", type=['pdf'], accept_multiple_files=True)
|
42 |
+
if not pdf_files:
|
|
|
43 |
st.warning("请上传文档文件")
|
44 |
st.stop()
|
45 |
else:
|
46 |
+
uploadedfile_path=generate_random_string(20)
|
47 |
+
for pdf_file in pdf_files:
|
48 |
+
file_path = os.path.join(uploadedfile_path, pdf_file.name)
|
|
|
|
|
|
|
49 |
with open(file_path, 'wb') as f:
|
50 |
f.write(pdf_file.read())
|
51 |
st.success(f"File '{pdf_file.name}' saved successfully.")
|
52 |
+
if st.button('Process for QA'):
|
53 |
try:
|
54 |
start_1 = timeit.default_timer() # Start timer
|
55 |
st.write(f"QA文档加载开始:{start_1}")
|
56 |
+
documents = SimpleDirectoryReader(uploadedfile_path).load_data()
|
|
|
57 |
end_1 = timeit.default_timer() # Start timer
|
58 |
st.write(f"QA文档加载结束:{end_1}")
|
59 |
st.write(f"QA文档加载耗时:{end_1 - start_1}")
|
|
|
74 |
st.session_state.service_context = ServiceContext.from_defaults(llm_predictor=st.session_state.llm_predictor, embed_model=st.session_state.embed_model)
|
75 |
start_3 = timeit.default_timer() # Start timer
|
76 |
st.write(f"向量库构建开始:{start_3}")
|
77 |
+
new_index = VectorStoreIndex.from_documents(documents, service_context=st.session_state.service_context)
|
78 |
end_3 = timeit.default_timer() # Start timer
|
79 |
st.write(f"向量库构建结束:{end_3}")
|
80 |
st.write(f"向量库构建耗时:{end_3 - start_3}")
|
81 |
directory_path = generate_random_string(20)
|
82 |
os.makedirs(directory_path)
|
83 |
+
new_index.storage_context.persist("directory_path")
|
84 |
storage_context = StorageContext.from_defaults(persist_dir="directory_path")
|
85 |
start_4 = timeit.default_timer() # Start timer
|
86 |
st.write(f"向量库装载开始:{start_4}")
|