Spaces:
Sleeping
Sleeping
Update
Browse files
app.py
CHANGED
@@ -24,13 +24,8 @@ async def ask_api(request: AskRequest):
|
|
24 |
|
25 |
@app.post("/api/v2/ask")
|
26 |
async def ask_api(request: AskRequest):
|
27 |
-
|
28 |
-
|
29 |
-
df = util.read_df(xlsx_url)
|
30 |
-
df_update = util.split_page_breaks(df, request.page_content_column)
|
31 |
-
documents = faq.create_documents(df_update, request.page_content_column)
|
32 |
-
embedding_function = faq.define_embedding_function("sentence-transformers/all-mpnet-base-v2")
|
33 |
-
vectordb = faq.get_vectordb(faq_id=faq_id, embedding_function=embedding_function, documents=documents, vectordb_type=faq.VECTORDB_TYPE.Chroma)
|
34 |
documents = faq.similarity_search(vectordb, request.question, k=request.k)
|
35 |
df_doc = util.transform_documents_to_dataframe(documents)
|
36 |
df_filter = util.remove_duplicates_by_column(df_doc, "ID")
|
|
|
24 |
|
25 |
@app.post("/api/v2/ask")
|
26 |
async def ask_api(request: AskRequest):
|
27 |
+
util.SPLIT_PAGE_BREAKS = True
|
28 |
+
vectordb = faq.load_vectordb(request.sheet_url, request.page_content_column)
|
|
|
|
|
|
|
|
|
|
|
29 |
documents = faq.similarity_search(vectordb, request.question, k=request.k)
|
30 |
df_doc = util.transform_documents_to_dataframe(documents)
|
31 |
df_filter = util.remove_duplicates_by_column(df_doc, "ID")
|
util.py
CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
|
|
3 |
SHEET_URL_X = "https://docs.google.com/spreadsheets/d/"
|
4 |
SHEET_URL_Y = "/edit#gid="
|
5 |
SHEET_URL_Y_EXPORT = "/export?gid="
|
|
|
6 |
|
7 |
|
8 |
def get_id(sheet_url: str) -> str:
|
@@ -16,8 +17,11 @@ def xlsx_url(get_id: str) -> str:
|
|
16 |
return SHEET_URL_X + get_id[0:y] + SHEET_URL_Y_EXPORT + get_id[y + 1 :]
|
17 |
|
18 |
|
19 |
-
def read_df(xlsx_url: str) -> pd.DataFrame:
|
20 |
-
|
|
|
|
|
|
|
21 |
|
22 |
|
23 |
def split_page_breaks(df, column_name):
|
|
|
3 |
SHEET_URL_X = "https://docs.google.com/spreadsheets/d/"
|
4 |
SHEET_URL_Y = "/edit#gid="
|
5 |
SHEET_URL_Y_EXPORT = "/export?gid="
|
6 |
+
SPLIT_PAGE_BREAKS = False
|
7 |
|
8 |
|
9 |
def get_id(sheet_url: str) -> str:
|
|
|
17 |
return SHEET_URL_X + get_id[0:y] + SHEET_URL_Y_EXPORT + get_id[y + 1 :]
|
18 |
|
19 |
|
20 |
+
def read_df(
    xlsx_url: str,
    split_page_breaks: "bool | None" = None,
    page_content_column: "str | None" = None,
) -> pd.DataFrame:
    """Load the spreadsheet at *xlsx_url* into a DataFrame.

    The sheet is read with the first row as the header and with pandas'
    default NA conversion disabled (``keep_default_na=False``).

    Args:
        xlsx_url: Location passed straight to ``pandas.read_excel``.
        split_page_breaks: Whether to post-process the frame with the
            module-level ``split_page_breaks`` function. ``None`` (the
            default) means "use the current value of ``SPLIT_PAGE_BREAKS``",
            looked up at call time so that runtime changes to that module
            flag are honoured.
        page_content_column: Column name forwarded to the module-level
            ``split_page_breaks`` function. Required when splitting is
            enabled.

    Returns:
        The loaded (and optionally split) DataFrame.

    Raises:
        ValueError: If splitting is enabled but *page_content_column* was
            not supplied.
    """
    # Resolve the module flag lazily: a default of ``SPLIT_PAGE_BREAKS`` in
    # the signature would be frozen at definition time.
    if split_page_breaks is None:
        split_page_breaks = SPLIT_PAGE_BREAKS

    # Fail before doing any I/O if the request cannot be satisfied.
    if split_page_breaks and page_content_column is None:
        raise ValueError(
            "page_content_column is required when split_page_breaks is enabled"
        )

    df = pd.read_excel(xlsx_url, header=0, keep_default_na=False)

    if split_page_breaks:
        # The boolean parameter shadows the module-level function of the
        # same name, so fetch the function explicitly from module globals.
        splitter = globals()["split_page_breaks"]
        df = splitter(df, page_content_column)
    return df
|
25 |
|
26 |
|
27 |
def split_page_breaks(df, column_name):
|