andreasmartin committed on
Commit
67bfb80
·
unverified ·
1 Parent(s): 7272f4f
Files changed (2) hide show
  1. app.py +2 -7
  2. util.py +6 -2
app.py CHANGED
@@ -24,13 +24,8 @@ async def ask_api(request: AskRequest):
24
 
25
  @app.post("/api/v2/ask")
26
  async def ask_api(request: AskRequest):
27
- faq_id = util.get_id(request.sheet_url)
28
- xlsx_url = util.xlsx_url(faq_id)
29
- df = util.read_df(xlsx_url)
30
- df_update = util.split_page_breaks(df, request.page_content_column)
31
- documents = faq.create_documents(df_update, request.page_content_column)
32
- embedding_function = faq.define_embedding_function("sentence-transformers/all-mpnet-base-v2")
33
- vectordb = faq.get_vectordb(faq_id=faq_id, embedding_function=embedding_function, documents=documents, vectordb_type=faq.VECTORDB_TYPE.Chroma)
34
  documents = faq.similarity_search(vectordb, request.question, k=request.k)
35
  df_doc = util.transform_documents_to_dataframe(documents)
36
  df_filter = util.remove_duplicates_by_column(df_doc, "ID")
 
24
 
25
  @app.post("/api/v2/ask")
26
  async def ask_api(request: AskRequest):
27
+ util.SPLIT_PAGE_BREAKS = True
28
+ vectordb = faq.load_vectordb(request.sheet_url, request.page_content_column)
 
 
 
 
 
29
  documents = faq.similarity_search(vectordb, request.question, k=request.k)
30
  df_doc = util.transform_documents_to_dataframe(documents)
31
  df_filter = util.remove_duplicates_by_column(df_doc, "ID")
util.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  SHEET_URL_X = "https://docs.google.com/spreadsheets/d/"
4
  SHEET_URL_Y = "/edit#gid="
5
  SHEET_URL_Y_EXPORT = "/export?gid="
 
6
 
7
 
8
  def get_id(sheet_url: str) -> str:
@@ -16,8 +17,11 @@ def xlsx_url(get_id: str) -> str:
16
  return SHEET_URL_X + get_id[0:y] + SHEET_URL_Y_EXPORT + get_id[y + 1 :]
17
 
18
 
19
- def read_df(xlsx_url: str) -> pd.DataFrame:
20
- return pd.read_excel(xlsx_url, header=0, keep_default_na=False)
 
 
 
21
 
22
 
23
  def split_page_breaks(df, column_name):
 
3
  SHEET_URL_X = "https://docs.google.com/spreadsheets/d/"
4
  SHEET_URL_Y = "/edit#gid="
5
  SHEET_URL_Y_EXPORT = "/export?gid="
6
+ SPLIT_PAGE_BREAKS = False
7
 
8
 
9
  def get_id(sheet_url: str) -> str:
 
17
  return SHEET_URL_X + get_id[0:y] + SHEET_URL_Y_EXPORT + get_id[y + 1 :]
18
 
19
 
20
def read_df(
    xlsx_url: str,
    split_page_breaks: "bool | None" = None,
    page_content_column: "str | None" = None,
) -> pd.DataFrame:
    """Load the spreadsheet at *xlsx_url* into a DataFrame.

    Args:
        xlsx_url: URL (or path) of the XLSX export, passed to
            ``pd.read_excel``.
        split_page_breaks: Whether to post-process the frame with the
            module-level ``split_page_breaks`` function. ``None`` (the
            default) means "use the current value of the module flag
            ``SPLIT_PAGE_BREAKS``".
        page_content_column: Name of the column handed to
            ``split_page_breaks``. Required whenever splitting is enabled.

    Returns:
        The loaded DataFrame, split on page breaks when requested.

    Raises:
        ValueError: If splitting is enabled but ``page_content_column``
            was not provided.
    """
    # Resolve the flag at call time. A default of ``SPLIT_PAGE_BREAKS`` in the
    # signature would be frozen when the function is defined, so callers that
    # later set ``util.SPLIT_PAGE_BREAKS = True`` would see no effect.
    if split_page_breaks is None:
        split_page_breaks = SPLIT_PAGE_BREAKS

    # Fail before doing any I/O if the request cannot be satisfied.
    if split_page_breaks and page_content_column is None:
        raise ValueError(
            "page_content_column is required when split_page_breaks is enabled"
        )

    df = pd.read_excel(xlsx_url, header=0, keep_default_na=False)
    if split_page_breaks:
        # The boolean parameter shadows the module-level function of the same
        # name inside this body, so fetch the function from module globals.
        split_fn = globals()["split_page_breaks"]
        df = split_fn(df, page_content_column)
    return df
25
 
26
 
27
  def split_page_breaks(df, column_name):