jpangas committed on
Commit
688c931
·
verified ·
1 Parent(s): eac086e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -15
app.py CHANGED
@@ -16,12 +16,29 @@ import xmltodict
16
  qa_graph = None
17
  current_file = None
18
 
19
-
20
  class State(TypedDict):
21
  question: str
22
  context: List[Document]
23
  answer: str
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def initiate_graph(file):
27
  global qa_graph, current_file
@@ -70,7 +87,6 @@ def initiate_graph(file):
70
  name = file.name.split("/")[-1]
71
  return f"The paper {name} has been loaded and is ready for questions!"
72
 
73
-
74
  def answer_question(question, history):
75
  global qa_graph, current_file
76
 
@@ -80,7 +96,6 @@ def answer_question(question, history):
80
  response = qa_graph.invoke({"question": question})
81
  return response["answer"]
82
 
83
-
84
  def slow_echo(message, history):
85
  answer = answer_question(message, history)
86
  if answer == "Please upload a PDF file first!":
@@ -91,20 +106,22 @@ def slow_echo(message, history):
91
  time.sleep(0.01)
92
  yield answer[: i + 1]
93
 
 
 
 
 
 
 
94
 
95
- with gr.Blocks() as demo:
96
- file_input = gr.File(
97
- label="Upload a research paper as a pdf file wait for it to be loaded",
98
- file_types=[".pdf"],
99
- )
100
-
101
- textbox = gr.Textbox(
102
- label="Status of Upload", value="No Paper Uploaded", interactive=False
103
- )
104
 
105
- chat_interface = gr.ChatInterface(slow_echo, type="messages")
106
 
107
- file_input.upload(fn=initiate_graph, inputs=file_input, outputs=textbox)
108
 
 
109
 
110
- demo.queue()
 
 
# Module-level state shared by the upload and question handlers:
# `initiate_graph` assigns both (via `global`), and `answer_question`
# calls `qa_graph.invoke(...)` once a paper has been loaded.
qa_graph = None  # QA graph object; None until a PDF is uploaded
current_file = None  # the currently loaded PDF, if any — TODO confirm it holds the path
 
 
19
class State(TypedDict):
    """State carried through the QA graph for a single question.

    Keys:
        question: the user's question text.
        context: retrieved documents used to ground the answer.
        answer: the generated answer text.
    """

    question: str
    context: List[Document]
    answer: str
23
 
24
def get_extra_docs(file_name):
    """Extract the paper's title from a PDF via a GROBID header parse.

    Args:
        file_name: Path to the PDF file to send to the GROBID service
            (configured through ./config.json).

    Returns:
        The document title parsed out of the TEI header XML.

    TODO: also extract and return the authors and abstract from the TEI
    header; keep the return shape backward-compatible when that lands.
    """
    client = GrobidClient(config_path="./config.json")
    # process_pdf returns a tuple whose third element is the TEI XML payload.
    information = client.process_pdf(
        "processHeaderDocument",
        file_name,
        generateIDs=False,
        consolidate_header=False,
        consolidate_citations=False,
        include_raw_citations=False,
        include_raw_affiliations=False,
        tei_coordinates=False,
        segment_sentences=False,
    )
    dict_information = xmltodict.parse(information[2])
    # NOTE(review): GROBID emits a TEI root element spelled "TEI"; confirm
    # xmltodict really yields a lowercase "tei" key here, otherwise this
    # lookup raises KeyError.
    title = dict_information["tei"]["teiHeader"]["fileDesc"]["titleStmt"]["title"]
    return title
42
 
43
  def initiate_graph(file):
44
  global qa_graph, current_file
 
87
  name = file.name.split("/")[-1]
88
  return f"The paper {name} has been loaded and is ready for questions!"
89
 
 
90
  def answer_question(question, history):
91
  global qa_graph, current_file
92
 
 
96
  response = qa_graph.invoke({"question": question})
97
  return response["answer"]
98
 
 
99
  def slow_echo(message, history):
100
  answer = answer_question(message, history)
101
  if answer == "Please upload a PDF file first!":
 
106
  time.sleep(0.01)
107
  yield answer[: i + 1]
108
 
109
def main():
    """Build and launch the Gradio UI for chatting with an uploaded paper.

    Wires a PDF file picker to `initiate_graph` (which reports load status
    into a read-only textbox) and exposes `slow_echo` through a chat panel.
    """
    with gr.Blocks() as app:
        pdf_upload = gr.File(
            label="Upload a research paper as a pdf file and wait for it to be loaded",
            file_types=[".pdf"],
        )

        # Read-only status line updated once a paper finishes loading.
        status_box = gr.Textbox(
            label="Status of Upload", value="No Paper Uploaded", interactive=False
        )

        gr.ChatInterface(slow_echo, type="messages")

        pdf_upload.upload(fn=initiate_graph, inputs=pdf_upload, outputs=status_box)

    app.queue().launch()
125
 
126
# Script entry point: build and launch the Gradio app.
if __name__ == "__main__":
    main()