jpangas committed on
Commit
9807ccc
·
verified ·
1 Parent(s): 17c8aa2

Update the app with a better UI

Browse files
Files changed (1) hide show
  1. app.py +98 -18
app.py CHANGED
@@ -1,30 +1,110 @@
1
- import gradio as gr
2
  from grobid_client.grobid_client import GrobidClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
 
 
 
4
 
5
- def extract_text(file):
6
- client = GrobidClient(config_path="./config.json")
7
- information = client.process_pdf(
8
- "processFulltextDocument",
9
  file.name,
10
- generateIDs=False,
11
- consolidate_header=False,
12
- consolidate_citations=False,
13
- include_raw_citations=False,
14
- include_raw_affiliations=False,
15
- tei_coordinates=False,
16
- segment_sentences=False,
17
  )
18
- return information
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
 
21
- # Ask Dr Ahmad about which LLM to use and if we have a token for it
22
  with gr.Blocks() as demo:
23
  file_input = gr.File(
24
- label="Upload a research paper as a pdf file", file_types=[".pdf"]
 
 
 
 
 
25
  )
26
- text_output = gr.Textbox(label="Extracted Text")
27
 
28
- file_input.upload(fn=extract_text, inputs=file_input, outputs=text_output)
 
 
 
29
 
30
- demo.launch()
 
 
1
  from grobid_client.grobid_client import GrobidClient
2
+ import time
3
+ import gradio as gr
4
+ from langchain_community.document_loaders.generic import GenericLoader
5
+ from langchain_community.document_loaders.parsers import GrobidParser
6
+ from langchain_openai import OpenAIEmbeddings
7
+ from langchain_core.vectorstores import InMemoryVectorStore
8
+ from langchain_openai import ChatOpenAI
9
+ from langchain import hub
10
+ from langchain_core.documents import Document
11
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
12
+ from langgraph.graph import START, StateGraph
13
+ from typing_extensions import List, TypedDict
14
+ import xmltodict
15
+
16
+ qa_graph = None
17
+ current_file = None
18
+
19
+
20
class State(TypedDict):
    """Shared state flowing through the retrieve -> generate LangGraph pipeline."""

    # The user's question, as passed to qa_graph.invoke().
    question: str
    # Documents returned by the vector-store similarity search.
    context: List[Document]
    # The LLM-generated answer text.
    answer: str
24
+
25
+
26
def initiate_graph(file):
    """Build (or reuse) the RAG question-answering graph for an uploaded PDF.

    Parses the PDF with a remote Grobid server, splits the text into chunks,
    embeds the chunks into an in-memory vector store, and compiles a two-step
    LangGraph pipeline (retrieve -> generate) into the module-global
    ``qa_graph``.

    Args:
        file: A Gradio file object; only ``file.name`` (the local path) is used.

    Returns:
        A status message naming the loaded paper.
    """
    global qa_graph, current_file

    # Re-uploading the same file reuses the compiled graph instead of
    # repeating the Grobid parse and the embedding calls. (The original
    # cleared qa_graph on a *different* file but then rebuilt unconditionally,
    # so the cache check never saved any work.)
    if current_file == file.name and qa_graph is not None:
        name = file.name.split("/")[-1]
        return f"The paper {name} has been loaded and is ready for questions!"

    qa_graph = None
    current_file = file.name

    loader = GenericLoader.from_filesystem(
        file.name,
        parser=GrobidParser(
            segment_sentences=False,
            grobid_server="https://jpangas-grobid-paper-extractor.hf.space/api/processFulltextDocument",
        ),
    )
    docs = loader.load()

    embeddings = OpenAIEmbeddings()
    vector_store = InMemoryVectorStore(embeddings)

    llm = ChatOpenAI(model="gpt-4o-mini")
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, add_start_index=True
    )
    all_splits = text_splitter.split_documents(docs)
    vector_store.add_documents(documents=all_splits)
    prompt = hub.pull("rlm/rag-prompt")

    def retrieve(state: State):
        # Fetch the chunks most similar to the question.
        retrieved_docs = vector_store.similarity_search(state["question"])
        return {"context": retrieved_docs}

    def generate(state: State):
        # Stuff the retrieved chunks into the RAG prompt and query the LLM.
        docs_content = "\n\n".join(doc.page_content for doc in state["context"])
        messages = prompt.invoke(
            {"question": state["question"], "context": docs_content}
        )
        response = llm.invoke(messages)
        return {"answer": response.content}

    graph_builder = StateGraph(State).add_sequence([retrieve, generate])
    graph_builder.add_edge(START, "retrieve")
    qa_graph = graph_builder.compile()

    name = file.name.split("/")[-1]
    return f"The paper {name} has been loaded and is ready for questions!"
72
+
73
+
74
def answer_question(question, history):
    """Answer *question* using the compiled RAG graph.

    *history* is accepted for the chat-interface signature but not used.
    Returns either the model's answer or a prompt to upload a paper first.
    """
    # qa_graph is only read here, so no ``global`` statement is needed.
    if qa_graph is None:
        return "Please upload a PDF file first and wait for it to be loaded!"

    result = qa_graph.invoke({"question": question})
    return result["answer"]
82
+
83
+
84
def slow_echo(message, history):
    """Stream the answer one character at a time for a typewriter effect.

    Args:
        message: The user's chat message.
        history: Prior chat history, forwarded to ``answer_question``.

    Yields:
        Growing prefixes of the answer, one extra character per step.
    """
    answer = answer_question(message, history)

    # BUG FIX: the original compared against "Please upload a PDF file first!",
    # a string answer_question never returns, so the "no paper" notice was
    # typed out slowly like a normal answer. Compare against the actual
    # sentinel and emit it immediately instead.
    if answer == "Please upload a PDF file first and wait for it to be loaded!":
        yield answer
        return

    for i in range(len(answer)):
        time.sleep(0.01)
        yield answer[: i + 1]
93
 
94
 
 
95
# Gradio UI: a PDF uploader with a status box, plus a chat interface whose
# replies are streamed by slow_echo. Uploading a file builds the RAG graph
# via initiate_graph and reports readiness in the status textbox.
with gr.Blocks() as demo:
    file_input = gr.File(
        label="Upload a research paper as a pdf file wait for it to be loaded",
        file_types=[".pdf"],
    )

    textbox = gr.Textbox(
        label="Status of Upload", value="No Paper Uploaded", interactive=False
    )

    chat_interface = gr.ChatInterface(slow_echo, type="messages")

    file_input.upload(fn=initiate_graph, inputs=file_input, outputs=textbox)

# Queueing is required so the generator-based chat handler can stream.
demo.queue()

# BUG FIX: the script previously ended at demo.queue() with no launch() call,
# so running the file locally started no server. (Hugging Face Spaces may
# auto-launch a Blocks named ``demo`` — the guard keeps this safe there too.)
if __name__ == "__main__":
    demo.launch()