File size: 5,794 Bytes
2b5aeed
 
 
 
 
 
 
 
 
792e26a
 
 
2b5aeed
792e26a
2b5aeed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
792e26a
 
2b5aeed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
792e26a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a0d31f
792e26a
 
 
 
 
 
2b5aeed
792e26a
9a0d31f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.messages import HumanMessage, AIMessageChunk, AIMessage
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
import os
from langchain_core.chat_history import InMemoryChatMessageHistory, BaseChatMessageHistory
import time

from langgraph.errors import GraphRecursionError

from graph import get_graph
from langchain_core.runnables import RunnableConfig

# One-time Streamlit session-state initialisation. Session state survives
# reruns, so each key is only seeded when it is absent.
if 'read_file' not in st.session_state:
    st.session_state.read_file = False  # becomes True once a PDF has been indexed
    st.session_state.retriever = None  # vector-store retriever built from the uploaded PDF

if 'chat_history' not in st.session_state:
    st.session_state.chat_history = {}  # session_id -> InMemoryChatMessageHistory
    st.session_state.first_msg = True  # triggers the assistant's one-time greeting

def get_session_by_id(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for *session_id*, creating it on first use.

    Args:
        session_id: Key identifying one conversation in session state.

    Returns:
        The ``InMemoryChatMessageHistory`` stored under ``session_id``.
    """
    # Original had a duplicated `return` inside the `if` branch; create the
    # history when missing, then fall through to the single return.
    if session_id not in st.session_state.chat_history:
        st.session_state.chat_history[session_id] = InMemoryChatMessageHistory()
    return st.session_state.chat_history[session_id]

if not st.session_state.read_file:
    # Upload screen: shown until a PDF has been ingested and indexed.
    st.title('🤓 Upload your PDF to talk with it', anchor=False)
    file = st.file_uploader('Upload a PDF file', type='pdf')
    if file:
        with st.status('🤗 Booting up the things!', expanded=True):
            with st.spinner('📝 Uploading the PDF...', show_time=True):
                # Write the upload to disk and CLOSE it before parsing.
                # The original constructed PyPDFLoader inside the `with`
                # block, so it could read a partially flushed file.
                with open('file.pdf', 'wb') as f:
                    f.write(file.read())
                loader = PyPDFLoader('file.pdf')
                documents = loader.load_and_split(
                    RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200))
                st.success('📝 File uploaded successfully!!!')
            with st.spinner('🧐 Reading the file...', show_time=True):
                # Embed the chunks and keep a retriever for the graph to query.
                vstore = InMemoryVectorStore.from_documents(
                    documents, HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2'))
                st.session_state.retriever = vstore.as_retriever()
                st.success('🧐 File read successfully!!!')
            os.remove('file.pdf')  # temp file no longer needed once embedded
            with st.spinner('😴 Waking up the LLM...', show_time=True):
                st.session_state.graph = get_graph(st.session_state.retriever)
                st.success('😁 LLM awakened!!!')
            st.balloons()
        # Short countdown so the user sees the success state before the rerun.
        placeholder = st.empty()
        for sec in range(5, -1, -1):
            # {sec:02d} replaces the fragile manual padding f'0{_}' (same
            # output for 0-9, and correct if the countdown ever exceeds 9).
            placeholder.write(f'⏳ Chat starting in {sec:02d} sec.')
            time.sleep(1)
        st.session_state.read_file = True
        st.rerun()

if st.session_state.read_file:
    # Chat screen: active once the PDF has been indexed.
    st.title('🤗 DocAI', anchor=False)
    st.subheader('Chat with your document!', anchor=False)

    if st.session_state.first_msg:
        # Seed the conversation with a one-time assistant greeting.
        st.session_state.first_msg = False
        get_session_by_id('chat42').add_message(AIMessage(content='Hello, how are you? How about we talk about the '
                                                                  'document you sent me to read?'))

    # Replay the stored history so the transcript survives Streamlit reruns.
    for msg in get_session_by_id('chat42').messages:
        with st.chat_message(name='user' if isinstance(msg, HumanMessage) else 'ai'):
            st.write(msg.content)

    prompt = st.chat_input('Try to ask something about your file!')
    if prompt:
        with st.chat_message(name='user'):
            st.write(prompt)

        # Stream the graph answer token-by-token. 'history' is the state as
        # it was BEFORE this prompt; the prompt itself travels in 'question'.
        response = st.session_state.graph.stream(
            {
                'question': prompt,
                'scratchpad': None,
                'answer': None,
                'next_node': None,
                'history': get_session_by_id('chat42').messages,
            },
            stream_mode='messages',
            config=RunnableConfig(recursion_limit=4)
        )

        get_session_by_id('chat42').add_message(HumanMessage(content=prompt))

        def get_message():
            """Yield only non-empty AI text chunks from the (chunk, metadata) stream."""
            for chunk, _ in response:
                if chunk.content and isinstance(chunk, AIMessageChunk):
                    yield chunk.content

        with st.chat_message(name='ai'):
            full_response = ''
            tool_placeholder = st.empty()
            placeholders = {}
            prompt_message_placeholder = st.empty()

            try:
                for msg in get_message():
                    full_response += msg
                    if '<tool>' in full_response:
                        # A tool call is streaming: show it inside a status box
                        # instead of the normal answer area.
                        with tool_placeholder.status('Reading document...', expanded=True):
                            if 'tool_message_placeholder' not in placeholders:
                                placeholders['tool_message_placeholder'] = st.empty()
                            placeholders['tool_message_placeholder'].write(full_response
                                                                           .replace('<tool>', '')
                                                                           .replace('</tool>', '')
                                                                           .replace('retriever', 'Retrieving document'))
                            prompt_message_placeholder.empty()
                    if '</tool>' in full_response:
                        # Tool call finished: discard its text and start
                        # accumulating the real answer from scratch.
                        full_response = ''
                        continue
                    else:
                        # Escape '$' so Streamlit does not render LaTeX.
                        # r'\$' is the same two characters as the original
                        # '\$', which is an invalid escape sequence and a
                        # SyntaxWarning on Python 3.12+.
                        prompt_message_placeholder.write(full_response.replace('$', r'\$'))
            except GraphRecursionError:
                # Recursion limit reached: type out a fallback apology instead.
                message = 'Não consegui responder a sua pergunta. 😥 Poderia me perguntar outra coisa?'
                full_response = ''
                for letter in message:
                    full_response += letter
                    time.sleep(0.015)
                    prompt_message_placeholder.write(full_response)

        get_session_by_id('chat42').add_message(AIMessage(content=full_response.replace('$', r'\$')))