File size: 4,538 Bytes
d3a1fe2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# app interface related
import gradio as gr
import shutil
import tempfile
from pathlib import Path
import time

# ai related
from Chroma import create_db
from LangChain import query, load_chain


# function to store the state
# function to store the state
def load_data(
    chunk_size,
    chunk_overlap,
    uploaded_files,
    existing_data,
    progress=gr.Progress(),
):
    """Build the vector DB and QA chain from the uploaded files.

    Consolidates every uploaded file into a fresh temporary directory,
    creates a Chroma DB over it, and loads the LangChain QA chain.

    Args:
        chunk_size: document chunk size forwarded to ``create_db``.
        chunk_overlap: chunk overlap forwarded to ``create_db``.
        uploaded_files: gradio file objects from the folder upload (may be
            ``None`` when nothing was uploaded).
        existing_data: previous state dict (or ``None``); its ``temp_dir``
            is removed so stale files don't leak.
        progress: gradio progress tracker (injected by gradio at call time).

    Returns:
        ``(state_dict, status_message)`` — on success the state dict holds
        ``db``, ``chain``, ``temp_dir``, ``loaded=True`` and ``file_count``;
        on failure it holds ``loaded=False`` and ``error``.
    """
    try:
        # Guard: nothing uploaded yet -> friendly message instead of a
        # TypeError from iterating None / dividing by len() == 0.
        if not uploaded_files:
            return {"loaded": False, "error": "No files uploaded"}, "❌ Error: No files uploaded"

        progress(0, desc="Loading chain...")
        time.sleep(0.5)  # brief pause so the progress UI is visible
        print("Loading chain...")
        # chain load
        chain = load_chain()
        progress(0.3, desc="Chain loaded")
        time.sleep(0.5)
        print("Chain loaded")

        print("Creating db...")
        # clean up previous temporary directory if it exists;
        # ignore_errors avoids crashing if it was already removed
        if existing_data and "temp_dir" in existing_data:
            shutil.rmtree(existing_data["temp_dir"], ignore_errors=True)

        # create new consolidated temporary directory
        temp_dir = tempfile.mkdtemp()

        print(f"Copying files to {temp_dir}...")
        # flatten all uploads into the consolidated directory
        for i, uploaded_file in enumerate(uploaded_files, 1):
            src_path = Path(uploaded_file.name)
            # move file to consolidated directory
            shutil.move(src_path, temp_dir)
            # progress advances from 0.3 to 0.5 across the file copies;
            # Path.name is the portable basename (works on any OS separator)
            progress(
                0.3 + 0.2 * i / len(uploaded_files), f"Processing {src_path.name}"
            )
            time.sleep(0.1)

        # create db file (input docs and Chroma store share temp_dir)
        progress(0.5, desc="Creating db...")
        db = create_db(chunk_size, chunk_overlap, INPUT_PATH=temp_dir, CHROMA_PATH=temp_dir)
        progress(1.0, desc="DB created")
        print("DB created")

        return {
            "db": db,
            "chain": chain,
            "temp_dir": temp_dir,
            "loaded": True,
            "file_count": len(uploaded_files),
        }, "βœ… Data loaded successfully!"
    except Exception as e:
        # top-level UI boundary: surface the error in the status box
        # rather than crashing the gradio callback
        return {"loaded": False, "error": str(e)}, f"❌ Error: {str(e)}"


def chat_response(message, chat_history, data):
    """Answer ``message`` against the loaded DB/chain and append to history.

    Args:
        message: the user's question.
        chat_history: list of ``(user, assistant)`` tuples; mutated in place.
        data: state dict from ``load_data`` (``None`` before the first load).

    Returns:
        The updated ``chat_history``.
    """
    if not data or not data.get("loaded"):
        # `data` is None before the first load, so the .get() must be
        # guarded — calling data.get on None would raise AttributeError.
        if data:
            error_msg = data.get("error", "Please load data first!")
        else:
            error_msg = "Please load data first!"
        chat_history.append((message, error_msg))
        return chat_history

    # responses based on the input data
    answer, sources = query(message, data["db"], data["chain"])
    # keep only the basename of each source file, one per line
    # (Path.name is portable across OS path separators)
    sources = "\n".join(Path(s_file).name for s_file in sources.split("\n"))
    response = f"{answer}\n\nSources:\n{sources}"

    # Append messages as tuples (user, assistant) instead of dictionaries
    chat_history.append((message, response))
    return chat_history


# App layout and event wiring. Component construction order inside the
# context managers determines the rendered layout, so keep it as-is.
with gr.Blocks(title="Document Analysis Chatbot") as demo:
    # store loaded data (per-session state dict produced by load_data)
    data_store = gr.State()

    with gr.Row():
        # Left Column - Inputs
        with gr.Column(scale=1):
            gr.Markdown("## Data Upload")
            # create db parameters
            chunk_size = gr.Number(label="Chunk Size", value=1000)
            chunk_overlap = gr.Number(label="Chunk Overlap", value=500)
            # load file
            folder_input = gr.File(file_count="directory", label="Upload Folder")
            # Add status display (read-only; load_data writes its second
            # return value here)
            status_text = gr.Textbox(
                label="Status",
                interactive=False,
                show_label=False
            )
            # load button
            load_btn = gr.Button("Load Data", variant="primary")

        # Right Column - Chat (hidden until data is loaded; revealed by the
        # .then() step of the load button below)
        with gr.Column(scale=3, visible=False) as chat_col:
            gr.Markdown("## Chat Interface")
            chatbot = gr.Chatbot(
                label="Document Analysis Chat",
                type="tuples",
                bubble_full_width=False,  # Prevent stretching of messages
                render_markdown=True,  # Handle markdown formatting properly
                height=500,
            )
            msg = gr.Textbox(label="Your Question", placeholder="Type your question...")
            clear_btn = gr.Button("Clear Chat", variant="secondary")

    # Loading indicators - update to handle multiple outputs.
    # NOTE(review): the .then() reveals chat_col even when load_data fails;
    # the error still shows in status_text — confirm this is intended.
    load_btn.click(
        fn=load_data,
        inputs=[chunk_size, chunk_overlap, folder_input, data_store],
        outputs=[data_store, status_text],
    ).then(fn=lambda: gr.Column(visible=True), outputs=chat_col)

    # Chat interaction: answer the question, then clear the input textbox
    msg.submit(
        fn=chat_response,
        inputs=[msg, chatbot, data_store],
        outputs=[chatbot],
    ).then(lambda: "", None, msg)

    # Clear chat (resets the chatbot history to an empty list)
    clear_btn.click(lambda: [], None, chatbot)

# Launch the gradio server only when run as a script
if __name__ == "__main__":
    demo.launch()