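"""Streamlit question-answering app with conversation history.

Upload a .txt file; it is split into chunks, embedded with OpenAI
embeddings, indexed in an in-memory Chroma vector store, and queried
through a LangChain RetrievalQA chain. Each question/answer pair and
its source chunks are kept in st.session_state.

Run with (file name assumed): streamlit run app.py
"""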
import os

import streamlit as st
import chromadb
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
# langchain_community hosts the OpenAI and Chroma integrations that older
# releases exposed under the bare langchain.* paths (now deprecated)
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Clear Chroma's shared system cache so Streamlit reruns don't collide with
# a client instance left over from a previous script run
chromadb.api.client.SharedSystemClient.clear_system_cache()

# Fail fast if the API key is missing; re-assigning os.getenv("OPENAI_API_KEY")
# into os.environ raises TypeError when the variable is unset
if not os.getenv("OPENAI_API_KEY"):
    st.error("Please set the OPENAI_API_KEY environment variable.")
    st.stop()

# Initialize the embedding model and the chat model
embd = OpenAIEmbeddings()
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

# Initialize conversation history (st.session_state persists across
# Streamlit reruns, so the history accumulates within a browser session)
if "conversation_history" not in st.session_state:
    st.session_state.conversation_history = []

# Define the Streamlit app
st.title("Text File Question-Answering with History")
st.subheader("Upload a text file and ask questions. The app will maintain a conversation history.")

# File upload section
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])

if uploaded_file:
    # Read and decode the content of the uploaded file
    file_content = uploaded_file.read().decode("utf-8")

    # Wrap the content in a LangChain Document, recording the file name so
    # the source listing below shows something better than "Unknown Source"
    document = [Document(page_content=file_content, metadata={"source": uploaded_file.name})]
    
    # Split the document into 500-character chunks with no overlap
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    doc_splits = text_splitter.split_documents(document)
    
    # Build an in-memory vector store (persist_directory=None keeps it off
    # disk, so the index is rebuilt on every Streamlit rerun)
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="uploaded_document",
        embedding=embd,
        persist_directory=None,
    )
    retriever = vectorstore.as_retriever()
    
    # Initialize the QA chain
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # Question-answering section
    query = st.text_input("Ask a question:")
    
    if query:
        # Run the QA chain; .invoke() replaces the deprecated direct-call
        # form qa_chain({"query": query})
        result = qa_chain.invoke({"query": query})
        answer = result["result"]
        sources = result["source_documents"]
        
        # Append to conversation history
        st.session_state.conversation_history.append((query, answer, sources))
        
        # Display the current answer
        st.write("**Answer:**", answer)
        
        # Display the sources
        st.subheader("Source Documents")
        for i, doc in enumerate(sources, start=1):
            st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
            st.write(doc.page_content[:500])  # Display the first 500 characters of the source content
        
        # Display the full conversation history (includes the question just answered)
        st.subheader("Conversation History")
        for idx, (q, a, s) in enumerate(st.session_state.conversation_history, 1):
            st.write(f"**Q{idx}:** {q}")
            st.write(f"**A{idx}:** {a}")
            st.write(f"**Sources for Q{idx}:**")
            for i, doc in enumerate(s, start=1):
                st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
                st.write(doc.page_content[:300])  # Show a snippet for brevity