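"""Streamlit question-answering app with conversation history.

Upload a .txt file; it is split into chunks, embedded with OpenAI
embeddings, indexed in an in-memory Chroma vector store, and queried
through a LangChain RetrievalQA chain. Each question/answer pair and
its source chunks are kept in st.session_state.

Run with (file name assumed): streamlit run app.py
"""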
import os

import streamlit as st
import chromadb
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
# langchain_community hosts the OpenAI and Chroma integrations that older
# releases exposed under the bare langchain.* paths (now deprecated)
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Clear Chroma's shared system cache so Streamlit reruns don't collide with
# a client instance left over from a previous script run
chromadb.api.client.SharedSystemClient.clear_system_cache()

# Fail fast if the API key is missing; re-assigning os.getenv("OPENAI_API_KEY")
# into os.environ raises TypeError when the variable is unset
if not os.getenv("OPENAI_API_KEY"):
    st.error("Please set the OPENAI_API_KEY environment variable.")
    st.stop()

# Initialize the embedding model and the chat model
embd = OpenAIEmbeddings()
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

# Initialize conversation history (st.session_state persists across
# Streamlit reruns, so the history accumulates within a browser session)
if "conversation_history" not in st.session_state:
    st.session_state.conversation_history = []

# Define the Streamlit app
st.title("Text File Question-Answering with History")
st.subheader("Upload a text file and ask questions. The app will maintain a conversation history.")

# File upload section
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])

if uploaded_file:
    # Read and decode the content of the uploaded file
    file_content = uploaded_file.read().decode("utf-8")

    # Wrap the content in a LangChain Document, recording the file name so
    # the source listing below shows something better than "Unknown Source"
    document = [Document(page_content=file_content, metadata={"source": uploaded_file.name})]
    
    # Split the document into 500-character chunks with no overlap
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    doc_splits = text_splitter.split_documents(document)
    
    # Build an in-memory vector store (persist_directory=None keeps it off
    # disk, so the index is rebuilt on every Streamlit rerun)
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="uploaded_document",
        embedding=embd,
        persist_directory=None,
    )
    retriever = vectorstore.as_retriever()
    
    # Initialize the QA chain
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # Question-answering section
    query = st.text_input("Ask a question:")
    
    if query:
        # Run the QA chain; .invoke() replaces the deprecated direct-call
        # form qa_chain({"query": query})
        result = qa_chain.invoke({"query": query})
        answer = result["result"]
        sources = result["source_documents"]
        
        # Append to conversation history
        st.session_state.conversation_history.append((query, answer, sources))
        
        # Display the current answer
        st.write("**Answer:**", answer)
        
        # Display the sources
        st.subheader("Source Documents")
        for i, doc in enumerate(sources, start=1):
            st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
            st.write(doc.page_content[:500])  # Display the first 500 characters of the source content
        
        # Display the full conversation history (includes the question just answered)
        st.subheader("Conversation History")
        for idx, (q, a, s) in enumerate(st.session_state.conversation_history, 1):
            st.write(f"**Q{idx}:** {q}")
            st.write(f"**A{idx}:** {a}")
            st.write(f"**Sources for Q{idx}:**")
            for i, doc in enumerate(s, start=1):
                st.write(f"**Source {i}:** {doc.metadata.get('source', 'Unknown Source')}")
                st.write(doc.page_content[:300])  # Show a snippet for brevity