# QA-GoT / app.py
# Ono-Enzo's picture
# Update app.py
# f51c555 verified
import logging
import os
import zipfile

import requests
import streamlit as st
from haystack import Document, Pipeline
from haystack.components.embedders import (
    SentenceTransformersDocumentEmbedder,
    SentenceTransformersTextEmbedder,
)
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.readers import ExtractiveReader
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.writers import DocumentWriter
from haystack.document_stores.in_memory import InMemoryDocumentStore
# NOTE(review): the two imports below use module paths that appear to belong
# to Haystack 1.x, while every other Haystack import targets 2.x. No single
# release seems to provide both -- confirm and drop them once nothing uses them.
from haystack.pipelines import ExtractiveQAPipeline
from haystack.pipelines.standard_pipelines import TextIndexingPipeline
from pydantic import BaseModel
# Model that allows arbitrary types in Pydantic
class Config(BaseModel):
    """Pydantic model whose nested config enables arbitrary field types.

    NOTE(review): nothing else in the visible part of this file references
    this class -- confirm whether it is still needed.
    """

    class Config:
        # Lets pydantic accept field types it has no validator for.
        arbitrary_types_allowed = True
# Function to download a zip archive and extract it
def download_and_extract(url, output_dir, timeout=60):
    """Download a zip archive from ``url`` and extract it into ``output_dir``.

    The archive is streamed to a temporary ``temp.zip`` inside ``output_dir``
    and that file is removed afterwards, whether or not extraction succeeded.

    Args:
        url: Location of the zip archive.
        output_dir: Directory to extract into; created if it does not exist.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
        zipfile.BadZipFile: If the downloaded payload is not a valid zip file.
    """
    zip_path = os.path.join(output_dir, "temp.zip")

    # Create the destination directory if it does not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    try:
        # Stream the body to disk so the whole archive is never held in memory,
        # and fail early on HTTP errors instead of saving an error page as a zip.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(zip_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)

        # Extract the archive contents.
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(output_dir)
    finally:
        # Remove the temporary archive even when download or extraction failed.
        if os.path.exists(zip_path):
            os.remove(zip_path)
def initializing_Document_Store():
    """Download the Game of Thrones corpus, index it and build a retriever.

    Every ``.txt`` file in the extracted archive becomes one document. The
    documents are split into passages, embedded and written to a fresh
    in-memory document store.

    Returns:
        tuple: ``(document_store, retriever)`` where ``document_store`` holds
        the embedded passages and ``retriever`` is the object returned by
        ``initializing_Retriever(document_store)``.
    """
    doc_dir = "data/build_your_first_question_answering_system"
    download_and_extract(
        url="https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt1.zip",
        output_dir=doc_dir,
    )

    # Load each extracted text file as one Document; the file name (without
    # extension) is kept as a human-readable title for display.
    documents = []
    for file_name in sorted(os.listdir(doc_dir)):
        file_path = os.path.join(doc_dir, file_name)
        if not (os.path.isfile(file_path) and file_name.endswith(".txt")):
            continue
        with open(file_path, encoding="utf-8") as handle:
            text = handle.read()
        if text.strip():
            documents.append(
                Document(
                    content=text,
                    meta={
                        "title": os.path.splitext(file_name)[0],
                        "file_path": file_path,
                    },
                )
            )

    # Initialize DocumentStore
    document_store = InMemoryDocumentStore()

    # Index the documents: split -> embed -> write. Running the components
    # through a Pipeline lets the pipeline take care of warming them up.
    indexing_pipeline = Pipeline()
    indexing_pipeline.add_component(
        instance=DocumentSplitter(split_by="word", split_length=200),
        name="splitter",
    )
    indexing_pipeline.add_component(
        # The query side must embed questions with this same model.
        instance=SentenceTransformersDocumentEmbedder(
            model="sentence-transformers/all-MiniLM-L6-v2"
        ),
        name="embedder",
    )
    indexing_pipeline.add_component(
        instance=DocumentWriter(document_store=document_store),
        name="writer",
    )
    indexing_pipeline.connect("splitter.documents", "embedder.documents")
    indexing_pipeline.connect("embedder.documents", "writer.documents")
    if documents:
        indexing_pipeline.run(data={"splitter": {"documents": documents}})

    # Configure the Retriever on top of the populated store
    retriever = initializing_Retriever(document_store)
    return document_store, retriever
# Function to initialize the Retriever
def initializing_Retriever(document_store):
    """Create the embedding retriever that searches ``document_store``.

    Args:
        document_store: The in-memory document store to search.

    Returns:
        An ``InMemoryEmbeddingRetriever`` bound to ``document_store``.
    """
    # The retriever is constructed from the store alone; it has no
    # ``embedding_model`` argument. NOTE(review): per the Haystack 2.x docs it
    # is queried with a precomputed query embedding, so callers need a separate
    # text embedder using the same model as the indexed documents -- confirm.
    return InMemoryEmbeddingRetriever(document_store=document_store)
# Function to initialize the Reader
def initializing_Reader():
    """Build the extractive reader for the ``deepset/roberta-base-squad2`` model.

    Returns:
        The newly constructed ``ExtractiveReader``; this function does not
        call ``warm_up()`` on it.
    """
    return ExtractiveReader(model="deepset/roberta-base-squad2")
# Initializing components
logger = logging.getLogger(__name__)

# Must be the model the documents were embedded with at indexing time,
# otherwise query and document vectors are not comparable.
QUERY_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
RETRIEVER_TOP_K = 10  # passages handed from the retriever to the reader
READER_TOP_K = 3  # answers requested from the reader


@st.cache_resource
def _load_components():
    """Build the document store, retriever, reader and QA pipeline once.

    Streamlit re-executes this script on every user interaction; caching the
    result keeps the download, indexing and model loading from being repeated
    on each rerun.

    Returns:
        tuple: ``(document_store, retriever, reader, pipe)``.
    """
    document_store, retriever = initializing_Document_Store()
    reader = initializing_Reader()
    reader.warm_up()

    pipe = Pipeline()
    # The retriever consumes a query embedding, so the question text is
    # embedded first and the vector is routed into the retriever.
    pipe.add_component(
        instance=SentenceTransformersTextEmbedder(model=QUERY_EMBEDDING_MODEL),
        name="text_embedder",
    )
    pipe.add_component(instance=retriever, name="retriever")
    pipe.add_component(instance=reader, name="reader")
    pipe.connect("text_embedder.embedding", "retriever.query_embedding")
    pipe.connect("retriever.documents", "reader.documents")
    return document_store, retriever, reader, pipe


document_store, retriever, reader, pipe = _load_components()

# User interaction via Streamlit
st.title("Ask about Game of Thrones!")
user_query = st.text_input(
    label="Ask a question about Game of Thrones!",
    placeholder="Who is the King in the North?",
)

if user_query and user_query.strip():
    answers = []
    # Get the answers
    with st.spinner("Searching for an answer..."):
        try:
            # Use the pipeline to find the answer
            result = pipe.run(
                data={
                    "text_embedder": {"text": user_query},
                    "retriever": {"top_k": RETRIEVER_TOP_K},
                    "reader": {"query": user_query, "top_k": READER_TOP_K},
                }
            )
            # Drop the "no answer" entry, which carries no text.
            answers = [
                ans for ans in result["reader"]["answers"] if ans.data is not None
            ]
        except Exception:
            # Top-level UI boundary: keep the page alive, but record the real
            # cause instead of discarding it.
            logger.exception("Question answering failed for query %r", user_query)

    if not answers:
        st.error("Sorry, we couldn't find an answer to your question.")
    else:
        # Display the answers
        for idx, ans in enumerate(answers):
            # Metadata keys depend on how the documents were indexed, so fall
            # back to placeholders rather than failing on a missing key.
            meta = ans.document.meta if ans.document is not None else {}
            title = meta.get("title", "unknown")
            url = meta.get("url", "n/a")
            st.info(
                f"""
                Answer {idx + 1}: "{ans.data}" | Score: {ans.score:0.4f}
                Document: "{title}"
                URL: {url}
                """
            )
            with st.expander("See details", expanded=False):
                st.write(ans)
            st.divider()