# Spaces: Running
# Streamlit script: converts the text files under `doc_dir` into Haystack
# documents and displays one of them on the page as a quick sanity check.

import json

import streamlit as st
import validators
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import FARMReader, TfidfRetriever
from haystack.nodes.connector import Crawler
from haystack.pipelines import ExtractiveQAPipeline
from haystack.utils import (
    clean_wiki_text,
    convert_files_to_docs,
    fetch_archive_from_http,
)

# Directory holding the article text files to convert.
doc_dir = './article_txt_got'

# In-memory store for the documents. NOTE(review): nothing in the visible
# code writes `docs` into it — confirm whether that happens elsewhere.
document_store = InMemoryDocumentStore()

# Convert the files in `doc_dir` to documents, passing `clean_wiki_text` as the
# cleaning function and requesting a paragraph-level split.
docs = convert_files_to_docs(
    dir_path=doc_dir,
    clean_func=clean_wiki_text,
    split_paragraphs=True,
)

# Show the second document. Guard the index so that a directory producing
# fewer than two documents reports the problem instead of raising IndexError.
if len(docs) > 1:
    st.write(docs[1])
else:
    st.warning(
        f"Expected at least two documents in {doc_dir!r}, found {len(docs)}."
    )