Spaces:
Runtime error
Runtime error
| from haystack.schema import Document | |
| from haystack.document_stores import BaseDocumentStore | |
| import uuid | |
def format_docs(documents):
    """Convert raw document dicts into ``Document`` objects and collect their ids.

    Args:
        documents: Iterable of dict-like items. Each item must provide a
            ``"text"`` entry; the ``"id"`` entry is optional.

    Returns:
        A ``(db_docs, doc_ids)`` tuple: the created ``Document`` objects and,
        in the same order, the ids stored under ``meta["id"]`` of each one.

    Raises:
        KeyError: If an item has no ``"text"`` entry.
    """
    db_docs = []
    for doc in documents:
        # A missing "id" key is handled like an explicit None so that callers
        # may omit the field entirely instead of hitting a KeyError.
        try:
            doc_id = doc["id"]
        except KeyError:
            doc_id = None
        if doc_id is None:
            doc_id = str(uuid.uuid4())

        db_docs.append(
            Document(
                content=doc["text"],
                content_type="text",
                # The Document itself always gets a fresh random id; the
                # caller-supplied (or generated) id travels in the metadata.
                id=str(uuid.uuid4()),
                meta={"id": doc_id},
            )
        )
    return db_docs, [db_doc.meta["id"] for db_doc in db_docs]
def index(documents, pipeline, clear_index=True):
    """Format ``documents`` and run them through ``pipeline``.

    Args:
        documents: Raw documents, passed to ``format_docs`` for conversion.
        pipeline: Object providing ``get_nodes_by_class`` and ``run``.
        clear_index: When true, ``delete_index(store.index)`` is called on
            every ``BaseDocumentStore`` node of the pipeline before running.

    Returns:
        The document ids reported by ``format_docs``.
    """
    formatted_docs, doc_ids = format_docs(documents)

    if clear_index:
        # Drop the current index of every document store in the pipeline
        # before the new documents are sent through it.
        for store in pipeline.get_nodes_by_class(class_type=BaseDocumentStore):
            store.delete_index(store.index)

    pipeline.run(documents=formatted_docs)
    return doc_ids
def search(queries, pipeline):
    """Run ``queries`` through ``pipeline`` and return the matches as plain dicts.

    Args:
        queries: Queries forwarded unchanged to
            ``pipeline.run_batch(queries=...)``.
        pipeline: Object providing ``run_batch``. Its return value must hold,
            under the ``"documents"`` key, one iterable of result objects per
            query. Each result object must expose ``content``, ``meta`` (with
            an ``"id"`` entry), ``id`` and ``score``.

    Returns:
        One list per query. Each entry is a dict with the keys ``"text"``,
        ``"id"`` (taken from ``meta["id"]``), ``"fragment_id"`` and ``"meta"``,
        plus ``"score"`` when the result has a score and ``"content_audio"``
        when the result object has that attribute. A query's list is sorted by
        descending score only if every one of its results has a score;
        otherwise the order delivered by the pipeline is kept.
    """
    results = []
    batch_output = pipeline.run_batch(queries=queries)
    for matches in batch_output["documents"]:
        query_results = []
        all_scored = True
        for res in matches:
            match = {
                "text": res.content,
                "id": res.meta["id"],
                "fragment_id": res.id,
                "meta": res.meta,
            }
            # Decide per result whether a score is available, so one unscored
            # result does not strip the score from the results that follow it.
            if res.score is None:
                all_scored = False
            else:
                match["score"] = res.score
            if hasattr(res, "content_audio"):
                match["content_audio"] = res.content_audio
            query_results.append(match)

        # Ranking by score is only meaningful when every result carries one.
        if all_scored:
            query_results.sort(key=lambda item: item["score"], reverse=True)
        results.append(query_results)
    return results