Spaces:
Sleeping
Sleeping
| """This module contains utility functions for the project""" | |
| import mmh3 | |
| from haystack import Document | |
def get_unique_docs(dataset, unique_docs: set):
    """Convert not-yet-seen dataset records into haystack Documents.

    Args:
        dataset: iterable of mapping-like records; each record is read
            through the keys "context", "context_id", "context_title"
            and "url".
        unique_docs: set of context ids that were already converted. It is
            updated in place with the id of every record converted here.

    Returns:
        list of haystack.Document, one per record whose "context" is not
        None and whose "context_id" was not yet present in unique_docs.
    """
    collected = []
    for record in dataset:
        text = record["context"]
        if text is None:
            continue

        context_id = record["context_id"]
        if context_id in unique_docs:
            continue

        # Register the id before building the Document so that later
        # records carrying the same id are skipped.
        unique_docs.add(context_id)
        metadata = {
            "title": record["context_title"],
            "context_id": context_id,
            "url": record["url"],
            "source": "QASports",
        }
        collected.append(Document(content=text, meta=metadata))
    return collected