Spaces:
Running
Running
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
unit tests: | |
* extract the top-k entities from a raw text | |
see copyright/license https://huggingface.co/spaces/DerwenAI/textgraphs/blob/main/README.md | |
""" | |
from os.path import abspath, dirname | |
import pathlib | |
import sys | |
sys.path.insert(0, str(pathlib.Path(dirname(dirname(abspath(__file__)))))) | |
import textgraphs # pylint: disable=C0413 | |
def test_extract_herzog ( | |
) -> None: | |
""" | |
Run an extract with the Werner Herzog blurb. | |
""" | |
text: str = """ | |
Werner Herzog is a remarkable filmmaker and intellectual originally from Germany, the son of Dietrich Herzog. | |
""" | |
tg: textgraphs.TextGraphs = textgraphs.TextGraphs( # pylint: disable=C0103 | |
factory = textgraphs.PipelineFactory(), | |
) | |
pipe: textgraphs.Pipeline = tg.create_pipeline( | |
text.strip(), | |
) | |
tg.collect_graph_elements( | |
pipe, | |
debug = False, | |
) | |
tg.perform_entity_linking( | |
pipe, | |
debug = False, | |
) | |
tg.construct_lemma_graph( | |
debug = False, | |
) | |
tg.calc_phrase_ranks( | |
debug = False, | |
) | |
results: list = [ | |
( row["text"], row["pos"], ) | |
for _, row in tg.get_phrases_as_df().iterrows() | |
] | |
# top-k, k=4 | |
results = results[:4] | |
expects: list = [ | |
("Germany", "PROPN"), | |
("Werner Herzog", "PROPN"), | |
("Dietrich Herzog", "PROPN"), | |
] | |
for pair in expects: | |
assert pair in results | |
if __name__ == "__main__": | |
test_extract_herzog() | |