mgw / my_ghost_writer /thesaurus.py
alessandro trinca tornidor
feat: handle NLTK wordnet preloading within /health-wordnet
a901fdc
from datetime import datetime
from nltk.corpus import wordnet31 as wn
from my_ghost_writer.constants import app_logger
from my_ghost_writer.type_hints import ResponseWordsAPI
def get_current_info_wordnet(preload_wordnet=False):
    """Return basic information about the WordNet corpus in use.

    Args:
        preload_wordnet: when true, run a throwaway ``wn.synsets("preload")``
            lookup before collecting the info and log how long it took.
            NOTE(review): the intent appears to be warming up NLTK's corpus
            loading so that later requests do not pay for it -- confirm.

    Returns:
        A dict with the keys "languages" (``wn.langs()``), "version"
        (``wn.get_version()``) and "preload_wordnet" (the flag, echoed back).
    """
    if preload_wordnet:
        t0_preload = datetime.now()
        wn.synsets("preload")  # preload wordnet: the result is intentionally discarded
        t1_preload = datetime.now()
        duration_preload = (t1_preload - t0_preload).total_seconds()
        # ".3f" gives three decimals; the previous "3f" was a minimum *width*
        # of 3 and therefore always printed the default six decimals.
        app_logger.info(f"wordnet sysnet preloaded in {duration_preload:.3f}s.")
    return {"languages": wn.langs(), "version": wn.get_version(), "preload_wordnet": preload_wordnet}
def get_synsets_by_word_and_language(word: str, lang: str = "eng") -> ResponseWordsAPI:
    """Collect the definition and related words of every WordNet synset of ``word``.

    Args:
        word: the term looked up through ``wn.synsets``.
        lang: language code forwarded to the WordNet lookups (default "eng").

    Returns:
        A dict ``{"word": word, "results": [...]}`` holding one entry per
        synset. Each entry always carries "definition"; every other key
        ("synonyms", the lemma-based relations and the synset-based relations)
        is added only when it has at least one value. Values are sorted,
        de-duplicated names with underscores replaced by spaces.
    """
    app_logger.info("start...")

    def readable(lemma):
        # Lemma names use underscores between words; expose them with spaces.
        return lemma.name().replace('_', ' ')

    def names_of_synsets(related_synsets):
        # Sorted unique lemma names (in ``lang``) of the given synsets.
        unique_names = set()
        for related in related_synsets:
            for lemma in related.lemmas(lang=lang):
                unique_names.add(readable(lemma))
        return sorted(unique_names)

    def names_of_lemma_relation(source_lemmas, relation_method):
        # Sorted unique names reached by calling ``relation_method`` on each lemma.
        unique_names = set()
        for source in source_lemmas:
            for target in getattr(source, relation_method)():
                unique_names.add(readable(target))
        return sorted(unique_names)

    # Output key -> name of the Lemma method providing the related lemmas.
    lemma_relation_methods = (
        ("antonyms", "antonyms"),
        ("derivation", "derivationally_related_forms"),
        ("pertainsTo", "pertainyms"),
    )

    senses = wn.synsets(word, lang=lang)
    lowered_word = word.lower()
    entries = []
    for sense in senses:
        sense_lemmas = sense.lemmas(lang=lang)
        entry = {"definition": sense.definition(lang=lang)}

        # Synonyms: the lemmas of this synset other than the queried word itself.
        synonym_names = sorted({
            readable(lemma)
            for lemma in sense_lemmas
            if lemma.name().lower() != lowered_word
        })
        if synonym_names:
            entry["synonyms"] = synonym_names

        # Lemma-based relations.
        for key, relation_method in lemma_relation_methods:
            related_names = names_of_lemma_relation(sense_lemmas, relation_method)
            if related_names:
                entry[key] = related_names

        # Synset-based relations; dict order fixes the key order of the entry.
        related_by_key = {
            "typeOf": sense.hypernyms(),
            "hasTypes": sense.hyponyms(),
            "partOf": (
                sense.member_holonyms()
                + sense.part_holonyms()
                + sense.substance_holonyms()
            ),
            "hasParts": (
                sense.member_meronyms()
                + sense.part_meronyms()
                + sense.substance_meronyms()
            ),
            "instanceOf": sense.instance_hypernyms(),
            "hasInstances": sense.instance_hyponyms(),
            "similarTo": sense.similar_tos(),
            "also": sense.also_sees(),
            "entails": sense.entailments(),
            "hasSubstances": sense.substance_meronyms(),
            "inCategory": sense.topic_domains(),
            "usageOf": sense.usage_domains(),
            "causes": sense.causes(),
            "verbGroups": sense.verb_groups(),
        }
        for key, related_synsets in related_by_key.items():
            related_names = names_of_synsets(related_synsets)
            if related_names:
                entry[key] = related_names

        entries.append(entry)

    return {"word": word, "results": entries}