|
from datetime import datetime |
|
|
|
from nltk.corpus import wordnet31 as wn |
|
|
|
from my_ghost_writer.constants import app_logger |
|
from my_ghost_writer.type_hints import ResponseWordsAPI |
|
|
|
|
|
def get_current_info_wordnet(preload_wordnet=False):
    """Return basic information about the WordNet corpus in use.

    Args:
        preload_wordnet: When true, perform one throwaway
            ``wn.synsets("preload")`` lookup before collecting the info and
            log how long that lookup took (presumably to pay the corpus
            loading cost up front -- confirm against the caller).

    Returns:
        A dict with three keys: ``"languages"`` (the value of ``wn.langs()``),
        ``"version"`` (the value of ``wn.get_version()``) and
        ``"preload_wordnet"`` (the argument, echoed back unchanged).
    """
    if preload_wordnet:
        t0_preload = datetime.now()
        # The result is discarded on purpose: only the elapsed time is used.
        wn.synsets("preload")
        duration_preload = (datetime.now() - t0_preload).total_seconds()
        # ".3f" prints three decimals; a bare "3f" spec would only set a
        # minimum field width of 3 and still print six decimals.
        app_logger.info(f"wordnet sysnet preloaded in {duration_preload:.3f}s.")
    return {"languages": wn.langs(), "version": wn.get_version(), "preload_wordnet": preload_wordnet}
|
|
|
|
|
def get_synsets_by_word_and_language(word: str, lang: str = "eng") -> ResponseWordsAPI:
    """Describe every WordNet synset found for *word* in language *lang*.

    Args:
        word: The term passed to ``wn.synsets``.
        lang: Language code forwarded to every WordNet call that accepts one
            (``wn.synsets``, ``Synset.lemmas``, ``Synset.definition``).

    Returns:
        A dict ``{"word": word, "results": [...]}``. ``word`` is echoed back
        unchanged. ``results`` holds one dict per synset returned by
        ``wn.synsets(word, lang=lang)``; each always has a ``"definition"``
        key and, only when the corresponding list is non-empty, any of:
        ``synonyms``, ``antonyms``, ``derivation``, ``pertainsTo``,
        ``typeOf``, ``hasTypes``, ``partOf``, ``hasParts``, ``instanceOf``,
        ``hasInstances``, ``similarTo``, ``also``, ``entails``,
        ``hasSubstances``, ``inCategory``, ``usageOf``, ``causes``,
        ``verbGroups``. Every list is de-duplicated, sorted, and shows
        underscores as spaces.
    """
    app_logger.info("start...")

    def display_names(named_items):
        """Return sorted unique ``.name()`` values with '_' shown as ' '."""
        return sorted({item.name().replace('_', ' ') for item in named_items})

    def lemma_names(synsets):
        """Collect the display names of all *lang* lemmas of *synsets*."""
        return display_names(
            lemma
            for syn in synsets
            for lemma in syn.lemmas(lang=lang)
        )

    def lemma_related(lemmas_input, lemmas_method):
        """Collect display names of lemmas reached via *lemmas_method*."""
        return display_names(
            rel
            for lemma in lemmas_input
            for rel in getattr(lemma, lemmas_method)()
        )

    # Normalise the query once, outside the loops. Underscores are mapped to
    # spaces so a multi-word query matches its own lemma whichever separator
    # the caller used, and is therefore not reported as its own synonym.
    normalized_word = word.lower().replace('_', ' ')

    results = []
    for synset in wn.synsets(word, lang=lang):
        lemmas = synset.lemmas(lang=lang)
        obj = {"definition": synset.definition(lang=lang)}

        # Synonyms: every lemma of this synset except the queried word itself.
        synonyms = sorted({
            shown
            for shown in (lemma.name().replace('_', ' ') for lemma in lemmas)
            if shown.lower() != normalized_word
        })
        if synonyms:
            obj["synonyms"] = synonyms

        # Lemma-level relations: output key -> name of the lemma method.
        for field, method in [
            ("antonyms", "antonyms"),
            ("derivation", "derivationally_related_forms"),
            ("pertainsTo", "pertainyms"),
        ]:
            values = lemma_related(lemmas, method)
            if values:
                obj[field] = values

        # Synset-level relations: output key -> related synsets.
        # NOTE(review): substance meronyms appear both inside "hasParts" and
        # on their own as "hasSubstances"; kept as-is, confirm it is intended.
        synset_relations = [
            ("typeOf", synset.hypernyms()),
            ("hasTypes", synset.hyponyms()),
            ("partOf", synset.member_holonyms() + synset.part_holonyms() + synset.substance_holonyms()),
            ("hasParts", synset.member_meronyms() + synset.part_meronyms() + synset.substance_meronyms()),
            ("instanceOf", synset.instance_hypernyms()),
            ("hasInstances", synset.instance_hyponyms()),
            ("similarTo", synset.similar_tos()),
            ("also", synset.also_sees()),
            ("entails", synset.entailments()),
            ("hasSubstances", synset.substance_meronyms()),
            ("inCategory", synset.topic_domains()),
            ("usageOf", synset.usage_domains()),
            ("causes", synset.causes()),
            ("verbGroups", synset.verb_groups()),
        ]
        for field, syns in synset_relations:
            values = lemma_names(syns)
            if values:
                obj[field] = values

        results.append(obj)

    return {
        "word": word,
        "results": results,
    }
|
|