File size: 3,147 Bytes
a901fdc
 
e0b1feb
 
38adeb3
e0b1feb
 
 
9fbe321
 
a901fdc
 
 
 
 
9fbe321
e0b1feb
 
 
38adeb3
 
 
e0b1feb
38adeb3
 
 
e0b1feb
 
38adeb3
 
 
e0b1feb
38adeb3
 
 
e0b1feb
 
38adeb3
 
 
 
 
 
 
 
e0b1feb
38adeb3
 
 
e0b1feb
 
 
 
38adeb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0b1feb
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from datetime import datetime

from nltk.corpus import wordnet31 as wn

from my_ghost_writer.constants import app_logger
from my_ghost_writer.type_hints import ResponseWordsAPI


def get_current_info_wordnet(preload_wordnet: bool = False) -> dict:
    """Return basic information about the WordNet corpus in use.

    Args:
        preload_wordnet: when true, run one throwaway ``wn.synsets`` lookup
            before collecting the information and log how long it took.

    Returns:
        A dict with three keys: ``"languages"`` (the value of ``wn.langs()``),
        ``"version"`` (the value of ``wn.get_version()``) and
        ``"preload_wordnet"`` (the flag exactly as it was passed in).
    """
    if preload_wordnet:
        t0_preload = datetime.now()
        # Throwaway lookup, the result is discarded on purpose.
        # NOTE(review): presumably this forces the corpus data to be loaded now
        # instead of on the first real request - confirm against the NLTK loader.
        wn.synsets("preload")  # preload wordnet
        duration_preload = (datetime.now() - t0_preload).total_seconds()
        # ".3f" = three decimals; the former "3f" was only a minimum field width.
        app_logger.info(f"wordnet sysnet preloaded in {duration_preload:.3f}s.")
    return {"languages": wn.langs(), "version": wn.get_version(), "preload_wordnet": preload_wordnet}


def get_synsets_by_word_and_language(word: str, lang: str = "eng") -> ResponseWordsAPI:
    """Collect the WordNet senses of *word* together with their related terms.

    Every synset returned by ``wn.synsets`` becomes one entry of ``results``.
    An entry always has a ``"definition"`` key; every other key (``"synonyms"``,
    ``"antonyms"``, ``"derivation"``, ``"pertainsTo"``, ``"typeOf"``,
    ``"hasTypes"``, ``"partOf"``, ``"hasParts"``, ``"instanceOf"``,
    ``"hasInstances"``, ``"similarTo"``, ``"also"``, ``"entails"``,
    ``"hasSubstances"``, ``"inCategory"``, ``"usageOf"``, ``"causes"``,
    ``"verbGroups"``) is added only when it has at least one value. Values are
    sorted, de-duplicated lists of lemma names with underscores shown as spaces.

    Args:
        word: the term to look up. Spaces are treated like underscores, so
            ``"hot dog"`` and ``"hot_dog"`` are looked up the same way.
        lang: language code passed to the WordNet calls that accept ``lang``.

    Returns:
        ``{"word": word, "results": [...]}`` where ``word`` is the argument
        exactly as received.
    """
    app_logger.info("start...")

    def display_name(lemma):
        # Lemma names join multi-word expressions with "_"; show them with spaces.
        return lemma.name().replace('_', ' ')

    def lemma_names(synsets):
        # Names of every lemma (in the requested language) of the given synsets.
        return sorted({display_name(lemma) for syn in synsets for lemma in syn.lemmas(lang=lang)})

    def lemma_related(lemmas_input, lemmas_method):
        # Names reached by calling the relation method `lemmas_method` on each lemma.
        return sorted(
            {
                display_name(rel)
                for lemma in lemmas_input
                for rel in getattr(lemma, lemmas_method)()
            }
        )

    # Normalise the query to the underscore form used by lemma names, so that a
    # multi-word query typed with spaces is found and is also recognised as
    # "the word itself" by the synonyms filter below.
    lookup_word = word.replace(' ', '_')
    lookup_key = lookup_word.lower()  # hoisted: invariant across all synsets

    # Relations that are defined on lemmas: (output field, Lemma method name).
    lemma_relations = (
        ("antonyms", "antonyms"),
        ("derivation", "derivationally_related_forms"),
        ("pertainsTo", "pertainyms"),
    )

    results = []
    for synset in wn.synsets(lookup_word, lang=lang):
        lemmas = synset.lemmas(lang=lang)
        obj = {"definition": synset.definition(lang=lang)}

        # Synonyms: the synset's own lemmas, minus the queried word
        # (compared case-insensitively).
        synonyms = sorted(
            {display_name(lemma) for lemma in lemmas if lemma.name().lower() != lookup_key}
        )
        if synonyms:
            obj["synonyms"] = synonyms

        # Lemma-based relations
        for field, method in lemma_relations:
            values = lemma_related(lemmas, method)
            if values:
                obj[field] = values

        # Synset-based relations: (output field, related synsets).
        # Substance meronyms feed both "hasParts" and "hasSubstances", as before.
        synset_relations = [
            ("typeOf", synset.hypernyms()),
            ("hasTypes", synset.hyponyms()),
            ("partOf", synset.member_holonyms() + synset.part_holonyms() + synset.substance_holonyms()),
            ("hasParts", synset.member_meronyms() + synset.part_meronyms() + synset.substance_meronyms()),
            ("instanceOf", synset.instance_hypernyms()),
            ("hasInstances", synset.instance_hyponyms()),
            ("similarTo", synset.similar_tos()),
            ("also", synset.also_sees()),
            ("entails", synset.entailments()),
            ("hasSubstances", synset.substance_meronyms()),
            ("inCategory", synset.topic_domains()),
            ("usageOf", synset.usage_domains()),
            ("causes", synset.causes()),
            ("verbGroups", synset.verb_groups()),
        ]
        for field, syns in synset_relations:
            values = lemma_names(syns)
            if values:
                obj[field] = values

        results.append(obj)
    return {
        "word": word,
        "results": results
    }