"""Test helpers for the structure of WordNet synonym results, plus a WordNet survey script."""
import json

from nltk.corpus import wordnet as wn

from tests import EVENTS_FOLDER
from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer
from my_ghost_writer.text_parsers2 import get_wordnet_synonyms


# Report fields excluded from comparisons by analyze_detailed_report_lists.
_VOLATILE_REPORT_KEYS = ("samples", "sample_count")


def _without_volatile_keys(row: dict) -> dict:
    """Return a shallow copy of *row* without the keys in _VOLATILE_REPORT_KEYS."""
    return {key: value for key, value in row.items() if key not in _VOLATILE_REPORT_KEYS}


def analyze_detailed_report_lists(cls, detailed_report: dict, expected_detailed_report: dict):
    """Assert that two detailed reports match, ignoring their sample fields.

    Both reports must contain the same keys in the same order. For every key,
    the two rows are compared with "samples" and "sample_count" left out.
    The input dictionaries are not modified.

    Args:
        cls: object providing ``assertEqual`` and ``assertDictEqual``
            (presumably a ``unittest.TestCase`` instance - confirm with callers).
        detailed_report: report produced by the code under test.
        expected_detailed_report: reference report to compare against.
    """
    # Compare the full key sequences first: pairing the items with zip() would
    # stop at the shorter report and let missing or extra rows pass unnoticed.
    cls.assertEqual(list(detailed_report), list(expected_detailed_report))
    for row_key, row in detailed_report.items():
        cls.assertDictEqual(
            _without_volatile_keys(row),
            _without_volatile_keys(expected_detailed_report[row_key]),
        )


def assert__json_structure__get_wordnet_synonyms(cls, word):
    """Check the structure of the first ``get_wordnet_synonyms(word)`` result.

    Loads the reference report from
    ``EVENTS_FOLDER / "get_wordnet_synonyms_{word}_ok1.json"``, runs
    ``JSONPathStructureAnalyzer`` on the first element returned by
    ``get_wordnet_synonyms(word)`` and compares both reports with
    ``analyze_detailed_report_lists``.

    Args:
        cls: object providing the ``unittest``-style assertion methods.
        word: word to look up; also selects the fixture file name.
    """
    fixture_path = EVENTS_FOLDER / f"get_wordnet_synonyms_{word}_ok1.json"
    with open(fixture_path, "r", encoding="utf-8") as src:
        expected_detailed_report = json.load(src)

    related_words = get_wordnet_synonyms(word)
    # Fail with a readable message instead of an IndexError on an empty result.
    cls.assertTrue(related_words, f"get_wordnet_synonyms({word!r}) returned no results")
    first_related_words = related_words[0]

    analyzer = JSONPathStructureAnalyzer()
    analyzer.extract_all_paths(first_related_words)
    detailed_report = analyzer.get_detailed_type_report()
    analyze_detailed_report_lists(cls, detailed_report, expected_detailed_report)
    # To regenerate the fixture, temporarily uncomment the two lines below:
    # with open(EVENTS_FOLDER / f"get_wordnet_synonyms_{word}_ok1.json", "w") as src:
    #     json.dump(detailed_report, src)


def get_relationships(synset):
    """Count the WordNet relations attached to *synset*, grouped by kind.

    Args:
        synset: WordNet synset, as yielded by ``wn.all_synsets``.

    Returns:
        A dict mapping each relationship name to an integer count. Holonyms
        and meronyms add up their member, part and substance variants;
        antonyms are summed over all lemmas of the synset.
    """
    return {
        'synonyms': len(synset.lemma_names()),
        'antonyms': sum(len(lemma.antonyms()) for lemma in synset.lemmas()),
        'hypernyms': len(synset.hypernyms()),
        'hyponyms': len(synset.hyponyms()),
        'holonyms': (
            len(synset.member_holonyms())
            + len(synset.part_holonyms())
            + len(synset.substance_holonyms())
        ),
        'meronyms': (
            len(synset.member_meronyms())
            + len(synset.part_meronyms())
            + len(synset.substance_meronyms())
        ),
        'similar_tos': len(synset.similar_tos()),
        'also_sees': len(synset.also_sees()),
        'causes': len(synset.causes()),
    }


def _print_top_synsets(header, scored_synsets, top_n):
    """Print *header*, then the *top_n* highest-scoring entries of *scored_synsets*.

    Each entry is a ``(synset, score, relationships)`` tuple. The sort is
    stable, so entries with equal scores keep their original relative order.
    """
    print(header)
    ranked = sorted(scored_synsets, key=lambda entry: entry[1], reverse=True)
    for synset, _score, rels in ranked[:top_n]:
        print(f"{synset.name()} ({synset.definition()}): {rels}")


def extract_word_relationships(top_n=5):
    """Print the WordNet noun and verb synsets that rank highest in three categories.

    Every noun ('n') and verb ('v') synset is scanned once. Three rankings
    are printed:

    * synsets with the most *kinds* of relationship (the score is the number
      of relationship types with a non-zero count, not the sum of counts);
    * synsets with the most holonyms;
    * synsets with the most "causes" relations.

    Args:
        top_n: number of synsets to print per ranking (default 5).
    """
    results = []
    holonym_results = []
    cause_results = []
    for pos in ['n', 'v']:
        for synset in wn.all_synsets(pos):
            rels = get_relationships(synset)
            # Score = how many relationship kinds are present at all.
            total = sum(1 for v in rels.values() if v > 0)
            results.append((synset, total, rels))
            if rels['holonyms'] > 0:
                holonym_results.append((synset, rels['holonyms'], rels))
            if rels['causes'] > 0:
                cause_results.append((synset, rels['causes'], rels))

    _print_top_synsets(f"Top {top_n} synsets with most relationships:", results, top_n)
    _print_top_synsets(f"\nTop {top_n} synsets with holonym relationships:", holonym_results, top_n)
    _print_top_synsets(f"\nTop {top_n} synsets with cause relationships:", cause_results, top_n)


if __name__ == "__main__":
    extract_word_relationships()