alessandro trinca tornidor committed on
Commit
0343c29
·
1 Parent(s): d6e9ab3

refactor: refactored get_wordnet_synonyms()

Browse files
Files changed (1) hide show
  1. my_ghost_writer/text_parsers2.py +109 -107
my_ghost_writer/text_parsers2.py CHANGED
@@ -219,93 +219,8 @@ def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[
219
  related_word_groups_raw: list[dict[str, Any]] = []
220
  word_lower = word.lower()
221
 
222
- def _get_related_words(related_object, relation_type: TermRelationships, inner_word_lower: str):
223
- related_words = []
224
-
225
- if relation_type == TermRelationships.SYNONYM:
226
- # related_object is a Synset
227
- for local_lemma in related_object.lemmas():
228
- lemma_name = local_lemma.name().replace("_", " ")
229
- if lemma_name.lower() != inner_word_lower:
230
- related_words.append({
231
- "base_form": lemma_name
232
- })
233
- elif relation_type == TermRelationships.ANTONYM:
234
- # related_object is a Lemma
235
- for ant in related_object.antonyms():
236
- ant_name = ant.name().replace("_", " ")
237
- if ant_name.lower() != inner_word_lower:
238
- related_words.append({
239
- "base_form": ant_name
240
- })
241
- else:
242
- # related_object is a Synset
243
- # Get related synsets from the appropriate method
244
- relation_methods = {
245
- TermRelationships.HYPERNYM: related_object.hypernyms,
246
- TermRelationships.HYPONYM: related_object.hyponyms,
247
- TermRelationships.MERONYM: lambda: related_object.member_meronyms() + related_object.substance_meronyms() + related_object.part_meronyms(),
248
- TermRelationships.HOLONYM: lambda: related_object.member_holonyms() + related_object.substance_holonyms() + related_object.part_holonyms(),
249
- TermRelationships.ALSO_SEE: related_object.also_sees,
250
- TermRelationships.CAUSE: related_object.causes,
251
- # TermRelationships.DERIVATIONALLY_RELATED_FORM: related_object.derivationally_related_forms,
252
- # TermRelationships.ENTAILMENT: related_object.entails,
253
- # TermRelationships.PERTAINYM: related_object.pertainyms,
254
- TermRelationships.SIMILAR_TO: related_object.similar_tos,
255
- }
256
- get_words_fn = relation_methods.get(relation_type)
257
- if get_words_fn:
258
- for related_synset in get_words_fn():
259
- # Some methods return Lemma objects, handle both cases
260
- if hasattr(related_synset, "lemmas"):
261
- for local_lemma in related_synset.lemmas():
262
- lemma_name = local_lemma.name().replace("_", " ")
263
- if lemma_name.lower() != inner_word_lower:
264
- related_words.append({
265
- "base_form": lemma_name,
266
- # "is_custom": False,
267
- })
268
- elif hasattr(related_synset, "name"):
269
- lemma_name = related_synset.name().replace("_", " ")
270
- if lemma_name.lower() != inner_word_lower:
271
- related_words.append({
272
- "base_form": lemma_name,
273
- # "is_custom": False,
274
- })
275
-
276
- if related_words:
277
- return {
278
- "relation_type": relation_type,
279
- "source": "wordnet",
280
- "definition": related_object.definition() if hasattr(related_object, "definition") else "",
281
- "examples": related_object.examples()[:2] if hasattr(related_object, "examples") else [],
282
- "wordnet_pos": related_object.pos() if hasattr(related_object, "pos") else None,
283
- "related_words": related_words,
284
- }
285
- return None
286
-
287
  # 1. Custom Related Word Lookup (all relationships)
288
- for rel_type in TermRelationships:
289
- custom_groups = custom_synonym_handler.get_related(word_lower, rel_type)
290
- if custom_groups:
291
- for related in custom_groups:
292
- words = related["words"]
293
- definition = related.get("definition", "")
294
- related_word_options = []
295
- for word_from_related_words in words:
296
- related_word_options.append({
297
- "base_form": word_from_related_words,
298
- "is_custom": True,
299
- "definition": definition,
300
- })
301
- related_word_groups_raw.append({
302
- "relation_type": rel_type,
303
- "source": "custom",
304
- "definition": definition,
305
- "examples": [],
306
- "wordnet_pos": None,
307
- "related_words": related_word_options,
308
- })
309
  # 2. WordNet Lookup
310
  try:
311
  # Map spaCy POS to wn POS
@@ -326,36 +241,123 @@ def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[
326
  # Process each synset and its relations
327
  for synset in synsets:
328
  result = _get_related_words(synset, TermRelationships.SYNONYM, word_lower)
329
- if result:
330
- related_word_groups_raw.append(result)
331
- # todo: check if it's possible to remove the first 'IF result:...' and move the second one
332
  for lemma in synset.lemmas():
333
  result = _get_related_words(lemma, TermRelationships.ANTONYM, word_lower)
334
- if result:
335
- related_word_groups_raw.append(result)
336
  for rel_type in [
337
- TermRelationships.HYPERNYM, TermRelationships.HYPONYM, TermRelationships.MERONYM,
338
- TermRelationships.HOLONYM, TermRelationships.ALSO_SEE, TermRelationships.CAUSE,
339
- # todo: try to understand how to fix the related missing methods
340
- # TermRelationships.DERIVATIONALLY_RELATED_FORM,
341
- # TermRelationships.ENTAILMENT,
342
- # TermRelationships.PERTAINYM,
343
- TermRelationships.SIMILAR_TO
344
- ]:
345
- app_logger.info(f"synset: {type(synset)}, '{synset}'")
346
- if not isinstance(synset, Synset):
347
- pass
348
  result = _get_related_words(synset, rel_type, word_lower)
349
- if result:
350
- if result["relation_type"] == TermRelationships.CAUSE:
351
- app_logger.info(f"Adding result for relation type '{rel_type}': {result}")
352
- related_word_groups_raw.append(result)
353
 
354
  except Exception as ex1:
355
  app_logger.error(f"Error getting wn synonyms: '{ex1}' with: word:{type(word)}, '{word}', pos_tag: {type(pos_tag)}, '{pos_tag}'")
356
  raise HTTPException(status_code=500, detail=f"Error retrieving related words: '{str(ex1)}'")
357
 
358
- return related_word_groups_raw
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
 
360
 
361
  def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
 
219
  related_word_groups_raw: list[dict[str, Any]] = []
220
  word_lower = word.lower()
221
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  # 1. Custom Related Word Lookup (all relationships)
223
+ _extract_related_word_groups_custom(related_word_groups_raw, word_lower)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  # 2. WordNet Lookup
225
  try:
226
  # Map spaCy POS to wn POS
 
241
  # Process each synset and its relations
242
  for synset in synsets:
243
  result = _get_related_words(synset, TermRelationships.SYNONYM, word_lower)
244
+ related_word_groups_raw.append(result)
 
 
245
  for lemma in synset.lemmas():
246
  result = _get_related_words(lemma, TermRelationships.ANTONYM, word_lower)
247
+ related_word_groups_raw.append(result)
 
248
  for rel_type in [
249
+ TermRelationships.HYPERNYM, TermRelationships.HYPONYM, TermRelationships.MERONYM,
250
+ TermRelationships.HOLONYM, TermRelationships.ALSO_SEE, TermRelationships.CAUSE,
251
+ # todo: try to understand how to fix the related missing methods
252
+ # TermRelationships.DERIVATIONALLY_RELATED_FORM,
253
+ # TermRelationships.ENTAILMENT,
254
+ # TermRelationships.PERTAINYM,
255
+ TermRelationships.SIMILAR_TO
256
+ ]:
 
 
 
257
  result = _get_related_words(synset, rel_type, word_lower)
258
+ related_word_groups_raw.append(result)
 
 
 
259
 
260
  except Exception as ex1:
261
  app_logger.error(f"Error getting wn synonyms: '{ex1}' with: word:{type(word)}, '{word}', pos_tag: {type(pos_tag)}, '{pos_tag}'")
262
  raise HTTPException(status_code=500, detail=f"Error retrieving related words: '{str(ex1)}'")
263
 
264
+ return [related_words for related_words in related_word_groups_raw if related_words is not None]
265
+
266
+
267
def _extract_related_word_groups_custom(related_word_groups_raw, word_lower):
    """Append the custom related-word groups found for ``word_lower``.

    Every member of ``TermRelationships`` is looked up through
    ``custom_synonym_handler.get_related``. Each group returned by the handler
    becomes one result dict (``"source": "custom"``) that is appended, in
    place, to ``related_word_groups_raw``. Nothing is returned.

    Args:
        related_word_groups_raw: list that receives the new group dicts.
        word_lower: the word to look up (callers pass it lower-cased).
    """
    for relationship in TermRelationships:
        groups = custom_synonym_handler.get_related(word_lower, relationship)
        if not groups:
            # Nothing registered for this relationship: try the next one.
            continue
        for group in groups:
            group_words = group["words"]
            group_definition = group.get("definition", "")
            # Every option carries the definition of the group it belongs to.
            options = [
                {
                    "base_form": custom_word,
                    "is_custom": True,
                    "definition": group_definition,
                }
                for custom_word in group_words
            ]
            related_word_groups_raw.append({
                "relation_type": relationship,
                "source": "custom",
                "definition": group_definition,
                "examples": [],
                "wordnet_pos": None,
                "related_words": options,
            })
289
+
290
+
291
def _get_base_form_by_synset_type(local_lemma: str, inner_word_lower: str, related_words: list[dict]) -> list[dict]:
    """Record ``local_lemma`` as a related word unless it is the looked-up word.

    Underscores in ``local_lemma`` are turned into spaces. The resulting name
    is appended to ``related_words`` as ``{"base_form": name}`` only when its
    lower-cased form differs from ``inner_word_lower``.

    Args:
        local_lemma: raw name, possibly using ``_`` as word separator.
        inner_word_lower: lower-cased word the lookup was made for.
        related_words: accumulator list, modified in place.

    Returns:
        The same ``related_words`` list that was passed in.
    """
    readable_name = local_lemma.replace("_", " ")
    is_looked_up_word = readable_name.lower() == inner_word_lower
    if not is_looked_up_word:
        related_words.append({"base_form": readable_name})
    return related_words
298
+
299
+
300
def _get_related_words(related_object, relation_type: TermRelationships, inner_word_lower: str) -> dict|None:
    """Build one WordNet result group for ``related_object`` and ``relation_type``.

    Args:
        related_object: object whose relations are read. The branches below
            treat it as a Synset, except for ``ANTONYM`` where it is a Lemma.
        relation_type: the relationship to collect.
        inner_word_lower: lower-cased looked-up word, forwarded to the helpers
            that fill the list of related words.

    Returns:
        A dict with the keys ``relation_type``, ``source``, ``definition``,
        ``examples``, ``wordnet_pos`` and ``related_words``, or ``None`` when
        no related word was collected.
    """
    collected = []

    if relation_type == TermRelationships.SYNONYM:
        # related_object is a Synset: its own lemmas are the synonyms.
        for synonym_lemma in related_object.lemmas():
            _get_base_form_by_synset_type(synonym_lemma.name(), inner_word_lower, collected)
    elif relation_type == TermRelationships.ANTONYM:
        # related_object is a Lemma.
        for antonym_lemma in related_object.antonyms():
            _get_base_form_by_synset_type(antonym_lemma.name(), inner_word_lower, collected)
    else:
        # related_object is a Synset: pick the accessor matching the relation.

        def _all_meronyms():
            return (
                related_object.member_meronyms()
                + related_object.substance_meronyms()
                + related_object.part_meronyms()
            )

        def _all_holonyms():
            return (
                related_object.member_holonyms()
                + related_object.substance_holonyms()
                + related_object.part_holonyms()
            )

        # Not wired in yet: DERIVATIONALLY_RELATED_FORM, ENTAILMENT, PERTAINYM.
        accessors = {
            TermRelationships.HYPERNYM: related_object.hypernyms,
            TermRelationships.HYPONYM: related_object.hyponyms,
            TermRelationships.MERONYM: _all_meronyms,
            TermRelationships.HOLONYM: _all_holonyms,
            TermRelationships.ALSO_SEE: related_object.also_sees,
            TermRelationships.CAUSE: related_object.causes,
            TermRelationships.SIMILAR_TO: related_object.similar_tos,
        }
        fetch_related = accessors.get(relation_type)
        if fetch_related:
            for related_item in fetch_related():
                _extract_lemmas_or_names_from_synset(inner_word_lower, related_item, collected)

    if not collected:
        return None
    return {
        "relation_type": relation_type,
        "source": "wordnet",
        "definition": _get_related_object_definition(related_object),
        "examples": _get_related_object_examples(related_object),
        "wordnet_pos": _get_related_wordnet_pos(related_object),
        "related_words": collected,
    }
340
+
341
+
342
def _extract_lemmas_or_names_from_synset(inner_word_lower, related_synset, related_words):
    """Feed the name(s) of ``related_synset`` into ``related_words``.

    Some relation accessors return Lemma objects instead of Synsets, so both
    shapes are handled: an object exposing ``lemmas`` contributes the name of
    each of its lemmas, otherwise an object exposing ``name`` contributes its
    own name. An object with neither attribute contributes nothing. Each name
    is handed to ``_get_base_form_by_synset_type``, which decides whether it
    is appended to ``related_words``.
    """
    if hasattr(related_synset, "lemmas"):
        # Generator, so each name() call stays interleaved with its append.
        raw_names = (lemma.name() for lemma in related_synset.lemmas())
    elif hasattr(related_synset, "name"):
        raw_names = (related_synset.name(),)
    else:
        raw_names = ()

    for raw_name in raw_names:
        _get_base_form_by_synset_type(raw_name, inner_word_lower, related_words)
349
+
350
+
351
def _get_related_wordnet_pos(related_object: Synset):
    """Return ``related_object.pos()``, or ``None`` when it has no ``pos`` attribute."""
    if not hasattr(related_object, "pos"):
        return None
    return related_object.pos()
353
+
354
+
355
def _get_related_object_examples(related_object: Synset, n: int = 2) -> list[str]:
    """Return the first ``n`` items of ``related_object.examples()``.

    An empty list is returned when the object has no ``examples`` attribute.
    """
    if not hasattr(related_object, "examples"):
        return []
    all_examples = related_object.examples()
    return all_examples[:n]
357
+
358
+
359
def _get_related_object_definition(related_object: Synset) -> str:
    """Return ``related_object.definition()``, or ``""`` when it has no ``definition`` attribute."""
    if not hasattr(related_object, "definition"):
        return ""
    return related_object.definition()
361
 
362
 
363
  def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str: