alessandro trinca tornidor committed on
Commit
a707261
·
1 Parent(s): 59bceb1

feat: make /thesaurus-inflated-phrase agnostic, not bound to synonyms - /thesaurus-custom fixed

Browse files
my_ghost_writer/app.py CHANGED
@@ -27,7 +27,7 @@ from my_ghost_writer.pymongo_utils import mongodb_health_check
27
  from my_ghost_writer.text_parsers2 import find_synonyms_for_phrase, custom_synonym_handler
28
  from my_ghost_writer.thesaurus import get_current_info_wordnet
29
  from my_ghost_writer.type_hints import (RequestQueryThesaurusInflatedBody, RequestQueryThesaurusWordsapiBody,
30
- RequestSplitText, RequestTextFrequencyBody, MultiWordSynonymResponse, CustomSynonymRequest)
31
 
32
 
33
  async def mongo_health_check_background_task():
@@ -225,7 +225,7 @@ def get_thesaurus_wordsapi(body: RequestQueryThesaurusWordsapiBody | str) -> JSO
225
  raise HTTPException(status_code=response.status_code, detail=msg)
226
 
227
 
228
- @app.post("/thesaurus-inflated-phrase", response_model=MultiWordSynonymResponse)
229
  async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
230
  """
231
  Get contextual synonyms for a selected phrase (one or more words).
@@ -252,7 +252,7 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
252
  app_logger.info(f"text:{text}!")
253
  app_logger.info(f"word:{word}!")
254
 
255
- # if use_mongo...
256
 
257
  try:
258
  # The new function in text_parsers2 does all the heavy lifting
@@ -266,7 +266,7 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
266
  app_logger.info(f"got find_synonyms_for_phrase() result in: {duration:.3f}s. ...")
267
  app_logger.debug(results)
268
 
269
- # if use_mongo and results: ...
270
 
271
  message = f"Got {len(results)} synonym groups." if results else "No words with synonyms found in the selected phrase."
272
 
@@ -274,13 +274,16 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
274
  duration = (t2 - t1).total_seconds()
275
  app_logger.info(f"got MultiWordSynonymResponse() result in: {duration:.3f}s. ...")
276
  # Construct the final response using our Pydantic model
277
- return MultiWordSynonymResponse(
278
  success=True,
279
  original_phrase=body.word,
280
  original_indices={"start": body.start, "end": body.end},
281
  results=results,
282
- message=message
 
283
  )
 
 
284
 
285
  except HTTPException as http_ex:
286
  # Re-raise known HTTP exceptions to be handled by FastAPI's handler
@@ -292,10 +295,12 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
292
 
293
 
294
  @app.post("/thesaurus-custom")
295
- async def add_custom_synonyms(body: CustomSynonymRequest):
296
  """Adds custom synonyms for a given word to the in-memory store."""
297
  try:
298
- custom_synonym_handler.add_entry(body.word, [r.model_dump() for r in body.related])
 
 
299
  return {"message": f"Custom entry for '{body.word}' added/updated successfully (in-memory)."}
300
 
301
  except Exception as e:
 
27
  from my_ghost_writer.text_parsers2 import find_synonyms_for_phrase, custom_synonym_handler
28
  from my_ghost_writer.thesaurus import get_current_info_wordnet
29
  from my_ghost_writer.type_hints import (RequestQueryThesaurusInflatedBody, RequestQueryThesaurusWordsapiBody,
30
+ RequestSplitText, RequestTextFrequencyBody, MultiRelatedWordResponse, CustomRelatedWordRequest)
31
 
32
 
33
  async def mongo_health_check_background_task():
 
225
  raise HTTPException(status_code=response.status_code, detail=msg)
226
 
227
 
228
+ @app.post("/thesaurus-inflated-phrase", response_model=MultiRelatedWordResponse)
229
  async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
230
  """
231
  Get contextual synonyms for a selected phrase (one or more words).
 
252
  app_logger.info(f"text:{text}!")
253
  app_logger.info(f"word:{word}!")
254
 
255
+ # persistence
256
 
257
  try:
258
  # The new function in text_parsers2 does all the heavy lifting
 
266
  app_logger.info(f"got find_synonyms_for_phrase() result in: {duration:.3f}s. ...")
267
  app_logger.debug(results)
268
 
269
+ # persistence
270
 
271
  message = f"Got {len(results)} synonym groups." if results else "No words with synonyms found in the selected phrase."
272
 
 
274
  duration = (t2 - t1).total_seconds()
275
  app_logger.info(f"got MultiWordSynonymResponse() result in: {duration:.3f}s. ...")
276
  # Construct the final response using our Pydantic model
277
+ response_object = MultiRelatedWordResponse(
278
  success=True,
279
  original_phrase=body.word,
280
  original_indices={"start": body.start, "end": body.end},
281
  results=results,
282
+ message=message,
283
+ duration=duration
284
  )
285
+ response_json = response_object.model_dump_json(exclude_none=True)
286
+ return JSONResponse(status_code=200, content=json.loads(response_json))
287
 
288
  except HTTPException as http_ex:
289
  # Re-raise known HTTP exceptions to be handled by FastAPI's handler
 
295
 
296
 
297
  @app.post("/thesaurus-custom")
298
+ async def add_custom_synonyms(body: CustomRelatedWordRequest):
299
  """Adds custom synonyms for a given word to the in-memory store."""
300
  try:
301
+ word = body.word
302
+ related_list = body.related
303
+ custom_synonym_handler.add_entry(word, related_list)
304
  return {"message": f"Custom entry for '{body.word}' added/updated successfully (in-memory)."}
305
 
306
  except Exception as e:
my_ghost_writer/custom_synonym_handler.py CHANGED
@@ -1,22 +1,46 @@
1
  from typing import Any
2
 
 
 
 
3
 
4
  class CustomSynonymHandler:
 
 
 
5
  def __init__(self):
6
- # {word: {relation_type: [{word: related_word, definition: definition}]}}
7
- self.lexicon: dict[str, dict[str, list[dict[str, Any]]]] = {}
 
 
 
8
  # For reverse lookups
9
  self.inverted_index: dict[str, set[str]] = {}
10
 
11
- def add_entry(self, word: str, related: list[dict[str, Any]]):
 
 
 
 
 
 
 
 
 
 
 
12
  word = word.lower()
13
  if word not in self.lexicon:
14
  self.lexicon[word] = {}
15
- for relation in related:
16
- relation_type = relation["type"]
 
 
 
 
17
  group = {
18
- "words": [w.lower().strip() for w in relation["words"]],
19
- "definition": relation.get("definition")
20
  }
21
  if relation_type not in self.lexicon[word]:
22
  self.lexicon[word][relation_type] = []
@@ -27,9 +51,20 @@ class CustomSynonymHandler:
27
  self.inverted_index[w].add(word)
28
 
29
  def delete_entry(self, word: str):
 
 
 
 
 
 
 
 
 
 
 
30
  word = word.lower()
31
  if word not in self.lexicon:
32
- raise KeyError(f"No custom synonyms found for word '{word}'.")
33
  # Remove from inverted index
34
  for relation_groups in self.lexicon[word].values():
35
  for group in relation_groups:
@@ -37,18 +72,51 @@ class CustomSynonymHandler:
37
  del self.lexicon[word]
38
 
39
  def _update_group_words(self, group, word):
 
 
 
 
 
 
 
 
 
 
 
40
  for w in group["words"]:
41
  if w in self.inverted_index:
42
  self.inverted_index[w].discard(word)
43
  if not self.inverted_index[w]:
44
  del self.inverted_index[w]
45
 
46
- def get_related(self, word: str, relation_type: str) -> list[dict[str, Any]]:
 
 
 
 
 
 
 
 
 
 
 
47
  word = word.lower()
48
- if word in self.lexicon and relation_type in self.lexicon[word]:
49
- return self.lexicon[word][relation_type]
 
 
50
  return []
51
 
52
  def reverse_lookup(self, related_word: str) -> set[str]:
 
 
 
 
 
 
 
 
 
53
  related_word = related_word.lower()
54
  return self.inverted_index.get(related_word, set())
 
1
  from typing import Any
2
 
3
+ from my_ghost_writer.constants import app_logger
4
+ from my_ghost_writer.type_hints import RelatedEntry, TermRelationships
5
+
6
 
7
  class CustomSynonymHandler:
8
+ """
9
+ Handles the storage and retrieval of custom-related words.
10
+ """
11
  def __init__(self):
12
+ """
13
+ Initializes the CustomSynonymHandler with an empty lexicon and inverted index.
14
+ """
15
+ # {word: {relation_type: [{words: related_word, definition: definition}]}}
16
+ self.lexicon: dict[str, dict[TermRelationships, list[dict[str, Any]]]] = {}
17
  # For reverse lookups
18
  self.inverted_index: dict[str, set[str]] = {}
19
 
20
+ def add_entry(self, word: str, related: list[RelatedEntry]):
21
+ """
22
+ Adds a custom-related word entry to the lexicon.
23
+ Side Effects: Updates the lexicon and inverted_index with the new related word entry.
24
+
25
+ Args:
26
+ word: The word to add related words for (str).
27
+ related: A list of RelatedEntry objects representing the related words (list[RelatedEntry]).
28
+
29
+ Returns:
30
+ None
31
+ """
32
  word = word.lower()
33
  if word not in self.lexicon:
34
  self.lexicon[word] = {}
35
+ for n, relation in enumerate(related):
36
+ try:
37
+ relation_type = relation.type
38
+ except AttributeError as attr_err:
39
+ app_logger.error(f"AttributeError: {attr_err}, n:{n}.")
40
+ raise attr_err
41
  group = {
42
+ "words": [w.lower().strip() for w in relation.words],
43
+ "definition": relation.definition
44
  }
45
  if relation_type not in self.lexicon[word]:
46
  self.lexicon[word][relation_type] = []
 
51
  self.inverted_index[w].add(word)
52
 
53
  def delete_entry(self, word: str):
54
+ """
55
+ Deletes a custom-related word entry from the lexicon.
56
+ Side Effects: Removes the related word entry from the lexicon and updates the inverted_index.
57
+ Raises: KeyError: If the word is not found in the lexicon.
58
+
59
+ Args:
60
+ word: The word to delete related words for (str).
61
+
62
+ Returns:
63
+ None
64
+ """
65
  word = word.lower()
66
  if word not in self.lexicon:
67
+ raise KeyError(f"No custom related words found for word '{word}'.")
68
  # Remove from inverted index
69
  for relation_groups in self.lexicon[word].values():
70
  for group in relation_groups:
 
72
  del self.lexicon[word]
73
 
74
  def _update_group_words(self, group, word):
75
+ """
76
+ Updates the inverted index when a related word entry is deleted.
77
+ Side Effects: Updates the inverted_index by discarding or deleting entries.
78
+
79
+ Args:
80
+ group: The group of related words (dict).
81
+ word: The word that the related words are associated with (str).
82
+
83
+ Returns:
84
+ None
85
+ """
86
  for w in group["words"]:
87
  if w in self.inverted_index:
88
  self.inverted_index[w].discard(word)
89
  if not self.inverted_index[w]:
90
  del self.inverted_index[w]
91
 
92
+ def get_related(self, word: str, relation_type: TermRelationships) -> list[dict[str, Any]]:
93
+ """
94
+ Retrieves related words for a given word and relation type.
95
+
96
+ Args:
97
+ word: The word to retrieve related words for (str).
98
+ relation_type: The type of relationship to retrieve (TermRelationships).
99
+
100
+ Returns:
101
+ A list of dictionaries, where each dictionary represents a related word group (list[dict[str, Any]]).
102
+ Returns an empty list if no related words are found.
103
+ """
104
  word = word.lower()
105
+ if word in self.lexicon:
106
+ word_lexicon = self.lexicon[word]
107
+ if relation_type in word_lexicon:
108
+ return word_lexicon[relation_type]
109
  return []
110
 
111
  def reverse_lookup(self, related_word: str) -> set[str]:
112
+ """
113
+ Performs a reverse lookup to find words that have the given word as a related word.
114
+
115
+ Args:
116
+ related_word: The word to search for (str).
117
+
118
+ Returns:
119
+ A set of words that have the given word as a related word (set[str]).
120
+ """
121
  related_word = related_word.lower()
122
  return self.inverted_index.get(related_word, set())
my_ghost_writer/jsonpath_comparator.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from jsonpath_ng import parse
2
+ from jsonpath_ng.ext import parse as parse_ext
3
+ from typing import Dict, Set, Any, List
4
+
5
+
6
def extract_structure_paths(data: dict) -> Set[str]:
    """
    Extract every available JSONPath expression from JSON data.

    Args:
        data: JSON data to analyze

    Returns:
        Set of all JSONPath expressions found in the data
    """
    paths: Set[str] = set()
    # Iterative traversal: each stack entry pairs an object with its JSONPath.
    stack = [(data, "$")]
    while stack:
        obj, path = stack.pop()
        if isinstance(obj, dict):
            for key, value in obj.items():
                child_path = f"{path}.{key}"
                paths.add(child_path)
                stack.append((value, child_path))
        elif isinstance(obj, list):
            array_path = f"{path}[*]"
            paths.add(array_path)
            stack.extend((item, array_path) for item in obj)
    return paths
33
+
34
+
35
def extract_structure_paths_with_types(data: dict) -> Dict[str, str]:
    """
    Extract all available paths from JSON data together with their value types.

    Args:
        data: JSON data to analyze

    Returns:
        Dictionary mapping JSONPath expressions to their value types
    """
    result: Dict[str, str] = {}

    def _describe(value: Any) -> str:
        """Return a descriptive type name for the value."""
        if value is None:
            return "null"
        # bool must be tested before int: bool is a subclass of int
        if isinstance(value, bool):
            return "boolean"
        if isinstance(value, int):
            return "integer"
        if isinstance(value, float):
            return "number"
        if isinstance(value, str):
            return "string"
        if isinstance(value, dict):
            return "object"
        if isinstance(value, list):
            if not value:
                return "array(empty)"
            member_types = sorted({_describe(item) for item in value})
            if len(member_types) == 1:
                return f"array({member_types[0]})"
            return f"array(mixed: {', '.join(member_types)})"
        return type(value).__name__

    def _walk(obj: Any, path: str = "$") -> None:
        result[path] = _describe(obj)
        if isinstance(obj, dict):
            for key, value in obj.items():
                _walk(value, f"{path}.{key}")
        elif isinstance(obj, list) and obj:
            # The first element stands in as the representative array item
            _walk(obj[0], f"{path}[*]")

    _walk(data)
    return result
88
+
89
+
90
def print_comparison_report(comparison: Dict[str, Any]):
    """
    Print a formatted comparison report to stdout.

    Args:
        comparison: Results from compare_structures method
    """
    print("=== JSONPATH STRUCTURE COMPARISON ===\n")

    added = comparison["added_paths"]
    if added:
        print("➕ ADDED PATHS:")
        for entry in sorted(added):
            print(f" {entry}")
        print()

    removed = comparison["removed_paths"]
    if removed:
        print("➖ REMOVED PATHS:")
        for entry in sorted(removed):
            print(f" {entry}")
        print()

    validations = comparison.get("path_validations")
    if validations:
        print("🔍 PATH VALIDATIONS:")
        for expr, outcome in validations.items():
            if "error" in outcome:
                print(f" ⚠️ {expr}: {outcome['error']}")
            else:
                print(f" {outcome['status']} {expr}: {outcome['old_count']} → {outcome['new_count']}")
122
+
123
+
124
def print_comparison_report_with_types(comparison: Dict[str, Any]):
    """
    Print a formatted comparison report including type information.

    Args:
        comparison: Results from compare_structures_with_types method
    """
    def _print_typed_section(header: str, entries: Dict[str, str]) -> None:
        # Shared layout for the added/removed path sections
        print(header)
        for entry_path, entry_type in sorted(entries.items()):
            print(f" {entry_path} ({entry_type})")
        print()

    print("=== JSONPATH STRUCTURE COMPARISON WITH TYPES ===\n")

    if comparison["added_paths"]:
        _print_typed_section("➕ ADDED PATHS:", comparison["added_paths"])

    if comparison["removed_paths"]:
        _print_typed_section("➖ REMOVED PATHS:", comparison["removed_paths"])

    if comparison.get("type_changes"):
        print("🔄 TYPE CHANGES:")
        for changed_path, change in sorted(comparison["type_changes"].items()):
            print(f" {changed_path}: {change['old_type']} → {change['new_type']}")
        print()

    if comparison.get("path_validations"):
        print("🔍 PATH VALIDATIONS:")
        for expr, outcome in comparison["path_validations"].items():
            if "error" in outcome:
                print(f" ⚠️ {expr}: {outcome['error']}")
            else:
                print(f" {outcome['status']} {expr}: {outcome['old_count']} → {outcome['new_count']}")
161
+
162
+
163
class JSONPathComparator:
    """
    Compare JSON structures using JSONPath expressions

    This class allows you to define expected paths and compare different
    JSON responses to detect structural changes during refactoring.
    """

    def __init__(self, common_paths: List[str] = None):
        """
        Initialize comparator with common paths to validate

        Args:
            common_paths: List of JSONPath expressions to validate across responses
        """
        self.common_paths = common_paths or []

    def _validate_common_paths(self, old_data: dict, new_data: dict) -> Dict[str, Dict[str, Any]]:
        """
        Evaluate every configured JSONPath expression against both payloads.

        Previously this loop was duplicated verbatim in compare_structures and
        compare_structures_with_types; it is factored out so the two cannot drift.

        Args:
            old_data: Original JSON structure
            new_data: New JSON structure to compare against

        Returns:
            Mapping of path expression -> validation result. A result records
            match counts on both sides (status ✅ when presence agrees, ❌ when
            it does not) or, if the expression fails, the error (status ⚠️).
        """
        validations: Dict[str, Dict[str, Any]] = {}
        for path_expr in self.common_paths:
            try:
                jsonpath = parse_ext(path_expr)

                old_matches = [match.value for match in jsonpath.find(old_data)]
                new_matches = [match.value for match in jsonpath.find(new_data)]

                validations[path_expr] = {
                    "old_found": len(old_matches) > 0,
                    "new_found": len(new_matches) > 0,
                    "old_count": len(old_matches),
                    "new_count": len(new_matches),
                    "status": "✅" if (len(old_matches) > 0) == (len(new_matches) > 0) else "❌"
                }

            except Exception as e:
                # Keep going: one bad expression must not abort the whole report
                validations[path_expr] = {
                    "error": str(e),
                    "status": "⚠️"
                }
        return validations

    def compare_structures(self, old_data: dict, new_data: dict) -> Dict[str, Any]:
        """
        Compare two JSON structures using JSONPath

        Args:
            old_data: Original JSON structure
            new_data: New JSON structure to compare against

        Returns:
            Dictionary containing comparison results with added/removed paths
            and validation results for common paths
        """
        old_paths = extract_structure_paths(old_data)
        new_paths = extract_structure_paths(new_data)

        return {
            "added_paths": new_paths - old_paths,
            "removed_paths": old_paths - new_paths,
            "common_paths": old_paths & new_paths,
            "path_validations": self._validate_common_paths(old_data, new_data)
        }

    def compare_structures_with_types(self, old_data: dict, new_data: dict) -> Dict[str, Any]:
        """
        Compare two JSON structures using JSONPath including type information

        Args:
            old_data: Original JSON structure
            new_data: New JSON structure to compare against

        Returns:
            Dictionary containing comparison results with type information
        """
        old_paths = extract_structure_paths_with_types(old_data)
        new_paths = extract_structure_paths_with_types(new_data)

        # Paths present on both sides whose value type changed
        type_changes = {}
        for path in set(old_paths.keys()) & set(new_paths.keys()):
            if old_paths[path] != new_paths[path]:
                type_changes[path] = {
                    "old_type": old_paths[path],
                    "new_type": new_paths[path]
                }

        return {
            "added_paths": {k: v for k, v in new_paths.items() if k not in old_paths},
            "removed_paths": {k: v for k, v in old_paths.items() if k not in new_paths},
            "common_paths": {k: v for k, v in old_paths.items() if k in new_paths},
            "type_changes": type_changes,
            "path_validations": self._validate_common_paths(old_data, new_data)
        }
282
+
283
+
284
def compare_json_with_jsonpath(old_data: dict, new_data: dict, common_paths: List[str] = None):
    """
    Main function to compare JSON structures

    Args:
        old_data: Original JSON structure
        new_data: New JSON structure to compare
        common_paths: Optional list of JSONPath expressions to validate

    Returns:
        Dictionary containing comparison results
    """
    result = JSONPathComparator(common_paths).compare_structures(old_data, new_data)
    print_comparison_report(result)
    return result
300
+
301
+
302
def compare_json_with_jsonpath_and_types(old_data: dict, new_data: dict, common_paths: List[str] = None):
    """
    Main function to compare JSON structures with type information

    Args:
        old_data: Original JSON structure
        new_data: New JSON structure to compare
        common_paths: Optional list of JSONPath expressions to validate

    Returns:
        Dictionary containing comparison results with type information
    """
    result = JSONPathComparator(common_paths).compare_structures_with_types(old_data, new_data)
    print_comparison_report_with_types(result)
    return result
my_ghost_writer/jsonpath_extractor.py ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import defaultdict
2
+ from typing import Any, Dict, Set
3
+
4
+
5
class JSONPathStructureAnalyzer:
    """
    Analyze JSON structure using JSONPath expressions

    Example usage:
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths({"success": True, "data": {"users": [{"id": 1}]}})
        print(analyzer.get_structure_report())
    """

    def __init__(self):
        """Initialize empty path/type/sample registries."""
        # All JSONPath expressions discovered so far
        self.paths = set()
        # path -> set of Python type names observed at that path
        self.types = defaultdict(set)
        # path -> up to max_samples stringified scalar values
        self.samples = defaultdict(list)
        # path -> length of the array found at that path
        self.array_lengths = {}

    def extract_all_paths(self, data: dict, max_samples: int = 3) -> Set[str]:
        """
        Extract all possible JSONPath expressions from data

        Args:
            data: JSON data to analyze
            max_samples: Maximum number of sample values to collect per path

        Returns:
            Set of JSONPath expressions found in the data
        """

        def _extract_recursive(obj: Any, path: str = "$"):
            if isinstance(obj, dict):
                for key, value in obj.items():
                    current_path = f"{path}.{key}"
                    self.paths.add(current_path)
                    self.types[current_path].add(type(value).__name__)

                    # Only scalars are sampled; containers are described structurally
                    if not isinstance(value, (dict, list)) and len(self.samples[current_path]) < max_samples:
                        self.samples[current_path].append(str(value))

                    _extract_recursive(value, current_path)

            elif isinstance(obj, list):
                array_path = f"{path}[*]"
                self.paths.add(array_path)
                self.types[array_path].add("array")
                self.array_lengths[array_path] = len(obj)

                # Walk every item so heterogeneous element structures are all captured
                for item in obj:
                    _extract_recursive(item, array_path)

        _extract_recursive(data)
        return self.paths

    def _format_path_entry(self, path: str) -> str:
        """
        Render one report line for path as "path -- description".

        Shared by get_structure_report and get_filtered_structure_report
        (previously duplicated) so the two reports cannot drift apart.
        """
        types = self.types[path]
        samples = self.samples.get(path, [])

        if "array" in types:
            return f"{path} -- array[{self.array_lengths.get(path, 0)}]"
        if samples:
            unique_count = len(set(samples))
            if len(samples) > 1 and unique_count > 1:
                return f"{path} -- {samples[0]} .. {samples[-1]} ({unique_count} unique values)"
            return f"{path} -- {samples[0]}"
        return f"{path} -- {'/'.join(types)}"

    def get_structure_report(self) -> str:
        """
        Generate a structure report using JSONPath notation

        Returns:
            Formatted string showing all paths with their types and sample values
        """
        return "\n".join(self._format_path_entry(path) for path in sorted(self.paths))

    def get_paths_with_types(self) -> Dict[str, str]:
        """
        Get all paths with their associated value types

        Returns:
            Dictionary mapping JSONPath expressions to their value types
        """
        paths_with_types = {}
        for path in self.paths:
            types = list(self.types[path])
            if "array" in types:
                paths_with_types[path] = "array"
            elif len(types) == 1:
                paths_with_types[path] = types[0]
            elif len(types) > 1:
                paths_with_types[path] = f"mixed({', '.join(sorted(types))})"
            else:
                paths_with_types[path] = "unknown"
        return paths_with_types

    def get_array_lengths(self) -> Dict[str, int]:
        """
        Get array lengths for all array paths

        Returns:
            Dictionary mapping array paths to their lengths
        """
        return self.array_lengths.copy()

    def get_detailed_type_report(self, get_samples: bool = True) -> Dict[str, Dict[str, Any]]:
        """
        Get detailed type information for each path, optionally with samples

        Args:
            get_samples: When True, include collected sample values per path

        Returns:
            Dictionary with detailed type information for each path
        """
        detailed_report = {}
        for path in sorted(self.paths):
            types = list(self.types[path])
            is_array = "array" in types

            path_info = {
                "types": types,
                "primary_type": None,
                "is_array": is_array,
            }
            if get_samples:
                samples = self.samples.get(path, [])
                path_info["samples"] = samples
                path_info["sample_count"] = len(samples)

            if is_array:
                path_info["array_length"] = self.array_lengths.get(path, 0)
                path_info["primary_type"] = "array"
            elif len(types) == 1:
                path_info["primary_type"] = types[0]
            elif len(types) > 1:
                path_info["primary_type"] = f"mixed({', '.join(sorted(types))})"
            else:
                path_info["primary_type"] = "unknown"

            detailed_report[path] = path_info
        return detailed_report

    def compare_json_structures(self, other_data: dict) -> Dict[str, Any]:
        """
        Compare this analyzer's data with another JSON structure

        Args:
            other_data: JSON data to compare against

        Returns:
            Dictionary containing detailed comparison results
        """
        other_analyzer = JSONPathStructureAnalyzer()
        other_analyzer.extract_all_paths(other_data)

        self_paths_types = self.get_paths_with_types()
        other_paths_types = other_analyzer.get_paths_with_types()

        self_array_lengths = self.get_array_lengths()
        other_array_lengths = other_analyzer.get_array_lengths()

        self_only_paths = set(self_paths_types.keys()) - set(other_paths_types.keys())
        other_only_paths = set(other_paths_types.keys()) - set(self_paths_types.keys())
        common_paths = set(self_paths_types.keys()) & set(other_paths_types.keys())

        type_changes = {}
        value_differences = {}
        array_size_changes = {}

        for path in common_paths:
            self_type = self_paths_types[path]
            other_type = other_paths_types[path]

            if self_type != other_type:
                type_changes[path] = {
                    "old_type": self_type,
                    "new_type": other_type
                }

            if self_type == "array" and other_type == "array":
                self_length = self_array_lengths.get(path, 0)
                other_length = other_array_lengths.get(path, 0)
                if self_length != other_length:
                    array_size_changes[path] = {
                        "old_size": self_length,
                        "new_size": other_length,
                        "size_change": other_length - self_length
                    }

            # Value comparison only makes sense for scalar (sampled) paths
            if self_type != "array" and other_type != "array":
                self_samples = self.samples.get(path, [])
                other_samples = other_analyzer.samples.get(path, [])
                # Compare first sample values only
                if self_samples and other_samples and self_samples[0] != other_samples[0]:
                    value_differences[path] = {
                        "old_value": self_samples[0],
                        "new_value": other_samples[0],
                        "old_samples": self_samples,
                        "new_samples": other_samples
                    }

        return {
            "added_paths": {path: other_paths_types[path] for path in other_only_paths},
            "removed_paths": {path: self_paths_types[path] for path in self_only_paths},
            "common_paths": {path: self_paths_types[path] for path in common_paths},
            "type_changes": type_changes,
            "value_differences": value_differences,
            "array_size_changes": array_size_changes,
            "array_lengths_old": {path: length for path, length in self_array_lengths.items()
                                  if path in common_paths or path in self_only_paths},
            "array_lengths_new": {path: length for path, length in other_array_lengths.items()
                                  if path in common_paths or path in other_only_paths},
            "summary": {
                "total_paths_old": len(self_paths_types),
                "total_paths_new": len(other_paths_types),
                "paths_added": len(other_only_paths),
                "paths_removed": len(self_only_paths),
                "paths_common": len(common_paths),
                "type_changes_count": len(type_changes),
                "value_changes_count": len(value_differences),
                "array_size_changes_count": len(array_size_changes)
            }
        }

    def filter_paths_excluding_keys(self, exclude_keys: set[str]) -> set[str]:
        """
        Filter existing paths to exclude those containing specific keys

        NOTE(review): matching is by substring, so excluding "user" also drops
        paths containing ".users" — preserved as-is; confirm if intended.

        Args:
            exclude_keys: set of keys to exclude

        Returns:
            Filtered set of paths
        """
        # f".{key}[" is implied by f".{key}", so a single substring test suffices
        return {
            path for path in self.paths
            if not any(f".{exclude_key}" in path for exclude_key in exclude_keys)
        }

    def get_filtered_structure_report(self, exclude_keys: set[str] = None) -> str:
        """
        Generate structure report excluding specific keys

        Args:
            exclude_keys: set of keys to exclude from report

        Returns:
            Filtered structure report
        """
        if exclude_keys is None:
            exclude_keys = set()
        filtered_paths = self.filter_paths_excluding_keys(exclude_keys)
        return "\n".join(self._format_path_entry(path) for path in sorted(filtered_paths))
322
+
323
+ def get_filtered_paths_with_types(self, exclude_keys: set[str] = None) -> dict[str, str]:
324
+ """
325
+ Get paths with types excluding specific keys
326
+
327
+ Args:
328
+ exclude_keys: set of keys to exclude
329
+
330
+ Returns:
331
+ Dictionary mapping filtered JSONPath expressions to their value types
332
+ """
333
+ if exclude_keys is None:
334
+ exclude_keys = set()
335
+
336
+ filtered_paths = self.filter_paths_excluding_keys(exclude_keys)
337
+ paths_with_types = {}
338
+
339
+ for path in filtered_paths:
340
+ types = list(self.types[path])
341
+
342
+ if "array" in types:
343
+ paths_with_types[path] = "array"
344
+ elif len(types) == 1:
345
+ paths_with_types[path] = types[0]
346
+ elif len(types) > 1:
347
+ paths_with_types[path] = f"mixed({', '.join(sorted(types))})"
348
+ else:
349
+ paths_with_types[path] = "unknown"
350
+
351
+ return paths_with_types
352
+
353
+ def get_filtered_detailed_type_report(self, exclude_keys: set[str] = None) -> dict[str, dict[str, Any]]:
354
+ """
355
+ Get detailed type information excluding specific keys
356
+
357
+ Args:
358
+ exclude_keys: set of keys to exclude
359
+
360
+ Returns:
361
+ Dictionary with detailed type information for filtered paths
362
+ """
363
+ if exclude_keys is None:
364
+ exclude_keys = set()
365
+
366
+ filtered_paths = self.filter_paths_excluding_keys(exclude_keys)
367
+ detailed_report = {}
368
+
369
+ for path in sorted(filtered_paths):
370
+ types = list(self.types[path])
371
+ samples = self.samples.get(path, [])
372
+
373
+ path_info = {
374
+ "types": types,
375
+ "primary_type": None,
376
+ "is_array": "array" in types,
377
+ "samples": samples,
378
+ "sample_count": len(samples)
379
+ }
380
+
381
+ if path_info["is_array"]:
382
+ path_info["array_length"] = self.array_lengths.get(path, 0)
383
+
384
+ if path_info["is_array"]:
385
+ path_info["primary_type"] = "array"
386
+ elif len(types) == 1:
387
+ path_info["primary_type"] = types[0]
388
+ elif len(types) > 1:
389
+ path_info["primary_type"] = f"mixed({', '.join(sorted(types))})"
390
+ else:
391
+ path_info["primary_type"] = "unknown"
392
+
393
+ detailed_report[path] = path_info
394
+
395
+ return detailed_report
396
+
397
+
398
+ def analyze_with_jsonpath(data: dict) -> str:
399
+ """
400
+ Analyze JSON structure using JSONPath
401
+
402
+ Args:
403
+ data: Dictionary containing JSON data to analyze
404
+
405
+ Returns:
406
+ Formatted structure report string
407
+ """
408
+ analyzer = JSONPathStructureAnalyzer()
409
+ analyzer.extract_all_paths(data)
410
+ return analyzer.get_structure_report()
411
+
412
+
413
+ def analyze_with_jsonpath_types(data: dict) -> Dict[str, str]:
414
+ """
415
+ Analyze JSON structure and return paths with their types
416
+
417
+ Args:
418
+ data: Dictionary containing JSON data to analyze
419
+
420
+ Returns:
421
+ Dictionary mapping JSONPath expressions to their value types
422
+ """
423
+ analyzer = JSONPathStructureAnalyzer()
424
+ analyzer.extract_all_paths(data)
425
+ return analyzer.get_paths_with_types()
426
+
427
+
428
+ def analyze_with_jsonpath_detailed(data: dict) -> Dict[str, Dict[str, Any]]:
429
+ """
430
+ Analyze JSON structure and return detailed type information
431
+
432
+ Args:
433
+ data: Dictionary containing JSON data to analyze
434
+
435
+ Returns:
436
+ Dictionary with detailed type information for each path
437
+ """
438
+ analyzer = JSONPathStructureAnalyzer()
439
+ analyzer.extract_all_paths(data)
440
+ return analyzer.get_detailed_type_report()
441
+
442
+
443
+ def compare_json_with_jsonpath_structures(old_data: dict, new_data: dict, print_report: bool = True) -> Dict[str, Any]:
444
+ """
445
+ Compare two JSON structures using JSONPath analysis
446
+
447
+ Args:
448
+ old_data: Original JSON structure
449
+ new_data: New JSON structure to compare against
450
+ print_report: Whether to print the comparison report
451
+
452
+ Returns:
453
+ Dictionary containing detailed comparison results
454
+ """
455
+ # Analyze old structure
456
+ old_analyzer = JSONPathStructureAnalyzer()
457
+ old_analyzer.extract_all_paths(old_data)
458
+
459
+ # Compare with new structure
460
+ comparison = old_analyzer.compare_json_structures(new_data)
461
+
462
+ if print_report:
463
+ print_comparison_report(comparison)
464
+
465
+ return comparison
466
+
467
+
468
+ def print_comparison_report(comparison: Dict[str, Any]):
469
+ """
470
+ Print a formatted comparison report
471
+
472
+ Args:
473
+ comparison: Result from compare_json_structures method
474
+ """
475
+ print("=== JSON STRUCTURE COMPARISON REPORT ===\n")
476
+
477
+ # Summary
478
+ summary = comparison["summary"]
479
+ print(f"📊 SUMMARY:")
480
+ print(f" Old structure: {summary['total_paths_old']} paths")
481
+ print(f" New structure: {summary['total_paths_new']} paths")
482
+ print(f" Added: {summary['paths_added']} paths")
483
+ print(f" Removed: {summary['paths_removed']} paths")
484
+ print(f" Common: {summary['paths_common']} paths")
485
+ print(f" Type changes: {summary['type_changes_count']}")
486
+ print(f" Value changes: {summary['value_changes_count']}")
487
+ print(f" Array size changes: {summary['array_size_changes_count']}")
488
+ print()
489
+
490
+ # Added paths
491
+ if comparison["added_paths"]:
492
+ print("➕ ADDED PATHS:")
493
+ for path, type_info in sorted(comparison["added_paths"].items()):
494
+ print(f" {path} ({type_info})")
495
+ print()
496
+
497
+ # Removed paths
498
+ if comparison["removed_paths"]:
499
+ print("➖ REMOVED PATHS:")
500
+ for path, type_info in sorted(comparison["removed_paths"].items()):
501
+ print(f" {path} ({type_info})")
502
+ print()
503
+
504
+ # Type changes
505
+ if comparison["type_changes"]:
506
+ print("🔄 TYPE CHANGES:")
507
+ for path, change in sorted(comparison["type_changes"].items()):
508
+ print(f" {path}: {change['old_type']} → {change['new_type']}")
509
+ print()
510
+
511
+ # Array size changes
512
+ if comparison["array_size_changes"]:
513
+ print("📏 ARRAY SIZE CHANGES:")
514
+ for path, change in sorted(comparison["array_size_changes"].items()):
515
+ size_change = change['size_change']
516
+ direction = "📈" if size_change > 0 else "📉"
517
+ print(f" {direction} {path}: {change['old_size']} → {change['new_size']} (Δ{size_change:+d})")
518
+ print()
519
+
520
+ # Value differences
521
+ if comparison["value_differences"]:
522
+ print("💱 VALUE CHANGES:")
523
+ for path, change in sorted(comparison["value_differences"].items()):
524
+ print(f" {path}: '{change['old_value']}' → '{change['new_value']}'")
525
+ print()
526
+
527
+
528
+ def analyze_dict_list_simple(dict_list: list[dict], exclude_keys: set[str] = None) -> list[dict[str, Any]]:
529
+ """
530
+ Analyze each dict separately and return list of results
531
+
532
+ Args:
533
+ dict_list: list of dictionaries to analyze
534
+ exclude_keys: set of keys to exclude from analysis
535
+
536
+ Returns:
537
+ list of individual analysis results
538
+ """
539
+ if exclude_keys is None:
540
+ exclude_keys = set()
541
+
542
+ results = []
543
+
544
+ for i, data_dict in enumerate(dict_list):
545
+ analyzer = JSONPathStructureAnalyzer()
546
+ analyzer.extract_all_paths(data_dict)
547
+
548
+ result = {
549
+ "index": i,
550
+ "paths_with_types": analyzer.get_filtered_paths_with_types(exclude_keys),
551
+ "detailed_report": analyzer.get_filtered_detailed_type_report(exclude_keys),
552
+ "array_lengths": {k: v for k, v in analyzer.get_array_lengths().items()
553
+ if k in analyzer.filter_paths_excluding_keys(exclude_keys)},
554
+ "structure_report": analyzer.get_filtered_structure_report(exclude_keys)
555
+ }
556
+ results.append(result)
557
+
558
+ return results
my_ghost_writer/text_parsers2.py CHANGED
@@ -1,16 +1,17 @@
1
  from datetime import datetime
 
2
 
3
- import spacy
4
  import nltk
5
  # pynflect needed to avoid different inflection
6
  import pyinflect
7
- from typing import Any, Optional
8
  from fastapi import HTTPException
9
 
10
- from my_ghost_writer.constants import SPACY_MODEL_NAME, app_logger, ELIGIBLE_POS, NLTK_DATA
11
  from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
12
  from my_ghost_writer.thesaurus import wn
13
- from my_ghost_writer.type_hints import WordSynonymResult, ContextInfo, SynonymGroup
 
14
 
15
 
16
  custom_synonym_handler = CustomSynonymHandler()
@@ -40,11 +41,20 @@ def is_nlp_available() -> bool:
40
  return nlp is not None
41
 
42
 
43
- def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[WordSynonymResult]:
44
  """
45
- Finds synonyms for all eligible words within a selected text span.
46
  It analyzes the span, filters for meaningful words (nouns, verbs, etc.),
47
- and returns a list of synonym results for each.
 
 
 
 
 
 
 
 
 
48
  """
49
  if nlp is None:
50
  app_logger.error(
@@ -62,7 +72,7 @@ def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[Wo
62
  return []
63
 
64
  # Define which POS tags are eligible for synonym lookup
65
- results: list[WordSynonymResult] = []
66
 
67
  for token in span:
68
  # Process only if the token is an eligible part of speech and not a stop word or punctuation
@@ -73,18 +83,18 @@ def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[Wo
73
  text, token.idx, token.idx + len(token.text), token.text
74
  )
75
 
76
- # 2. Get synonym groups using the token's lemma for a better search
77
- synonym_groups_list = process_synonym_groups(context_info_dict['lemma'], context_info_dict)
78
 
79
- # 3. If we find synonyms, build the result object for this word
80
- if synonym_groups_list:
81
  # Restructure dicts into Pydantic models for type safety
82
  context_info_model = ContextInfo(
83
- pos=context_info_dict['pos'],
84
- sentence=context_info_dict['context_sentence'],
85
- grammatical_form=context_info_dict['tag'],
86
- context_words=context_info_dict['context_words'],
87
- dependency=context_info_dict['dependency']
88
  )
89
  local_start_idx = token.idx - start_idx
90
  local_end_idx = local_start_idx + len(token.text)
@@ -92,17 +102,17 @@ def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[Wo
92
  sliced_word = sliced_sentence[local_start_idx:local_end_idx]
93
  assert sliced_word == token.text, (f"Mismatch! sliced_word ({sliced_word}) != token.text ({token.text}), but these substrings should be equal.\n"
94
  f" start_idx:{start_idx}, End_word:{end_idx}. local_start_idx:{local_start_idx}, local_end_idx:{local_end_idx}.")
95
- word_result = WordSynonymResult(
96
  original_word=token.text,
97
  original_indices={"start": local_start_idx, "end": local_end_idx},
98
  context_info=context_info_model,
99
- synonym_groups=[SynonymGroup(**sg) for sg in synonym_groups_list],
100
  debug_info={
101
  "spacy_token_indices": {
102
- "start": context_info_dict['char_start'],
103
- "end": context_info_dict['char_end']
104
  },
105
- "lemma": context_info_dict['lemma']
106
  }
107
  )
108
  results.append(word_result)
@@ -116,7 +126,19 @@ def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[Wo
116
 
117
 
118
  def extract_contextual_info_by_indices(text: str, start_idx: int, end_idx: int, target_word: str) -> dict[str, Any]:
119
- """Extract grammatical and contextual information using character indices"""
 
 
 
 
 
 
 
 
 
 
 
 
120
  if nlp is None:
121
  raise HTTPException(status_code=500, detail="spaCy model not available")
122
 
@@ -160,20 +182,20 @@ def extract_contextual_info_by_indices(text: str, start_idx: int, end_idx: int,
160
  context_words = [t.text for t in sentence_tokens[context_start:context_end]]
161
 
162
  return {
163
- 'word': target_token.text,
164
- 'lemma': target_token.lemma_,
165
- 'pos': target_token.pos_,
166
- 'tag': target_token.tag_,
167
- 'is_title': target_token.is_title,
168
- 'is_upper': target_token.is_upper,
169
- 'is_lower': target_token.is_lower,
170
- 'dependency': target_token.dep_,
171
- 'context_sentence': target_token.sent.text,
172
- 'context_words': context_words,
173
- 'sentence_position': target_position_in_sentence,
174
- 'char_start': target_token.idx,
175
- 'char_end': target_token.idx + len(target_token.text),
176
- 'original_indices': {'start': start_idx, 'end': end_idx}
177
  }
178
 
179
  except Exception as indices_ex:
@@ -182,58 +204,115 @@ def extract_contextual_info_by_indices(text: str, start_idx: int, end_idx: int,
182
 
183
 
184
  def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[str, Any]]:
185
- """Get synonyms from wn with optional POS filtering.
186
- Includes custom synonyms with a flag. Also performs a reverse lookup."""
 
 
 
 
 
187
 
188
- # 1. Check for custom synonyms in in-memory store
 
 
 
189
  word_lower = word.lower()
190
- synonyms_by_sense: list[dict[str, Any]] = [] # Initialize the list here
191
-
192
- # 1. Custom Synonym Lookup and Preparation
193
- custom_synset = None # Initialize to None
194
- # 1. Direct Lookup: Check if the word is directly in custom_synonyms
195
- related_synonyms = custom_synonym_handler.get_related(word_lower, "synonym")
196
- if related_synonyms:
197
- app_logger.info(f"found custom_synonyms:{related_synonyms} by word:{word_lower}!")
198
- synonyms_list: list[dict[str, Any]] = []
199
- for related in related_synonyms:
200
- words = related["words"]
201
- for word_from_related_words in words:
202
- synonyms_list.append({"synonym": word_from_related_words, "is_custom": True, "definition": related.get("definition")})
203
- if synonyms_list:
204
- custom_synset = {
205
- 'definition': 'User-defined synonym.',
206
- 'examples': [],
207
- 'synonyms': synonyms_list
208
- }
209
- if pos_tag:
210
- custom_synset["pos"] = pos_tag
211
-
212
- # 2. Reverse Lookup: Check if the word is a *synonym* of any custom word
213
- reverse_lookup_words = custom_synonym_handler.reverse_lookup(word_lower)
214
-
215
- if reverse_lookup_words:
216
- app_logger.info(f"found reverse match: '{word_lower}' is a synonym of '{reverse_lookup_words}'")
217
- # Found a reverse match!
218
- # The reverse_lookup return the original word, not a list of synonyms
219
- synonyms_list: list[dict[str, Any]] = [{"synonym": reverse_word, "is_custom": True} for reverse_word in reverse_lookup_words]
220
-
221
- custom_synset = {
222
- 'definition': f'User-defined synonym (reverse match for "{word}").',
223
- 'examples': [],
224
- 'synonyms': synonyms_list
225
- }
226
- if pos_tag:
227
- custom_synset["pos"] = pos_tag
228
 
229
- # 3. WordNet Lookup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  try:
231
  # Map spaCy POS to wn POS
232
  pos_map = {
233
- 'NOUN': wn.NOUN,
234
- 'VERB': wn.VERB,
235
- 'ADJ': wn.ADJ,
236
- 'ADV': wn.ADV
237
  }
238
 
239
  # Get all synsets for the word
@@ -243,38 +322,34 @@ def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[
243
  if pos_tag and pos_tag in pos_map:
244
  synsets = [s for s in synsets if s.pos() == pos_map[pos_tag]]
245
 
 
246
  for synset in synsets:
247
- sense_data = {
248
- 'definition': synset.definition(),
249
- 'examples': synset.examples()[:2], # Limit examples
250
- 'synonyms': [],
251
- }
252
- # Add pos only if it's available
253
- syn_pos = synset.pos()
254
- if syn_pos:
255
- sense_data['pos'] = syn_pos
256
-
257
- # Use a set to avoid duplicate synonyms from different lemmas in the same synset
258
- unique_synonyms = set()
259
  for lemma in synset.lemmas():
260
- synonym = lemma.name().replace('_', ' ')
261
- if synonym.lower() != word.lower():
262
- unique_synonyms.add(synonym)
263
-
264
- if unique_synonyms:
265
- # add synonyms (without is_custom) since these are WordNet synonyms
266
- sense_data['synonyms'] = sorted(list(unique_synonyms))
267
- synonyms_by_sense.append(sense_data)
 
 
 
 
 
 
 
268
 
269
  except Exception as ex1:
270
- app_logger.error(f"Error getting wn synonyms: {ex1}")
271
- raise HTTPException(status_code=500, detail=f"Error retrieving synonyms: {str(ex1)}")
272
-
273
- # 4. Combine Custom and WordNet Synsets
274
- if custom_synset:
275
- synonyms_by_sense.insert(0, custom_synset) # Add custom synset at the beginning
276
 
277
- return synonyms_by_sense
278
 
279
 
280
  def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
@@ -283,24 +358,24 @@ def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
283
  if nlp is None:
284
  return synonym
285
 
286
- pos = original_token_info.get('pos')
287
- tag = original_token_info.get('tag')
288
 
289
  # Handle capitalization first using .get() for safety
290
- if original_token_info.get('is_title'):
291
  synonym = synonym.title() # .title() is better for multi-word phrases
292
- elif original_token_info.get('is_upper'):
293
  synonym = synonym.upper()
294
- elif original_token_info.get('is_lower', True): # Default to lower
295
  synonym = synonym.lower()
296
 
297
  # Handle grammatical inflection
298
  try:
299
  # Define all tags that require inflection in one place
300
  inflection_tags = {
301
- 'NOUN': ['NNS', 'NNPS'],
302
- 'VERB': ['VBD', 'VBN', 'VBZ', 'VBG'],
303
- 'ADJ': ['JJR', 'JJS']
304
  }
305
 
306
  # Single check for all inflection cases
@@ -320,50 +395,58 @@ def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
320
  return synonym
321
 
322
 
323
- def process_synonym_groups(word: str, context_info: dict[str, Any]) -> list[dict[str, Any]]:
324
- """Process synonym groups with inflection matching"""
325
- # Get synonyms from wn
 
 
 
 
 
 
 
 
326
  t0 = datetime.now()
327
- # Get synonyms from wn using the lemma
328
- synonyms_by_sense = get_wordnet_synonyms(context_info['lemma'], context_info['pos'])
329
  t1 = datetime.now()
330
  duration = (t1 - t0).total_seconds()
331
  app_logger.info(f"# 1/Got get_wordnet_synonyms result with '{word}' word in {duration:.3f}s.")
332
 
333
- if not synonyms_by_sense:
334
  return []
335
 
336
- # Process each synonym group
337
- processed_synonyms = []
338
- for sense in synonyms_by_sense:
339
- processed_sense = {
340
- "definition": sense['definition'],
341
- "examples": sense['examples'],
342
- "wordnet_pos": sense['pos'],
343
- "synonyms": []
344
- }
345
-
346
- for synonym in sense['synonyms']:
347
- # Get both the base form and inflected form
348
- app_logger.info("## synonym ##")
349
- app_logger.info(type(synonym))
350
- app_logger.info(synonym)
351
- synonym_str = synonym
352
- if isinstance(synonym, dict):
353
- synonym_str = synonym["synonym"]
354
-
355
- base_form = synonym_str
356
- app_logger.info("## synonym ##")
357
- app_logger.info(type(synonym_str))
358
- app_logger.info(synonym_str)
359
- inflected_form = inflect_synonym(synonym_str, context_info)
360
-
361
- processed_sense["synonyms"].append({
362
- "base_form": base_form,
363
- "inflected_form": inflected_form,
364
- "matches_context": inflected_form.lower() != base_form.lower()
365
- })
366
-
367
- processed_synonyms.append(processed_sense)
368
-
369
- return processed_synonyms
 
1
  from datetime import datetime
2
+ from typing import Any, Optional
3
 
 
4
  import nltk
5
  # pynflect needed to avoid different inflection
6
  import pyinflect
7
+ import spacy
8
  from fastapi import HTTPException
9
 
10
+ from my_ghost_writer.constants import ELIGIBLE_POS, NLTK_DATA, SPACY_MODEL_NAME, app_logger
11
  from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
12
  from my_ghost_writer.thesaurus import wn
13
+ from my_ghost_writer.type_hints import ContextInfo, RelatedWordGroup, RelatedWordOption, RelatedWordWordResult, \
14
+ TermRelationships
15
 
16
 
17
  custom_synonym_handler = CustomSynonymHandler()
 
41
  return nlp is not None
42
 
43
 
44
+ def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[RelatedWordWordResult]:
45
  """
46
+ Finds related words for all eligible words within a selected text span.
47
  It analyzes the span, filters for meaningful words (nouns, verbs, etc.),
48
+ and returns a list of related word results for each.
49
+ Raises: HTTPException: If the spaCy model is unavailable.
50
+
51
+ Args:
52
+ text: The input text (str).
53
+ start_idx: The start index of the phrase within the text (int).
54
+ end_idx: The end index of the phrase within the text (int).
55
+
56
+ Returns:
57
+ A list of RelatedWordWordResult objects, representing the related words for each eligible word (list[RelatedWordWordResult]).
58
  """
59
  if nlp is None:
60
  app_logger.error(
 
72
  return []
73
 
74
  # Define which POS tags are eligible for synonym lookup
75
+ results: list[RelatedWordWordResult] = []
76
 
77
  for token in span:
78
  # Process only if the token is an eligible part of speech and not a stop word or punctuation
 
83
  text, token.idx, token.idx + len(token.text), token.text
84
  )
85
 
86
+ # 2. Get related word groups using the token's lemma for a better search
87
+ related_word_groups_list = process_synonym_groups(context_info_dict["lemma"], context_info_dict)
88
 
89
+ # 3. If we find related words, build the result object for this word
90
+ if related_word_groups_list:
91
  # Restructure dicts into Pydantic models for type safety
92
  context_info_model = ContextInfo(
93
+ pos=context_info_dict["pos"],
94
+ sentence=context_info_dict["context_sentence"],
95
+ grammatical_form=context_info_dict["tag"],
96
+ context_words=context_info_dict["context_words"],
97
+ dependency=context_info_dict["dependency"],
98
  )
99
  local_start_idx = token.idx - start_idx
100
  local_end_idx = local_start_idx + len(token.text)
 
102
  sliced_word = sliced_sentence[local_start_idx:local_end_idx]
103
  assert sliced_word == token.text, (f"Mismatch! sliced_word ({sliced_word}) != token.text ({token.text}), but these substrings should be equal.\n"
104
  f" start_idx:{start_idx}, End_word:{end_idx}. local_start_idx:{local_start_idx}, local_end_idx:{local_end_idx}.")
105
+ word_result = RelatedWordWordResult(
106
  original_word=token.text,
107
  original_indices={"start": local_start_idx, "end": local_end_idx},
108
  context_info=context_info_model,
109
+ related_word_groups=related_word_groups_list,
110
  debug_info={
111
  "spacy_token_indices": {
112
+ "start": context_info_dict["char_start"],
113
+ "end": context_info_dict["char_end"],
114
  },
115
+ "lemma": context_info_dict["lemma"]
116
  }
117
  )
118
  results.append(word_result)
 
126
 
127
 
128
  def extract_contextual_info_by_indices(text: str, start_idx: int, end_idx: int, target_word: str) -> dict[str, Any]:
129
+ """
130
+ Extract grammatical and contextual information using character indices.
131
+ Raises: HTTPException: If the spaCy model is unavailable or if the indices are invalid.
132
+
133
+ Args:
134
+ text: The input text (str).
135
+ start_idx: The start index of the word within the text (int).
136
+ end_idx: The end index of the word within the text (int).
137
+ target_word: The target word (str).
138
+
139
+ Returns:
140
+ A dictionary containing contextual information about the word (dict[str, Any).
141
+ """
142
  if nlp is None:
143
  raise HTTPException(status_code=500, detail="spaCy model not available")
144
 
 
182
  context_words = [t.text for t in sentence_tokens[context_start:context_end]]
183
 
184
  return {
185
+ "word": target_token.text,
186
+ "lemma": target_token.lemma_,
187
+ "pos": target_token.pos_,
188
+ "tag": target_token.tag_,
189
+ "is_title": target_token.is_title,
190
+ "is_upper": target_token.is_upper,
191
+ "is_lower": target_token.is_lower,
192
+ "dependency": target_token.dep_,
193
+ "context_sentence": target_token.sent.text,
194
+ "context_words": context_words,
195
+ "sentence_position": target_position_in_sentence,
196
+ "char_start": target_token.idx,
197
+ "char_end": target_token.idx + len(target_token.text),
198
+ "original_indices": {"start": start_idx, "end": end_idx},
199
  }
200
 
201
  except Exception as indices_ex:
 
204
 
205
 
206
  def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[str, Any]]:
207
+ """
208
+ Gets related words from WordNet and custom synonym handler,
209
+ returning a list of dictionaries containing the raw data, grouped by relation type.
210
+
211
+ Args:
212
+ word: The word to get related words for (str).
213
+ pos_tag: An optional part-of-speech tag to filter WordNet results (Optional[str]).
214
 
215
+ Returns:
216
+ A list of dictionaries, where each dictionary represents a group of related words (list[dict[str, Any]]).
217
+ """
218
+ related_word_groups_raw: list[dict[str, Any]] = []
219
  word_lower = word.lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
+ def _get_related_words(related_object, relation_type: TermRelationships, inner_word_lower: str):
222
+ related_words = []
223
+
224
+ if relation_type == TermRelationships.SYNONYM:
225
+ # related_object is a Synset
226
+ for local_lemma in related_object.lemmas():
227
+ lemma_name = local_lemma.name().replace("_", " ")
228
+ if lemma_name.lower() != inner_word_lower:
229
+ related_words.append({
230
+ "base_form": lemma_name
231
+ })
232
+ elif relation_type == TermRelationships.ANTONYM:
233
+ # related_object is a Lemma
234
+ for ant in related_object.antonyms():
235
+ ant_name = ant.name().replace("_", " ")
236
+ if ant_name.lower() != inner_word_lower:
237
+ related_words.append({
238
+ "base_form": ant_name
239
+ })
240
+ else:
241
+ # related_object is a Synset
242
+ # Get related synsets from the appropriate method
243
+ relation_methods = {
244
+ TermRelationships.HYPERNYM: related_object.hypernyms,
245
+ TermRelationships.HYPONYM: related_object.hyponyms,
246
+ TermRelationships.MERONYM: lambda: related_object.member_meronyms() + related_object.substance_meronyms() + related_object.part_meronyms(),
247
+ TermRelationships.HOLONYM: lambda: related_object.member_holonyms() + related_object.substance_holonyms() + related_object.part_holonyms(),
248
+ TermRelationships.ALSO_SEE: related_object.also_sees,
249
+ TermRelationships.CAUSE: related_object.causes,
250
+ # TermRelationships.DERIVATIONALLY_RELATED_FORM: related_object.derivationally_related_forms,
251
+ # TermRelationships.ENTAILMENT: related_object.entails,
252
+ # TermRelationships.PERTAINYM: related_object.pertainyms,
253
+ TermRelationships.SIMILAR_TO: related_object.similar_tos,
254
+ }
255
+ get_words_fn = relation_methods.get(relation_type)
256
+ if get_words_fn:
257
+ for related_synset in get_words_fn():
258
+ # Some methods return Lemma objects, handle both cases
259
+ if hasattr(related_synset, "lemmas"):
260
+ for local_lemma in related_synset.lemmas():
261
+ lemma_name = local_lemma.name().replace("_", " ")
262
+ if lemma_name.lower() != inner_word_lower:
263
+ related_words.append({
264
+ "base_form": lemma_name,
265
+ # "is_custom": False,
266
+ })
267
+ elif hasattr(related_synset, "name"):
268
+ lemma_name = related_synset.name().replace("_", " ")
269
+ if lemma_name.lower() != inner_word_lower:
270
+ related_words.append({
271
+ "base_form": lemma_name,
272
+ # "is_custom": False,
273
+ })
274
+
275
+ if related_words:
276
+ return {
277
+ "relation_type": relation_type,
278
+ "source": "wordnet",
279
+ "definition": related_object.definition() if hasattr(related_object, "definition") else "",
280
+ "examples": related_object.examples()[:2] if hasattr(related_object, "examples") else [],
281
+ "wordnet_pos": related_object.pos() if hasattr(related_object, "pos") else None,
282
+ "related_words": related_words,
283
+ }
284
+ return None
285
+
286
+ # 1. Custom Related Word Lookup (all relationships)
287
+ for rel_type in TermRelationships:
288
+ custom_groups = custom_synonym_handler.get_related(word_lower, rel_type)
289
+ if custom_groups:
290
+ for related in custom_groups:
291
+ words = related["words"]
292
+ definition = related.get("definition", "")
293
+ related_word_options = []
294
+ for word_from_related_words in words:
295
+ related_word_options.append({
296
+ "base_form": word_from_related_words,
297
+ "is_custom": True,
298
+ "definition": definition,
299
+ })
300
+ related_word_groups_raw.append({
301
+ "relation_type": rel_type,
302
+ "source": "custom",
303
+ "definition": definition,
304
+ "examples": [],
305
+ "wordnet_pos": None,
306
+ "related_words": related_word_options,
307
+ })
308
+ # 2. WordNet Lookup
309
  try:
310
  # Map spaCy POS to wn POS
311
  pos_map = {
312
+ "NOUN": wn.NOUN,
313
+ "VERB": wn.VERB,
314
+ "ADJ": wn.ADJ,
315
+ "ADV": wn.ADV,
316
  }
317
 
318
  # Get all synsets for the word
 
322
  if pos_tag and pos_tag in pos_map:
323
  synsets = [s for s in synsets if s.pos() == pos_map[pos_tag]]
324
 
325
+ # Process each synset and its relations
326
  for synset in synsets:
327
+ result = _get_related_words(synset, TermRelationships.SYNONYM, word_lower)
328
+ if result:
329
+ related_word_groups_raw.append(result)
330
+ # todo: check if it's possible to remove the first 'IF result:...' and move the second one
 
 
 
 
 
 
 
 
331
  for lemma in synset.lemmas():
332
+ result = _get_related_words(lemma, TermRelationships.ANTONYM, word_lower)
333
+ if result:
334
+ related_word_groups_raw.append(result)
335
+ for rel_type in [
336
+ TermRelationships.HYPERNYM, TermRelationships.HYPONYM, TermRelationships.MERONYM,
337
+ TermRelationships.HOLONYM, TermRelationships.ALSO_SEE, TermRelationships.CAUSE,
338
+ # todo: try to understand how to fix the related missing methods
339
+ # TermRelationships.DERIVATIONALLY_RELATED_FORM,
340
+ # TermRelationships.ENTAILMENT,
341
+ # TermRelationships.PERTAINYM,
342
+ TermRelationships.SIMILAR_TO
343
+ ]:
344
+ result = _get_related_words(synset, rel_type, word_lower)
345
+ if result:
346
+ related_word_groups_raw.append(result)
347
 
348
  except Exception as ex1:
349
+ app_logger.error(f"Error getting wn synonyms: '{ex1}' with: word:{type(word)}, '{word}', pos_tag: {type(pos_tag)}, '{pos_tag}'")
350
+ raise HTTPException(status_code=500, detail=f"Error retrieving related words: '{str(ex1)}'")
 
 
 
 
351
 
352
+ return related_word_groups_raw
353
 
354
 
355
  def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
 
358
  if nlp is None:
359
  return synonym
360
 
361
+ pos = original_token_info.get("pos")
362
+ tag = original_token_info.get("tag")
363
 
364
  # Handle capitalization first using .get() for safety
365
+ if original_token_info.get("is_title"):
366
  synonym = synonym.title() # .title() is better for multi-word phrases
367
+ elif original_token_info.get("is_upper"):
368
  synonym = synonym.upper()
369
+ elif original_token_info.get("is_lower", True): # Default to lower
370
  synonym = synonym.lower()
371
 
372
  # Handle grammatical inflection
373
  try:
374
  # Define all tags that require inflection in one place
375
  inflection_tags = {
376
+ "NOUN": ["NNS", "NNPS"],
377
+ "VERB": ["VBD", "VBN", "VBZ", "VBG"],
378
+ "ADJ": ["JJR", "JJS"],
379
  }
380
 
381
  # Single check for all inflection cases
 
395
  return synonym
396
 
397
 
398
+ def process_synonym_groups(word: str, context_info: dict[str, Any]) -> list[RelatedWordGroup]:
399
+ """Process given related word groups with inflection matching
400
+
401
+ Args:
402
+ word (str): the word
403
+ context_info (dict[str, Any]): the original form of data
404
+
405
+ Returns:
406
+ list[RelatedWordGroup]: List of the processed related words
407
+ """
408
+ # Get related words from wn
409
  t0 = datetime.now()
410
+ # Get related words from wn using the lemma
411
+ related_words_raw = get_wordnet_synonyms(context_info["lemma"], context_info["pos"])
412
  t1 = datetime.now()
413
  duration = (t1 - t0).total_seconds()
414
  app_logger.info(f"# 1/Got get_wordnet_synonyms result with '{word}' word in {duration:.3f}s.")
415
 
416
+ if not related_words_raw:
417
  return []
418
 
419
+ # Process each related word group
420
+ processed_groups: list[RelatedWordGroup] = []
421
+ for related_group in related_words_raw:
422
+ app_logger.info(f"related_group:'{related_group}'")
423
+ relation_type = related_group["relation_type"]
424
+ definition = related_group.get("definition", "")
425
+ examples = related_group.get("examples", [])
426
+ wordnet_pos = related_group.get("wordnet_pos")
427
+ related_words = related_group["related_words"]
428
+ processed_options: list[RelatedWordOption] = []
429
+
430
+ for related_word in related_words:
431
+ base_form = related_word["base_form"]
432
+ inflected_form = inflect_synonym(base_form, context_info)
433
+
434
+ related_word_option = RelatedWordOption(
435
+ base_form=base_form,
436
+ inflected_form=inflected_form,
437
+ matches_context=inflected_form.lower() != base_form.lower()
438
+ )
439
+ if "is_custom" in related_word:
440
+ related_word_option.is_custom = related_word["is_custom"]
441
+ processed_options.append(related_word_option)
442
+ app_logger.info(f"wordnet_pos:{type(wordnet_pos)}, '{wordnet_pos}'")
443
+ processed_groups.append(
444
+ RelatedWordGroup(
445
+ relation_type=relation_type,
446
+ definition=definition,
447
+ examples=examples,
448
+ related_words=processed_options,
449
+ wordnet_pos=wordnet_pos
450
+ )
451
+ )
452
+ return processed_groups
my_ghost_writer/type_hints.py CHANGED
@@ -1,23 +1,41 @@
1
- from typing import Any, TypedDict, Optional, Literal
2
- from pydantic import BaseModel, field_validator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  class RelatedEntry(BaseModel):
6
- type: Literal["synonym", "antonym", "homonym", "homophone", "homograph"]
 
7
  words: list[str]
8
- definition: Optional[str] = None # Definition is now within RelatedEntry
9
 
10
 
11
- class CustomSynonymRequest(BaseModel):
12
  word: str
13
  related: list[RelatedEntry]
14
 
15
- # @field_validator("synonyms")
16
- # def synonyms_must_not_be_empty(cls, v):
17
- # if not v:
18
- # raise ValueError("Synonym list cannot be empty.")
19
- # return v
20
-
21
 
22
  class SynonymInfo(TypedDict):
23
  synonym: str
@@ -50,17 +68,19 @@ class RequestQueryThesaurusInflatedBody(BaseModel):
50
  word: str
51
 
52
 
53
- class SynonymOption(BaseModel):
54
  base_form: str
55
  inflected_form: str
56
  matches_context: bool
 
57
 
58
 
59
- class SynonymGroup(BaseModel):
 
60
  definition: str
61
  examples: list[str]
62
- wordnet_pos: str
63
- synonyms: list[SynonymOption]
64
 
65
 
66
  class ContextInfo(BaseModel):
@@ -76,25 +96,40 @@ class SingleWordSynonymResponse(BaseModel):
76
  original_word: str
77
  original_indices: dict[str, int]
78
  context_info: ContextInfo
79
- synonym_groups: list[SynonymGroup]
80
  message: Optional[str] = None
81
  debug_info: Optional[dict[str, Any]] = None
82
 
83
 
84
- class WordSynonymResult(BaseModel):
85
  original_word: str
86
  original_indices: dict[str, int]
87
  context_info: ContextInfo
88
- synonym_groups: list[SynonymGroup]
 
89
  debug_info: Optional[dict[str, Any]] = None
90
 
91
 
92
- class MultiWordSynonymResponse(BaseModel):
93
  success: bool
94
  original_phrase: str
95
  original_indices: dict[str, int]
96
- results: list[WordSynonymResult]
97
  message: Optional[str] = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
 
100
  class HealthCheckResponse(BaseModel):
@@ -105,7 +140,7 @@ class HealthCheckResponse(BaseModel):
105
 
106
  class InputTextRow(TypedDict):
107
  """
108
- TypedDict for input text row.
109
  """
110
  idxRow: int
111
  text: str
 
1
+ from decimal import Decimal
2
+ from enum import Enum
3
+ from typing import Any, Optional, TypedDict, Union
4
+
5
+ from pydantic import BaseModel, field_validator, Field, field_serializer
6
+
7
+ decimal_places = 4
8
+
9
+
10
+ class TermRelationships(str, Enum):
11
+ """
12
+ An enumeration representing the possible relationships between terms.
13
+ """
14
+ ALSO_SEE = "also_see"
15
+ ANTONYM = "antonym"
16
+ CAUSE = "cause"
17
+ DERIVATIONALLY_RELATED_FORM = "derivationally_related_form"
18
+ ENTAILMENT = "entailment"
19
+ HOLONYM = "holonym"
20
+ HYPERNYM = "hypernym"
21
+ HYPONYM = "hyponym"
22
+ MERONYM = "meronym"
23
+ PERTAINYM = "pertainym"
24
+ SIMILAR_TO = "similar_to"
25
+ SYNONYM = "synonym"
26
 
27
 
28
  class RelatedEntry(BaseModel):
29
+ """Represents a related entry for a custom-related word."""
30
+ type: TermRelationships
31
  words: list[str]
32
+ definition: Optional[str] = None
33
 
34
 
35
+ class CustomRelatedWordRequest(BaseModel):
36
  word: str
37
  related: list[RelatedEntry]
38
 
 
 
 
 
 
 
39
 
40
  class SynonymInfo(TypedDict):
41
  synonym: str
 
68
  word: str
69
 
70
 
71
+ class RelatedWordOption(BaseModel):
72
  base_form: str
73
  inflected_form: str
74
  matches_context: bool
75
+ is_custom: Optional[bool] = None
76
 
77
 
78
+ class RelatedWordGroup(BaseModel):
79
+ relation_type: TermRelationships
80
  definition: str
81
  examples: list[str]
82
+ wordnet_pos: Optional[str]
83
+ related_words: list[RelatedWordOption]
84
 
85
 
86
  class ContextInfo(BaseModel):
 
96
  original_word: str
97
  original_indices: dict[str, int]
98
  context_info: ContextInfo
99
+ synonym_groups: list[RelatedWordGroup]
100
  message: Optional[str] = None
101
  debug_info: Optional[dict[str, Any]] = None
102
 
103
 
104
+ class RelatedWordWordResult(BaseModel):
105
  original_word: str
106
  original_indices: dict[str, int]
107
  context_info: ContextInfo
108
+ related_word_groups: list[RelatedWordGroup]
109
+ message: Optional[str] = None
110
  debug_info: Optional[dict[str, Any]] = None
111
 
112
 
113
+ class MultiRelatedWordResponse(BaseModel):
114
  success: bool
115
  original_phrase: str
116
  original_indices: dict[str, int]
117
+ results: list[RelatedWordWordResult]
118
  message: Optional[str] = None
119
+ duration: Optional[Decimal] = Field(gt=0, decimal_places=decimal_places)
120
+
121
+ @field_validator('duration', mode="before")
122
+ def validate_duration(cls, v: Union[float, Decimal, str, None]) -> Optional[Decimal]:
123
+ if v is None:
124
+ return v
125
+ return Decimal(f"{v:.{decimal_places}f}")
126
+
127
+ @field_serializer('duration')
128
+ def serialize_duration(self, value: Optional[Decimal]) -> Optional[float]:
129
+ """Serialize Decimal as float for JSON output"""
130
+ if value is None:
131
+ return None
132
+ return float(value)
133
 
134
 
135
  class HealthCheckResponse(BaseModel):
 
140
 
141
  class InputTextRow(TypedDict):
142
  """
143
+ TypedDict for an input text row.
144
  """
145
  idxRow: int
146
  text: str
poetry.lock CHANGED
@@ -601,6 +601,22 @@ files = [
601
  {file = "joblib-1.5.1.tar.gz", hash = "sha256:f4f86e351f39fe3d0d32a9f2c3d8af1ee4cec285aafcb27003dda5205576b444"},
602
  ]
603
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
  [[package]]
605
  name = "langcodes"
606
  version = "3.5.0"
@@ -1004,6 +1020,18 @@ files = [
1004
  dev = ["pre-commit", "tox"]
1005
  testing = ["coverage", "pytest", "pytest-benchmark"]
1006
 
 
 
 
 
 
 
 
 
 
 
 
 
1007
  [[package]]
1008
  name = "preshed"
1009
  version = "3.0.10"
@@ -2136,4 +2164,4 @@ files = [
2136
  [metadata]
2137
  lock-version = "2.1"
2138
  python-versions = ">=3.10,<3.14.0"
2139
- content-hash = "98791d1abc3f92349756a2c761df7c0acedaf9aa1e482c42ae7986329c90ddb7"
 
601
  {file = "joblib-1.5.1.tar.gz", hash = "sha256:f4f86e351f39fe3d0d32a9f2c3d8af1ee4cec285aafcb27003dda5205576b444"},
602
  ]
603
 
604
+ [[package]]
605
+ name = "jsonpath-ng"
606
+ version = "1.7.0"
607
+ description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming."
608
+ optional = false
609
+ python-versions = "*"
610
+ groups = ["test"]
611
+ files = [
612
+ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
613
+ {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
614
+ {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
615
+ ]
616
+
617
+ [package.dependencies]
618
+ ply = "*"
619
+
620
  [[package]]
621
  name = "langcodes"
622
  version = "3.5.0"
 
1020
  dev = ["pre-commit", "tox"]
1021
  testing = ["coverage", "pytest", "pytest-benchmark"]
1022
 
1023
+ [[package]]
1024
+ name = "ply"
1025
+ version = "3.11"
1026
+ description = "Python Lex & Yacc"
1027
+ optional = false
1028
+ python-versions = "*"
1029
+ groups = ["test"]
1030
+ files = [
1031
+ {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"},
1032
+ {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"},
1033
+ ]
1034
+
1035
  [[package]]
1036
  name = "preshed"
1037
  version = "3.0.10"
 
2164
  [metadata]
2165
  lock-version = "2.1"
2166
  python-versions = ">=3.10,<3.14.0"
2167
+ content-hash = "ae8867d9a28d2dee6521df2f2ba249d664340d8e3b0ff65260e6ff70e45d1839"
pyproject.toml CHANGED
@@ -28,6 +28,7 @@ optional = true
28
  pytest = "^8.3.5"
29
  pytest-cov = "^6.1.1"
30
  httpx = "^0.28.1"
 
31
 
32
  [tool.poetry.group.webserver]
33
  optional = true
 
28
  pytest = "^8.3.5"
29
  pytest-cov = "^6.1.1"
30
  httpx = "^0.28.1"
31
+ jsonpath-ng = "^1.7.0"
32
 
33
  [tool.poetry.group.webserver]
34
  optional = true
requirements-test.txt CHANGED
@@ -1,3 +1,3 @@
1
  httpx==0.28.1
2
  pytest-cov==6.2.1
3
- pytest==8.4.1
 
1
  httpx==0.28.1
2
  pytest-cov==6.2.1
3
+ pytest==8.4.1
requirements-webserver.txt CHANGED
@@ -1,4 +1,5 @@
1
  asgi-correlation-id==4.3.4
2
  fastapi==0.115.14
 
3
  requests==2.32.4
4
  uvicorn==0.34.3
 
1
  asgi-correlation-id==4.3.4
2
  fastapi==0.115.14
3
+ pymongo==4.13.2
4
  requests==2.32.4
5
  uvicorn==0.34.3
requirements.txt CHANGED
@@ -2,5 +2,7 @@ nltk==3.9.1
2
  pyinflect==0.5.1
3
  pymongo==4.13.2
4
  python-dotenv==1.1.1
 
 
5
  spacy==3.8.7
6
- structlog==25.4.0
 
2
  pyinflect==0.5.1
3
  pymongo==4.13.2
4
  python-dotenv==1.1.1
5
+ spacy-legacy==3.0.12
6
+ spacy-loggers==1.0.5
7
  spacy==3.8.7
8
+ structlog==25.4.0
tests/events/get_wordnet_synonyms_custom_entry_happy.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"$.definition": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.related_words": {"types": ["list"], "primary_type": "list", "is_array": false}, "$.related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "array_length": 1}, "$.related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.relation_type": {"types": ["TermRelationships"], "primary_type": "TermRelationships", "is_array": false}, "$.source": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.wordnet_pos": {"types": ["NoneType"], "primary_type": "NoneType", "is_array": false}}, {"$.definition": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.related_words": {"types": ["list"], "primary_type": "list", "is_array": false}, "$.related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "array_length": 9}, "$.related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.relation_type": {"types": ["TermRelationships"], "primary_type": "TermRelationships", "is_array": false}, "$.source": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.wordnet_pos": {"types": ["str"], "primary_type": "str", "is_array": false}}, {"$.definition": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.related_words": {"types": ["list"], "primary_type": "list", "is_array": false}, "$.related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "array_length": 8}, "$.related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.relation_type": {"types": ["TermRelationships"], "primary_type": "TermRelationships", "is_array": false}, "$.source": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.wordnet_pos": {"types": ["str"], "primary_type": "str", "is_array": false}}]
tests/events/get_wordnet_synonyms_piano_ok1.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"$.definition": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds"], "sample_count": 1}, "$.examples": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$.examples[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 0}, "$.related_words": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$.related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 2}, "$.related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["pianoforte", "forte-piano"], "sample_count": 2}, "$.relation_type": {"types": ["TermRelationships"], "primary_type": "TermRelationships", "is_array": false, "samples": ["TermRelationships.SYNONYM"], "sample_count": 1}, "$.source": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["wordnet"], "sample_count": 1}, "$.wordnet_pos": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["n"], "sample_count": 1}}
tests/events/request_thesaurus_custom4.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word": "happy",
3
+ "related": [
4
+ {
5
+ "definition": "def happy 1 - custom.",
6
+ "type": "synonym",
7
+ "words": ["joyful", "cheerful"]
8
+ },
9
+ {
10
+ "definition": "def happy 2 - custom.",
11
+ "type": "synonym",
12
+ "words": ["joy", "cheer", "elated"]
13
+ },
14
+ {
15
+ "definition": "def sad - custom.",
16
+ "type": "antonym",
17
+ "words": ["sad", "unhappy"]
18
+ }
19
+ ]
20
+ }
tests/events/response_thesaurus_phrase_inflated.json CHANGED
@@ -31,15 +31,182 @@
31
  ],
32
  "dependency": "amod"
33
  },
34
- "synonym_groups": [
35
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  "definition": "give a certain impression or have a certain outward aspect",
37
  "examples": [
38
  "She seems to be sleeping",
39
  "This appears to be a very difficult problem"
40
  ],
41
  "wordnet_pos": "v",
42
- "synonyms": [
43
  {
44
  "base_form": "appear",
45
  "inflected_form": "appearing",
@@ -53,235 +220,1474 @@
53
  ]
54
  },
55
  {
56
- "definition": "search or seek",
 
57
  "examples": [
58
- "We looked all day and finally found the child in the forest",
59
- "Look elsewhere for the perfect gift!"
60
  ],
61
  "wordnet_pos": "v",
62
- "synonyms": [
63
  {
64
- "base_form": "search",
65
- "inflected_form": "searching",
66
  "matches_context": true
67
  }
68
  ]
69
  },
70
  {
71
- "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
 
72
  "examples": [
73
- "The house looks north",
74
- "My backyard look onto the pond"
75
  ],
76
  "wordnet_pos": "v",
77
- "synonyms": [
78
  {
79
- "base_form": "face",
80
- "inflected_form": "facing",
81
  "matches_context": true
82
  },
83
  {
84
- "base_form": "front",
85
- "inflected_form": "fronting",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  "matches_context": true
87
  }
88
  ]
89
  },
90
  {
91
- "definition": "take charge of or deal with",
 
92
  "examples": [
93
- "Could you see about lunch?",
94
- "I must attend to this matter"
95
  ],
96
  "wordnet_pos": "v",
97
- "synonyms": [
98
- {
99
- "base_form": "attend",
100
- "inflected_form": "attending",
101
- "matches_context": true
102
- },
103
  {
104
- "base_form": "see",
105
- "inflected_form": "seeing",
106
  "matches_context": true
107
- },
 
 
 
 
 
 
 
 
 
 
 
108
  {
109
- "base_form": "take care",
110
- "inflected_form": "taking care",
111
  "matches_context": true
112
  }
113
  ]
114
  },
115
  {
116
- "definition": "look forward to the probable occurrence of",
 
117
  "examples": [
118
- "We were expecting a visit from our relatives",
119
- "She is looking to a promotion"
120
  ],
121
  "wordnet_pos": "v",
122
- "synonyms": [
123
  {
124
- "base_form": "await",
125
- "inflected_form": "awaiting",
126
  "matches_context": true
127
- },
 
 
 
 
 
 
 
 
 
 
 
128
  {
129
- "base_form": "expect",
130
- "inflected_form": "expecting",
131
  "matches_context": true
132
  },
133
  {
134
- "base_form": "wait",
135
- "inflected_form": "waiting",
136
  "matches_context": true
137
  }
138
  ]
139
  },
140
  {
141
- "definition": "have faith or confidence in",
 
142
  "examples": [
143
- "you can count on me to help you any time",
144
- "Look to your friends for support"
145
  ],
146
  "wordnet_pos": "v",
147
- "synonyms": [
148
  {
149
- "base_form": "bank",
150
- "inflected_form": "banking",
151
  "matches_context": true
152
  },
153
  {
154
- "base_form": "bet",
155
- "inflected_form": "betting",
156
  "matches_context": true
157
  },
158
  {
159
- "base_form": "calculate",
160
- "inflected_form": "calculating",
161
  "matches_context": true
162
  },
163
  {
164
- "base_form": "count",
165
- "inflected_form": "counting",
166
  "matches_context": true
167
  },
168
  {
169
- "base_form": "depend",
170
- "inflected_form": "depending",
171
  "matches_context": true
172
  },
173
  {
174
- "base_form": "reckon",
175
- "inflected_form": "reckoning",
176
  "matches_context": true
177
  },
178
  {
179
- "base_form": "rely",
180
- "inflected_form": "relying",
181
  "matches_context": true
182
  },
183
  {
184
- "base_form": "swear",
185
- "inflected_form": "swearing",
186
  "matches_context": true
187
  }
188
  ]
189
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  ],
191
  "debug_info": {
192
  "spacy_token_indices": {
193
  "start": 42,
194
  "end": 49
195
  },
196
- "lemma": "look"
197
- }
198
- },
199
- {
200
- "original_word": "woman",
201
- "original_indices": {
202
- "start": 22,
203
- "end": 27
204
- },
205
- "context_info": {
206
- "pos": "NOUN",
207
- "sentence": "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.",
208
- "grammatical_form": "NN",
209
- "context_words": [
210
- "a",
211
- "rather",
212
- "severe",
213
- "-",
214
- "looking",
215
- "woman",
216
- "who",
217
- "was",
218
- "wearing",
219
- "square",
220
- "glasses"
221
- ],
222
- "dependency": "pobj"
223
- },
224
- "synonym_groups": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  {
 
226
  "definition": "an adult female person (as opposed to a man)",
227
  "examples": [
228
  "the woman kept house while the man hunted"
229
  ],
230
  "wordnet_pos": "n",
231
- "synonyms": [
232
  {
233
- "base_form": "adult female",
234
- "inflected_form": "adult female",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  "matches_context": false
236
  }
237
  ]
238
  },
239
  {
 
240
  "definition": "a human female employed to do housework",
241
  "examples": [
242
  "the char will clean the carpet",
243
  "I have a woman who comes in four hours a day while I write"
244
  ],
245
  "wordnet_pos": "n",
246
- "synonyms": [
 
 
 
 
 
247
  {
248
  "base_form": "char",
249
  "inflected_form": "char",
250
  "matches_context": false
251
  },
252
  {
253
- "base_form": "charwoman",
254
- "inflected_form": "charwoman",
255
  "matches_context": false
256
  },
257
  {
258
  "base_form": "cleaning lady",
259
  "inflected_form": "cleaning lady",
260
  "matches_context": false
261
- },
 
 
 
 
 
 
 
 
 
 
 
262
  {
263
- "base_form": "cleaning woman",
264
- "inflected_form": "cleaning woman",
265
  "matches_context": false
266
  }
267
  ]
268
  },
269
  {
 
270
  "definition": "women as a class",
271
  "examples": [
272
  "it's an insult to American womanhood",
273
  "woman is the glory of creation"
274
  ],
275
  "wordnet_pos": "n",
276
- "synonyms": [
 
 
 
 
 
277
  {
278
  "base_form": "fair sex",
279
  "inflected_form": "fair sex",
280
  "matches_context": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  },
282
  {
283
- "base_form": "womanhood",
284
- "inflected_form": "womanhood",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  "matches_context": false
286
  }
287
  ]
@@ -296,5 +1702,6 @@
296
  }
297
  }
298
  ],
299
- "message": "Got 2 synonym groups."
 
300
  }
 
31
  ],
32
  "dependency": "amod"
33
  },
34
+ "related_word_groups": [
35
  {
36
+ "relation_type": "hyponym",
37
+ "definition": "perceive with attention; direct one's gaze towards",
38
+ "examples": [
39
+ "She looked over the expanse of land",
40
+ "Look at your child!"
41
+ ],
42
+ "wordnet_pos": "v",
43
+ "related_words": [
44
+ {
45
+ "base_form": "goggle",
46
+ "inflected_form": "goggling",
47
+ "matches_context": true
48
+ },
49
+ {
50
+ "base_form": "gape",
51
+ "inflected_form": "gaping",
52
+ "matches_context": true
53
+ },
54
+ {
55
+ "base_form": "gawp",
56
+ "inflected_form": "gawping",
57
+ "matches_context": true
58
+ },
59
+ {
60
+ "base_form": "gawk",
61
+ "inflected_form": "gawking",
62
+ "matches_context": true
63
+ },
64
+ {
65
+ "base_form": "gloat",
66
+ "inflected_form": "gloating",
67
+ "matches_context": true
68
+ },
69
+ {
70
+ "base_form": "look around",
71
+ "inflected_form": "looking around",
72
+ "matches_context": true
73
+ },
74
+ {
75
+ "base_form": "ogle",
76
+ "inflected_form": "ogling",
77
+ "matches_context": true
78
+ },
79
+ {
80
+ "base_form": "give the glad eye",
81
+ "inflected_form": "giving the glad eye",
82
+ "matches_context": true
83
+ },
84
+ {
85
+ "base_form": "peep",
86
+ "inflected_form": "peeping",
87
+ "matches_context": true
88
+ },
89
+ {
90
+ "base_form": "look back",
91
+ "inflected_form": "looking back",
92
+ "matches_context": true
93
+ },
94
+ {
95
+ "base_form": "look backward",
96
+ "inflected_form": "looking backward",
97
+ "matches_context": true
98
+ },
99
+ {
100
+ "base_form": "regard",
101
+ "inflected_form": "regarding",
102
+ "matches_context": true
103
+ },
104
+ {
105
+ "base_form": "consider",
106
+ "inflected_form": "considering",
107
+ "matches_context": true
108
+ },
109
+ {
110
+ "base_form": "glance",
111
+ "inflected_form": "glancing",
112
+ "matches_context": true
113
+ },
114
+ {
115
+ "base_form": "peek",
116
+ "inflected_form": "peeking",
117
+ "matches_context": true
118
+ },
119
+ {
120
+ "base_form": "glint",
121
+ "inflected_form": "glinting",
122
+ "matches_context": true
123
+ },
124
+ {
125
+ "base_form": "eye",
126
+ "inflected_form": "eyeing",
127
+ "matches_context": true
128
+ },
129
+ {
130
+ "base_form": "eyeball",
131
+ "inflected_form": "eyeballing",
132
+ "matches_context": true
133
+ },
134
+ {
135
+ "base_form": "peer",
136
+ "inflected_form": "peering",
137
+ "matches_context": true
138
+ },
139
+ {
140
+ "base_form": "admire",
141
+ "inflected_form": "admiring",
142
+ "matches_context": true
143
+ },
144
+ {
145
+ "base_form": "gaze",
146
+ "inflected_form": "gazing",
147
+ "matches_context": true
148
+ },
149
+ {
150
+ "base_form": "stare",
151
+ "inflected_form": "staring",
152
+ "matches_context": true
153
+ },
154
+ {
155
+ "base_form": "look away",
156
+ "inflected_form": "looking away",
157
+ "matches_context": true
158
+ },
159
+ {
160
+ "base_form": "leer",
161
+ "inflected_form": "leering",
162
+ "matches_context": true
163
+ },
164
+ {
165
+ "base_form": "give the eye",
166
+ "inflected_form": "giving the eye",
167
+ "matches_context": true
168
+ },
169
+ {
170
+ "base_form": "give the once over",
171
+ "inflected_form": "giving the once over",
172
+ "matches_context": true
173
+ },
174
+ {
175
+ "base_form": "squint",
176
+ "inflected_form": "squinting",
177
+ "matches_context": true
178
+ },
179
+ {
180
+ "base_form": "take a look",
181
+ "inflected_form": "taking a look",
182
+ "matches_context": true
183
+ },
184
+ {
185
+ "base_form": "have a look",
186
+ "inflected_form": "having a look",
187
+ "matches_context": true
188
+ },
189
+ {
190
+ "base_form": "get a load",
191
+ "inflected_form": "getting a load",
192
+ "matches_context": true
193
+ },
194
+ {
195
+ "base_form": "stare",
196
+ "inflected_form": "staring",
197
+ "matches_context": true
198
+ }
199
+ ]
200
+ },
201
+ {
202
+ "relation_type": "synonym",
203
  "definition": "give a certain impression or have a certain outward aspect",
204
  "examples": [
205
  "She seems to be sleeping",
206
  "This appears to be a very difficult problem"
207
  ],
208
  "wordnet_pos": "v",
209
+ "related_words": [
210
  {
211
  "base_form": "appear",
212
  "inflected_form": "appearing",
 
220
  ]
221
  },
222
  {
223
+ "relation_type": "hypernym",
224
+ "definition": "give a certain impression or have a certain outward aspect",
225
  "examples": [
226
+ "She seems to be sleeping",
227
+ "This appears to be a very difficult problem"
228
  ],
229
  "wordnet_pos": "v",
230
+ "related_words": [
231
  {
232
+ "base_form": "be",
233
+ "inflected_form": "being",
234
  "matches_context": true
235
  }
236
  ]
237
  },
238
  {
239
+ "relation_type": "hyponym",
240
+ "definition": "give a certain impression or have a certain outward aspect",
241
  "examples": [
242
+ "She seems to be sleeping",
243
+ "This appears to be a very difficult problem"
244
  ],
245
  "wordnet_pos": "v",
246
+ "related_words": [
247
  {
248
+ "base_form": "glow",
249
+ "inflected_form": "glowing",
250
  "matches_context": true
251
  },
252
  {
253
+ "base_form": "beam",
254
+ "inflected_form": "beaming",
255
+ "matches_context": true
256
+ },
257
+ {
258
+ "base_form": "radiate",
259
+ "inflected_form": "radiating",
260
+ "matches_context": true
261
+ },
262
+ {
263
+ "base_form": "shine",
264
+ "inflected_form": "shining",
265
+ "matches_context": true
266
+ },
267
+ {
268
+ "base_form": "rise",
269
+ "inflected_form": "rising",
270
+ "matches_context": true
271
+ },
272
+ {
273
+ "base_form": "lift",
274
+ "inflected_form": "lifting",
275
+ "matches_context": true
276
+ },
277
+ {
278
+ "base_form": "rear",
279
+ "inflected_form": "rearing",
280
+ "matches_context": true
281
+ },
282
+ {
283
+ "base_form": "glitter",
284
+ "inflected_form": "glittering",
285
+ "matches_context": true
286
+ },
287
+ {
288
+ "base_form": "glisten",
289
+ "inflected_form": "glistening",
290
+ "matches_context": true
291
+ },
292
+ {
293
+ "base_form": "glint",
294
+ "inflected_form": "glinting",
295
+ "matches_context": true
296
+ },
297
+ {
298
+ "base_form": "gleam",
299
+ "inflected_form": "gleaming",
300
+ "matches_context": true
301
+ },
302
+ {
303
+ "base_form": "shine",
304
+ "inflected_form": "shining",
305
+ "matches_context": true
306
+ },
307
+ {
308
+ "base_form": "leap out",
309
+ "inflected_form": "leaping out",
310
+ "matches_context": true
311
+ },
312
+ {
313
+ "base_form": "jump out",
314
+ "inflected_form": "jumping out",
315
+ "matches_context": true
316
+ },
317
+ {
318
+ "base_form": "jump",
319
+ "inflected_form": "jumping",
320
+ "matches_context": true
321
+ },
322
+ {
323
+ "base_form": "stand out",
324
+ "inflected_form": "standing out",
325
+ "matches_context": true
326
+ },
327
+ {
328
+ "base_form": "stick out",
329
+ "inflected_form": "sticking out",
330
+ "matches_context": true
331
+ },
332
+ {
333
+ "base_form": "make",
334
+ "inflected_form": "making",
335
+ "matches_context": true
336
+ },
337
+ {
338
+ "base_form": "loom",
339
+ "inflected_form": "looming",
340
+ "matches_context": true
341
+ },
342
+ {
343
+ "base_form": "sound",
344
+ "inflected_form": "sounding",
345
+ "matches_context": true
346
+ },
347
+ {
348
+ "base_form": "cut",
349
+ "inflected_form": "cutting",
350
+ "matches_context": true
351
+ },
352
+ {
353
+ "base_form": "pass off",
354
+ "inflected_form": "passing off",
355
+ "matches_context": true
356
+ },
357
+ {
358
+ "base_form": "come across",
359
+ "inflected_form": "coming across",
360
+ "matches_context": true
361
+ },
362
+ {
363
+ "base_form": "feel",
364
+ "inflected_form": "feeling",
365
+ "matches_context": true
366
+ },
367
+ {
368
+ "base_form": "feel",
369
+ "inflected_form": "feeling",
370
  "matches_context": true
371
  }
372
  ]
373
  },
374
  {
375
+ "relation_type": "hypernym",
376
+ "definition": "have a certain outward or facial expression",
377
  "examples": [
378
+ "How does she look?",
379
+ "The child looks unhappy"
380
  ],
381
  "wordnet_pos": "v",
382
+ "related_words": [
 
 
 
 
 
383
  {
384
+ "base_form": "be",
385
+ "inflected_form": "being",
386
  "matches_context": true
387
+ }
388
+ ]
389
+ },
390
+ {
391
+ "relation_type": "hyponym",
392
+ "definition": "have a certain outward or facial expression",
393
+ "examples": [
394
+ "How does she look?",
395
+ "The child looks unhappy"
396
+ ],
397
+ "wordnet_pos": "v",
398
+ "related_words": [
399
  {
400
+ "base_form": "squint",
401
+ "inflected_form": "squinting",
402
  "matches_context": true
403
  }
404
  ]
405
  },
406
  {
407
+ "relation_type": "synonym",
408
+ "definition": "search or seek",
409
  "examples": [
410
+ "We looked all day and finally found the child in the forest",
411
+ "Look elsewhere for the perfect gift!"
412
  ],
413
  "wordnet_pos": "v",
414
+ "related_words": [
415
  {
416
+ "base_form": "search",
417
+ "inflected_form": "searching",
418
  "matches_context": true
419
+ }
420
+ ]
421
+ },
422
+ {
423
+ "relation_type": "hypernym",
424
+ "definition": "search or seek",
425
+ "examples": [
426
+ "We looked all day and finally found the child in the forest",
427
+ "Look elsewhere for the perfect gift!"
428
+ ],
429
+ "wordnet_pos": "v",
430
+ "related_words": [
431
  {
432
+ "base_form": "examine",
433
+ "inflected_form": "examining",
434
  "matches_context": true
435
  },
436
  {
437
+ "base_form": "see",
438
+ "inflected_form": "seeing",
439
  "matches_context": true
440
  }
441
  ]
442
  },
443
  {
444
+ "relation_type": "hyponym",
445
+ "definition": "search or seek",
446
  "examples": [
447
+ "We looked all day and finally found the child in the forest",
448
+ "Look elsewhere for the perfect gift!"
449
  ],
450
  "wordnet_pos": "v",
451
+ "related_words": [
452
  {
453
+ "base_form": "hunt",
454
+ "inflected_form": "hunting",
455
  "matches_context": true
456
  },
457
  {
458
+ "base_form": "cruise",
459
+ "inflected_form": "cruising",
460
  "matches_context": true
461
  },
462
  {
463
+ "base_form": "prospect",
464
+ "inflected_form": "prospecting",
465
  "matches_context": true
466
  },
467
  {
468
+ "base_form": "intrude",
469
+ "inflected_form": "intruding",
470
  "matches_context": true
471
  },
472
  {
473
+ "base_form": "horn in",
474
+ "inflected_form": "horning in",
475
  "matches_context": true
476
  },
477
  {
478
+ "base_form": "pry",
479
+ "inflected_form": "prying",
480
  "matches_context": true
481
  },
482
  {
483
+ "base_form": "nose",
484
+ "inflected_form": "nosing",
485
  "matches_context": true
486
  },
487
  {
488
+ "base_form": "poke",
489
+ "inflected_form": "poking",
490
  "matches_context": true
491
  }
492
  ]
493
+ },
494
+ {
495
+ "relation_type": "synonym",
496
+ "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
497
+ "examples": [
498
+ "The house looks north",
499
+ "My backyard look onto the pond"
500
+ ],
501
+ "wordnet_pos": "v",
502
+ "related_words": [
503
+ {
504
+ "base_form": "front",
505
+ "inflected_form": "fronting",
506
+ "matches_context": true
507
+ },
508
+ {
509
+ "base_form": "face",
510
+ "inflected_form": "facing",
511
+ "matches_context": true
512
+ }
513
+ ]
514
+ },
515
+ {
516
+ "relation_type": "antonym",
517
+ "definition": "",
518
+ "examples": [],
519
+ "related_words": [
520
+ {
521
+ "base_form": "back",
522
+ "inflected_form": "backing",
523
+ "matches_context": true
524
+ }
525
+ ]
526
+ },
527
+ {
528
+ "relation_type": "hypernym",
529
+ "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
530
+ "examples": [
531
+ "The house looks north",
532
+ "My backyard look onto the pond"
533
+ ],
534
+ "wordnet_pos": "v",
535
+ "related_words": [
536
+ {
537
+ "base_form": "lie",
538
+ "inflected_form": "lying",
539
+ "matches_context": true
540
+ }
541
+ ]
542
+ },
543
+ {
544
+ "relation_type": "hyponym",
545
+ "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
546
+ "examples": [
547
+ "The house looks north",
548
+ "My backyard look onto the pond"
549
+ ],
550
+ "wordnet_pos": "v",
551
+ "related_words": [
552
+ {
553
+ "base_form": "confront",
554
+ "inflected_form": "confronting",
555
+ "matches_context": true
556
+ }
557
+ ]
558
+ },
559
+ {
560
+ "relation_type": "synonym",
561
+ "definition": "take charge of or deal with",
562
+ "examples": [
563
+ "Could you see about lunch?",
564
+ "I must attend to this matter"
565
+ ],
566
+ "wordnet_pos": "v",
567
+ "related_words": [
568
+ {
569
+ "base_form": "attend",
570
+ "inflected_form": "attending",
571
+ "matches_context": true
572
+ },
573
+ {
574
+ "base_form": "take care",
575
+ "inflected_form": "taking care",
576
+ "matches_context": true
577
+ },
578
+ {
579
+ "base_form": "see",
580
+ "inflected_form": "seeing",
581
+ "matches_context": true
582
+ }
583
+ ]
584
+ },
585
+ {
586
+ "relation_type": "hypernym",
587
+ "definition": "take charge of or deal with",
588
+ "examples": [
589
+ "Could you see about lunch?",
590
+ "I must attend to this matter"
591
+ ],
592
+ "wordnet_pos": "v",
593
+ "related_words": [
594
+ {
595
+ "base_form": "care",
596
+ "inflected_form": "caring",
597
+ "matches_context": true
598
+ },
599
+ {
600
+ "base_form": "give care",
601
+ "inflected_form": "giving care",
602
+ "matches_context": true
603
+ }
604
+ ]
605
+ },
606
+ {
607
+ "relation_type": "hyponym",
608
+ "definition": "take charge of or deal with",
609
+ "examples": [
610
+ "Could you see about lunch?",
611
+ "I must attend to this matter"
612
+ ],
613
+ "wordnet_pos": "v",
614
+ "related_words": [
615
+ {
616
+ "base_form": "tend",
617
+ "inflected_form": "tending",
618
+ "matches_context": true
619
+ },
620
+ {
621
+ "base_form": "minister",
622
+ "inflected_form": "ministering",
623
+ "matches_context": true
624
+ }
625
+ ]
626
+ },
627
+ {
628
+ "relation_type": "hypernym",
629
+ "definition": "convey by one's expression",
630
+ "examples": [
631
+ "She looked her devotion to me"
632
+ ],
633
+ "wordnet_pos": "v",
634
+ "related_words": [
635
+ {
636
+ "base_form": "convey",
637
+ "inflected_form": "conveying",
638
+ "matches_context": true
639
+ }
640
+ ]
641
+ },
642
+ {
643
+ "relation_type": "synonym",
644
+ "definition": "look forward to the probable occurrence of",
645
+ "examples": [
646
+ "We were expecting a visit from our relatives",
647
+ "She is looking to a promotion"
648
+ ],
649
+ "wordnet_pos": "v",
650
+ "related_words": [
651
+ {
652
+ "base_form": "expect",
653
+ "inflected_form": "expecting",
654
+ "matches_context": true
655
+ },
656
+ {
657
+ "base_form": "await",
658
+ "inflected_form": "awaiting",
659
+ "matches_context": true
660
+ },
661
+ {
662
+ "base_form": "wait",
663
+ "inflected_form": "waiting",
664
+ "matches_context": true
665
+ }
666
+ ]
667
+ },
668
+ {
669
+ "relation_type": "hyponym",
670
+ "definition": "look forward to the probable occurrence of",
671
+ "examples": [
672
+ "We were expecting a visit from our relatives",
673
+ "She is looking to a promotion"
674
+ ],
675
+ "wordnet_pos": "v",
676
+ "related_words": [
677
+ {
678
+ "base_form": "look forward",
679
+ "inflected_form": "looking forward",
680
+ "matches_context": true
681
+ },
682
+ {
683
+ "base_form": "anticipate",
684
+ "inflected_form": "anticipating",
685
+ "matches_context": true
686
+ },
687
+ {
688
+ "base_form": "look for",
689
+ "inflected_form": "looking for",
690
+ "matches_context": true
691
+ },
692
+ {
693
+ "base_form": "look to",
694
+ "inflected_form": "looking to",
695
+ "matches_context": true
696
+ },
697
+ {
698
+ "base_form": "hang on",
699
+ "inflected_form": "hanging on",
700
+ "matches_context": true
701
+ },
702
+ {
703
+ "base_form": "hold the line",
704
+ "inflected_form": "holding the line",
705
+ "matches_context": true
706
+ },
707
+ {
708
+ "base_form": "hold on",
709
+ "inflected_form": "holding on",
710
+ "matches_context": true
711
+ },
712
+ {
713
+ "base_form": "expect",
714
+ "inflected_form": "expecting",
715
+ "matches_context": true
716
+ },
717
+ {
718
+ "base_form": "look to",
719
+ "inflected_form": "looking to",
720
+ "matches_context": true
721
+ }
722
+ ]
723
+ },
724
+ {
725
+ "relation_type": "hypernym",
726
+ "definition": "accord in appearance with",
727
+ "examples": [
728
+ "You don't look your age!"
729
+ ],
730
+ "wordnet_pos": "v",
731
+ "related_words": [
732
+ {
733
+ "base_form": "match",
734
+ "inflected_form": "matching",
735
+ "matches_context": true
736
+ },
737
+ {
738
+ "base_form": "fit",
739
+ "inflected_form": "fitting",
740
+ "matches_context": true
741
+ },
742
+ {
743
+ "base_form": "correspond",
744
+ "inflected_form": "corresponding",
745
+ "matches_context": true
746
+ },
747
+ {
748
+ "base_form": "check",
749
+ "inflected_form": "checking",
750
+ "matches_context": true
751
+ },
752
+ {
753
+ "base_form": "jibe",
754
+ "inflected_form": "jibing",
755
+ "matches_context": true
756
+ },
757
+ {
758
+ "base_form": "gibe",
759
+ "inflected_form": "gibing",
760
+ "matches_context": true
761
+ },
762
+ {
763
+ "base_form": "tally",
764
+ "inflected_form": "tallying",
765
+ "matches_context": true
766
+ },
767
+ {
768
+ "base_form": "agree",
769
+ "inflected_form": "agreeing",
770
+ "matches_context": true
771
+ }
772
+ ]
773
+ },
774
+ {
775
+ "relation_type": "synonym",
776
+ "definition": "have faith or confidence in",
777
+ "examples": [
778
+ "you can count on me to help you any time",
779
+ "Look to your friends for support"
780
+ ],
781
+ "wordnet_pos": "v",
782
+ "related_words": [
783
+ {
784
+ "base_form": "count",
785
+ "inflected_form": "counting",
786
+ "matches_context": true
787
+ },
788
+ {
789
+ "base_form": "bet",
790
+ "inflected_form": "betting",
791
+ "matches_context": true
792
+ },
793
+ {
794
+ "base_form": "depend",
795
+ "inflected_form": "depending",
796
+ "matches_context": true
797
+ },
798
+ {
799
+ "base_form": "swear",
800
+ "inflected_form": "swearing",
801
+ "matches_context": true
802
+ },
803
+ {
804
+ "base_form": "rely",
805
+ "inflected_form": "relying",
806
+ "matches_context": true
807
+ },
808
+ {
809
+ "base_form": "bank",
810
+ "inflected_form": "banking",
811
+ "matches_context": true
812
+ },
813
+ {
814
+ "base_form": "calculate",
815
+ "inflected_form": "calculating",
816
+ "matches_context": true
817
+ },
818
+ {
819
+ "base_form": "reckon",
820
+ "inflected_form": "reckoning",
821
+ "matches_context": true
822
+ }
823
+ ]
824
+ },
825
+ {
826
+ "relation_type": "hypernym",
827
+ "definition": "have faith or confidence in",
828
+ "examples": [
829
+ "you can count on me to help you any time",
830
+ "Look to your friends for support"
831
+ ],
832
+ "wordnet_pos": "v",
833
+ "related_words": [
834
+ {
835
+ "base_form": "trust",
836
+ "inflected_form": "trusting",
837
+ "matches_context": true
838
+ }
839
+ ]
840
+ }
841
  ],
842
  "debug_info": {
843
  "spacy_token_indices": {
844
  "start": 42,
845
  "end": 49
846
  },
847
+ "lemma": "look"
848
+ }
849
+ },
850
+ {
851
+ "original_word": "woman",
852
+ "original_indices": {
853
+ "start": 22,
854
+ "end": 27
855
+ },
856
+ "context_info": {
857
+ "pos": "NOUN",
858
+ "sentence": "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.",
859
+ "grammatical_form": "NN",
860
+ "context_words": [
861
+ "a",
862
+ "rather",
863
+ "severe",
864
+ "-",
865
+ "looking",
866
+ "woman",
867
+ "who",
868
+ "was",
869
+ "wearing",
870
+ "square",
871
+ "glasses"
872
+ ],
873
+ "dependency": "pobj"
874
+ },
875
+ "related_word_groups": [
876
+ {
877
+ "relation_type": "synonym",
878
+ "definition": "an adult female person (as opposed to a man)",
879
+ "examples": [
880
+ "the woman kept house while the man hunted"
881
+ ],
882
+ "wordnet_pos": "n",
883
+ "related_words": [
884
+ {
885
+ "base_form": "adult female",
886
+ "inflected_form": "adult female",
887
+ "matches_context": false
888
+ }
889
+ ]
890
+ },
891
+ {
892
+ "relation_type": "antonym",
893
+ "definition": "",
894
+ "examples": [],
895
+ "related_words": [
896
+ {
897
+ "base_form": "man",
898
+ "inflected_form": "man",
899
+ "matches_context": false
900
+ }
901
+ ]
902
+ },
903
+ {
904
+ "relation_type": "hypernym",
905
+ "definition": "an adult female person (as opposed to a man)",
906
+ "examples": [
907
+ "the woman kept house while the man hunted"
908
+ ],
909
+ "wordnet_pos": "n",
910
+ "related_words": [
911
+ {
912
+ "base_form": "female",
913
+ "inflected_form": "female",
914
+ "matches_context": false
915
+ },
916
+ {
917
+ "base_form": "female person",
918
+ "inflected_form": "female person",
919
+ "matches_context": false
920
+ },
921
+ {
922
+ "base_form": "adult",
923
+ "inflected_form": "adult",
924
+ "matches_context": false
925
+ },
926
+ {
927
+ "base_form": "grownup",
928
+ "inflected_form": "grownup",
929
+ "matches_context": false
930
+ }
931
+ ]
932
+ },
933
+ {
934
+ "relation_type": "hyponym",
935
+ "definition": "an adult female person (as opposed to a man)",
936
+ "examples": [
937
+ "the woman kept house while the man hunted"
938
+ ],
939
+ "wordnet_pos": "n",
940
+ "related_words": [
941
+ {
942
+ "base_form": "jezebel",
943
+ "inflected_form": "jezebel",
944
+ "matches_context": false
945
+ },
946
+ {
947
+ "base_form": "mother figure",
948
+ "inflected_form": "mother figure",
949
+ "matches_context": false
950
+ },
951
+ {
952
+ "base_form": "smasher",
953
+ "inflected_form": "smasher",
954
+ "matches_context": false
955
+ },
956
+ {
957
+ "base_form": "stunner",
958
+ "inflected_form": "stunner",
959
+ "matches_context": false
960
+ },
961
+ {
962
+ "base_form": "knockout",
963
+ "inflected_form": "knockout",
964
+ "matches_context": false
965
+ },
966
+ {
967
+ "base_form": "beauty",
968
+ "inflected_form": "beauty",
969
+ "matches_context": false
970
+ },
971
+ {
972
+ "base_form": "ravisher",
973
+ "inflected_form": "ravisher",
974
+ "matches_context": false
975
+ },
976
+ {
977
+ "base_form": "sweetheart",
978
+ "inflected_form": "sweetheart",
979
+ "matches_context": false
980
+ },
981
+ {
982
+ "base_form": "peach",
983
+ "inflected_form": "peach",
984
+ "matches_context": false
985
+ },
986
+ {
987
+ "base_form": "lulu",
988
+ "inflected_form": "lulu",
989
+ "matches_context": false
990
+ },
991
+ {
992
+ "base_form": "looker",
993
+ "inflected_form": "looker",
994
+ "matches_context": false
995
+ },
996
+ {
997
+ "base_form": "mantrap",
998
+ "inflected_form": "mantrap",
999
+ "matches_context": false
1000
+ },
1001
+ {
1002
+ "base_form": "dish",
1003
+ "inflected_form": "dish",
1004
+ "matches_context": false
1005
+ },
1006
+ {
1007
+ "base_form": "B-girl",
1008
+ "inflected_form": "b-girl",
1009
+ "matches_context": false
1010
+ },
1011
+ {
1012
+ "base_form": "bar girl",
1013
+ "inflected_form": "bar girl",
1014
+ "matches_context": false
1015
+ },
1016
+ {
1017
+ "base_form": "heroine",
1018
+ "inflected_form": "heroine",
1019
+ "matches_context": false
1020
+ },
1021
+ {
1022
+ "base_form": "prostitute",
1023
+ "inflected_form": "prostitute",
1024
+ "matches_context": false
1025
+ },
1026
+ {
1027
+ "base_form": "cocotte",
1028
+ "inflected_form": "cocotte",
1029
+ "matches_context": false
1030
+ },
1031
+ {
1032
+ "base_form": "whore",
1033
+ "inflected_form": "whore",
1034
+ "matches_context": false
1035
+ },
1036
+ {
1037
+ "base_form": "harlot",
1038
+ "inflected_form": "harlot",
1039
+ "matches_context": false
1040
+ },
1041
+ {
1042
+ "base_form": "bawd",
1043
+ "inflected_form": "bawd",
1044
+ "matches_context": false
1045
+ },
1046
+ {
1047
+ "base_form": "tart",
1048
+ "inflected_form": "tart",
1049
+ "matches_context": false
1050
+ },
1051
+ {
1052
+ "base_form": "cyprian",
1053
+ "inflected_form": "cyprian",
1054
+ "matches_context": false
1055
+ },
1056
+ {
1057
+ "base_form": "fancy woman",
1058
+ "inflected_form": "fancy woman",
1059
+ "matches_context": false
1060
+ },
1061
+ {
1062
+ "base_form": "working girl",
1063
+ "inflected_form": "working girl",
1064
+ "matches_context": false
1065
+ },
1066
+ {
1067
+ "base_form": "sporting lady",
1068
+ "inflected_form": "sporting lady",
1069
+ "matches_context": false
1070
+ },
1071
+ {
1072
+ "base_form": "lady of pleasure",
1073
+ "inflected_form": "lady of pleasure",
1074
+ "matches_context": false
1075
+ },
1076
+ {
1077
+ "base_form": "woman of the street",
1078
+ "inflected_form": "woman of the street",
1079
+ "matches_context": false
1080
+ },
1081
+ {
1082
+ "base_form": "inamorata",
1083
+ "inflected_form": "inamorata",
1084
+ "matches_context": false
1085
+ },
1086
+ {
1087
+ "base_form": "cat",
1088
+ "inflected_form": "cat",
1089
+ "matches_context": false
1090
+ },
1091
+ {
1092
+ "base_form": "girl",
1093
+ "inflected_form": "girl",
1094
+ "matches_context": false
1095
+ },
1096
+ {
1097
+ "base_form": "miss",
1098
+ "inflected_form": "miss",
1099
+ "matches_context": false
1100
+ },
1101
+ {
1102
+ "base_form": "missy",
1103
+ "inflected_form": "missy",
1104
+ "matches_context": false
1105
+ },
1106
+ {
1107
+ "base_form": "young lady",
1108
+ "inflected_form": "young lady",
1109
+ "matches_context": false
1110
+ },
1111
+ {
1112
+ "base_form": "young woman",
1113
+ "inflected_form": "young woman",
1114
+ "matches_context": false
1115
+ },
1116
+ {
1117
+ "base_form": "fille",
1118
+ "inflected_form": "fille",
1119
+ "matches_context": false
1120
+ },
1121
+ {
1122
+ "base_form": "matriarch",
1123
+ "inflected_form": "matriarch",
1124
+ "matches_context": false
1125
+ },
1126
+ {
1127
+ "base_form": "wife",
1128
+ "inflected_form": "wife",
1129
+ "matches_context": false
1130
+ },
1131
+ {
1132
+ "base_form": "married woman",
1133
+ "inflected_form": "married woman",
1134
+ "matches_context": false
1135
+ },
1136
+ {
1137
+ "base_form": "girlfriend",
1138
+ "inflected_form": "girlfriend",
1139
+ "matches_context": false
1140
+ },
1141
+ {
1142
+ "base_form": "Cinderella",
1143
+ "inflected_form": "cinderella",
1144
+ "matches_context": false
1145
+ },
1146
+ {
1147
+ "base_form": "bachelor girl",
1148
+ "inflected_form": "bachelor girl",
1149
+ "matches_context": false
1150
+ },
1151
+ {
1152
+ "base_form": "bachelorette",
1153
+ "inflected_form": "bachelorette",
1154
+ "matches_context": false
1155
+ },
1156
+ {
1157
+ "base_form": "vestal",
1158
+ "inflected_form": "vestal",
1159
+ "matches_context": false
1160
+ },
1161
+ {
1162
+ "base_form": "widow",
1163
+ "inflected_form": "widow",
1164
+ "matches_context": false
1165
+ },
1166
+ {
1167
+ "base_form": "widow woman",
1168
+ "inflected_form": "widow woman",
1169
+ "matches_context": false
1170
+ },
1171
+ {
1172
+ "base_form": "Wave",
1173
+ "inflected_form": "wave",
1174
+ "matches_context": false
1175
+ },
1176
+ {
1177
+ "base_form": "nymphet",
1178
+ "inflected_form": "nymphet",
1179
+ "matches_context": false
1180
+ },
1181
+ {
1182
+ "base_form": "bluestocking",
1183
+ "inflected_form": "bluestocking",
1184
+ "matches_context": false
1185
+ },
1186
+ {
1187
+ "base_form": "bas bleu",
1188
+ "inflected_form": "bas bleu",
1189
+ "matches_context": false
1190
+ },
1191
+ {
1192
+ "base_form": "coquette",
1193
+ "inflected_form": "coquette",
1194
+ "matches_context": false
1195
+ },
1196
+ {
1197
+ "base_form": "flirt",
1198
+ "inflected_form": "flirt",
1199
+ "matches_context": false
1200
+ },
1201
+ {
1202
+ "base_form": "vamp",
1203
+ "inflected_form": "vamp",
1204
+ "matches_context": false
1205
+ },
1206
+ {
1207
+ "base_form": "vamper",
1208
+ "inflected_form": "vamper",
1209
+ "matches_context": false
1210
+ },
1211
+ {
1212
+ "base_form": "minx",
1213
+ "inflected_form": "minx",
1214
+ "matches_context": false
1215
+ },
1216
+ {
1217
+ "base_form": "tease",
1218
+ "inflected_form": "tease",
1219
+ "matches_context": false
1220
+ },
1221
+ {
1222
+ "base_form": "prickteaser",
1223
+ "inflected_form": "prickteaser",
1224
+ "matches_context": false
1225
+ },
1226
+ {
1227
+ "base_form": "bridesmaid",
1228
+ "inflected_form": "bridesmaid",
1229
+ "matches_context": false
1230
+ },
1231
+ {
1232
+ "base_form": "maid of honor",
1233
+ "inflected_form": "maid of honor",
1234
+ "matches_context": false
1235
+ },
1236
+ {
1237
+ "base_form": "white woman",
1238
+ "inflected_form": "white woman",
1239
+ "matches_context": false
1240
+ },
1241
+ {
1242
+ "base_form": "wonder woman",
1243
+ "inflected_form": "wonder woman",
1244
+ "matches_context": false
1245
+ },
1246
+ {
1247
+ "base_form": "Wac",
1248
+ "inflected_form": "wac",
1249
+ "matches_context": false
1250
+ },
1251
+ {
1252
+ "base_form": "mestiza",
1253
+ "inflected_form": "mestiza",
1254
+ "matches_context": false
1255
+ },
1256
+ {
1257
+ "base_form": "nanny",
1258
+ "inflected_form": "nanny",
1259
+ "matches_context": false
1260
+ },
1261
+ {
1262
+ "base_form": "nursemaid",
1263
+ "inflected_form": "nursemaid",
1264
+ "matches_context": false
1265
+ },
1266
+ {
1267
+ "base_form": "nurse",
1268
+ "inflected_form": "nurse",
1269
+ "matches_context": false
1270
+ },
1271
+ {
1272
+ "base_form": "debutante",
1273
+ "inflected_form": "debutante",
1274
+ "matches_context": false
1275
+ },
1276
+ {
1277
+ "base_form": "deb",
1278
+ "inflected_form": "deb",
1279
+ "matches_context": false
1280
+ },
1281
+ {
1282
+ "base_form": "dominatrix",
1283
+ "inflected_form": "dominatrix",
1284
+ "matches_context": false
1285
+ },
1286
+ {
1287
+ "base_form": "girlfriend",
1288
+ "inflected_form": "girlfriend",
1289
+ "matches_context": false
1290
+ },
1291
+ {
1292
+ "base_form": "girl",
1293
+ "inflected_form": "girl",
1294
+ "matches_context": false
1295
+ },
1296
+ {
1297
+ "base_form": "lady friend",
1298
+ "inflected_form": "lady friend",
1299
+ "matches_context": false
1300
+ },
1301
+ {
1302
+ "base_form": "unmarried woman",
1303
+ "inflected_form": "unmarried woman",
1304
+ "matches_context": false
1305
+ },
1306
+ {
1307
+ "base_form": "donna",
1308
+ "inflected_form": "donna",
1309
+ "matches_context": false
1310
+ },
1311
+ {
1312
+ "base_form": "eyeful",
1313
+ "inflected_form": "eyeful",
1314
+ "matches_context": false
1315
+ },
1316
+ {
1317
+ "base_form": "divorcee",
1318
+ "inflected_form": "divorcee",
1319
+ "matches_context": false
1320
+ },
1321
+ {
1322
+ "base_form": "grass widow",
1323
+ "inflected_form": "grass widow",
1324
+ "matches_context": false
1325
+ },
1326
+ {
1327
+ "base_form": "amazon",
1328
+ "inflected_form": "amazon",
1329
+ "matches_context": false
1330
+ },
1331
+ {
1332
+ "base_form": "virago",
1333
+ "inflected_form": "virago",
1334
+ "matches_context": false
1335
+ },
1336
+ {
1337
+ "base_form": "lady",
1338
+ "inflected_form": "lady",
1339
+ "matches_context": false
1340
+ },
1341
+ {
1342
+ "base_form": "maenad",
1343
+ "inflected_form": "maenad",
1344
+ "matches_context": false
1345
+ },
1346
+ {
1347
+ "base_form": "mistress",
1348
+ "inflected_form": "mistress",
1349
+ "matches_context": false
1350
+ },
1351
+ {
1352
+ "base_form": "kept woman",
1353
+ "inflected_form": "kept woman",
1354
+ "matches_context": false
1355
+ },
1356
+ {
1357
+ "base_form": "fancy woman",
1358
+ "inflected_form": "fancy woman",
1359
+ "matches_context": false
1360
+ },
1361
+ {
1362
+ "base_form": "nymph",
1363
+ "inflected_form": "nymph",
1364
+ "matches_context": false
1365
+ },
1366
+ {
1367
+ "base_form": "houri",
1368
+ "inflected_form": "houri",
1369
+ "matches_context": false
1370
+ },
1371
+ {
1372
+ "base_form": "geisha",
1373
+ "inflected_form": "geisha",
1374
+ "matches_context": false
1375
+ },
1376
+ {
1377
+ "base_form": "geisha girl",
1378
+ "inflected_form": "geisha girl",
1379
+ "matches_context": false
1380
+ },
1381
+ {
1382
+ "base_form": "matriarch",
1383
+ "inflected_form": "matriarch",
1384
+ "matches_context": false
1385
+ },
1386
+ {
1387
+ "base_form": "materfamilias",
1388
+ "inflected_form": "materfamilias",
1389
+ "matches_context": false
1390
+ },
1391
+ {
1392
+ "base_form": "matron",
1393
+ "inflected_form": "matron",
1394
+ "matches_context": false
1395
+ },
1396
+ {
1397
+ "base_form": "baggage",
1398
+ "inflected_form": "baggage",
1399
+ "matches_context": false
1400
+ },
1401
+ {
1402
+ "base_form": "broad",
1403
+ "inflected_form": "broad",
1404
+ "matches_context": false
1405
+ },
1406
+ {
1407
+ "base_form": "girl",
1408
+ "inflected_form": "girl",
1409
+ "matches_context": false
1410
+ },
1411
+ {
1412
+ "base_form": "enchantress",
1413
+ "inflected_form": "enchantress",
1414
+ "matches_context": false
1415
+ },
1416
+ {
1417
+ "base_form": "temptress",
1418
+ "inflected_form": "temptress",
1419
+ "matches_context": false
1420
+ },
1421
+ {
1422
+ "base_form": "siren",
1423
+ "inflected_form": "siren",
1424
+ "matches_context": false
1425
+ },
1426
+ {
1427
+ "base_form": "Delilah",
1428
+ "inflected_form": "delilah",
1429
+ "matches_context": false
1430
+ },
1431
+ {
1432
+ "base_form": "femme fatale",
1433
+ "inflected_form": "femme fatale",
1434
+ "matches_context": false
1435
+ },
1436
+ {
1437
+ "base_form": "gravida",
1438
+ "inflected_form": "gravida",
1439
+ "matches_context": false
1440
+ },
1441
+ {
1442
+ "base_form": "jilt",
1443
+ "inflected_form": "jilt",
1444
+ "matches_context": false
1445
+ },
1446
+ {
1447
+ "base_form": "maenad",
1448
+ "inflected_form": "maenad",
1449
+ "matches_context": false
1450
+ },
1451
+ {
1452
+ "base_form": "nullipara",
1453
+ "inflected_form": "nullipara",
1454
+ "matches_context": false
1455
+ },
1456
+ {
1457
+ "base_form": "shiksa",
1458
+ "inflected_form": "shiksa",
1459
+ "matches_context": false
1460
+ },
1461
+ {
1462
+ "base_form": "shikse",
1463
+ "inflected_form": "shikse",
1464
+ "matches_context": false
1465
+ },
1466
+ {
1467
+ "base_form": "ex-wife",
1468
+ "inflected_form": "ex-wife",
1469
+ "matches_context": false
1470
+ },
1471
+ {
1472
+ "base_form": "ex",
1473
+ "inflected_form": "ex",
1474
+ "matches_context": false
1475
+ },
1476
+ {
1477
+ "base_form": "gold digger",
1478
+ "inflected_form": "gold digger",
1479
+ "matches_context": false
1480
+ },
1481
+ {
1482
+ "base_form": "old woman",
1483
+ "inflected_form": "old woman",
1484
+ "matches_context": false
1485
+ },
1486
+ {
1487
+ "base_form": "dame",
1488
+ "inflected_form": "dame",
1489
+ "matches_context": false
1490
+ },
1491
+ {
1492
+ "base_form": "madam",
1493
+ "inflected_form": "madam",
1494
+ "matches_context": false
1495
+ },
1496
+ {
1497
+ "base_form": "ma'am",
1498
+ "inflected_form": "ma'am",
1499
+ "matches_context": false
1500
+ },
1501
+ {
1502
+ "base_form": "lady",
1503
+ "inflected_form": "lady",
1504
+ "matches_context": false
1505
+ },
1506
+ {
1507
+ "base_form": "gentlewoman",
1508
+ "inflected_form": "gentlewoman",
1509
+ "matches_context": false
1510
+ },
1511
+ {
1512
+ "base_form": "sylph",
1513
+ "inflected_form": "sylph",
1514
+ "matches_context": false
1515
+ },
1516
+ {
1517
+ "base_form": "ball-buster",
1518
+ "inflected_form": "ball-buster",
1519
+ "matches_context": false
1520
+ },
1521
+ {
1522
+ "base_form": "ball-breaker",
1523
+ "inflected_form": "ball-breaker",
1524
+ "matches_context": false
1525
+ }
1526
+ ]
1527
+ },
1528
  {
1529
+ "relation_type": "meronym",
1530
  "definition": "an adult female person (as opposed to a man)",
1531
  "examples": [
1532
  "the woman kept house while the man hunted"
1533
  ],
1534
  "wordnet_pos": "n",
1535
+ "related_words": [
1536
  {
1537
+ "base_form": "adult female body",
1538
+ "inflected_form": "adult female body",
1539
+ "matches_context": false
1540
+ },
1541
+ {
1542
+ "base_form": "woman's body",
1543
+ "inflected_form": "woman's body",
1544
+ "matches_context": false
1545
+ }
1546
+ ]
1547
+ },
1548
+ {
1549
+ "relation_type": "antonym",
1550
+ "definition": "",
1551
+ "examples": [],
1552
+ "related_words": [
1553
+ {
1554
+ "base_form": "man",
1555
+ "inflected_form": "man",
1556
+ "matches_context": false
1557
+ }
1558
+ ]
1559
+ },
1560
+ {
1561
+ "relation_type": "hypernym",
1562
+ "definition": "a female person who plays a significant role (wife or mistress or girlfriend) in the life of a particular man",
1563
+ "examples": [
1564
+ "he was faithful to his woman"
1565
+ ],
1566
+ "wordnet_pos": "n",
1567
+ "related_words": [
1568
+ {
1569
+ "base_form": "female",
1570
+ "inflected_form": "female",
1571
+ "matches_context": false
1572
+ },
1573
+ {
1574
+ "base_form": "female person",
1575
+ "inflected_form": "female person",
1576
  "matches_context": false
1577
  }
1578
  ]
1579
  },
1580
  {
1581
+ "relation_type": "synonym",
1582
  "definition": "a human female employed to do housework",
1583
  "examples": [
1584
  "the char will clean the carpet",
1585
  "I have a woman who comes in four hours a day while I write"
1586
  ],
1587
  "wordnet_pos": "n",
1588
+ "related_words": [
1589
+ {
1590
+ "base_form": "charwoman",
1591
+ "inflected_form": "charwoman",
1592
+ "matches_context": false
1593
+ },
1594
  {
1595
  "base_form": "char",
1596
  "inflected_form": "char",
1597
  "matches_context": false
1598
  },
1599
  {
1600
+ "base_form": "cleaning woman",
1601
+ "inflected_form": "cleaning woman",
1602
  "matches_context": false
1603
  },
1604
  {
1605
  "base_form": "cleaning lady",
1606
  "inflected_form": "cleaning lady",
1607
  "matches_context": false
1608
+ }
1609
+ ]
1610
+ },
1611
+ {
1612
+ "relation_type": "hypernym",
1613
+ "definition": "a human female employed to do housework",
1614
+ "examples": [
1615
+ "the char will clean the carpet",
1616
+ "I have a woman who comes in four hours a day while I write"
1617
+ ],
1618
+ "wordnet_pos": "n",
1619
+ "related_words": [
1620
  {
1621
+ "base_form": "cleaner",
1622
+ "inflected_form": "cleaner",
1623
  "matches_context": false
1624
  }
1625
  ]
1626
  },
1627
  {
1628
+ "relation_type": "synonym",
1629
  "definition": "women as a class",
1630
  "examples": [
1631
  "it's an insult to American womanhood",
1632
  "woman is the glory of creation"
1633
  ],
1634
  "wordnet_pos": "n",
1635
+ "related_words": [
1636
+ {
1637
+ "base_form": "womanhood",
1638
+ "inflected_form": "womanhood",
1639
+ "matches_context": false
1640
+ },
1641
  {
1642
  "base_form": "fair sex",
1643
  "inflected_form": "fair sex",
1644
  "matches_context": false
1645
+ }
1646
+ ]
1647
+ },
1648
+ {
1649
+ "relation_type": "hypernym",
1650
+ "definition": "women as a class",
1651
+ "examples": [
1652
+ "it's an insult to American womanhood",
1653
+ "woman is the glory of creation"
1654
+ ],
1655
+ "wordnet_pos": "n",
1656
+ "related_words": [
1657
+ {
1658
+ "base_form": "class",
1659
+ "inflected_form": "class",
1660
+ "matches_context": false
1661
  },
1662
  {
1663
+ "base_form": "stratum",
1664
+ "inflected_form": "stratum",
1665
+ "matches_context": false
1666
+ },
1667
+ {
1668
+ "base_form": "social class",
1669
+ "inflected_form": "social class",
1670
+ "matches_context": false
1671
+ },
1672
+ {
1673
+ "base_form": "socio-economic class",
1674
+ "inflected_form": "socio-economic class",
1675
+ "matches_context": false
1676
+ }
1677
+ ]
1678
+ },
1679
+ {
1680
+ "relation_type": "holonym",
1681
+ "definition": "women as a class",
1682
+ "examples": [
1683
+ "it's an insult to American womanhood",
1684
+ "woman is the glory of creation"
1685
+ ],
1686
+ "wordnet_pos": "n",
1687
+ "related_words": [
1688
+ {
1689
+ "base_form": "womankind",
1690
+ "inflected_form": "womankind",
1691
  "matches_context": false
1692
  }
1693
  ]
 
1702
  }
1703
  }
1704
  ],
1705
+ "message": "Got 2 synonym groups.",
1706
+ "duration": 0.0003
1707
  }
tests/events/response_thesaurus_phrase_inflated2.json ADDED
@@ -0,0 +1,1707 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "success": true,
3
+ "original_phrase": "rather severe-looking woman",
4
+ "original_indices": {
5
+ "start": 28,
6
+ "end": 55
7
+ },
8
+ "results": [
9
+ {
10
+ "original_word": "looking",
11
+ "original_indices": {
12
+ "start": 14,
13
+ "end": 21
14
+ },
15
+ "context_info": {
16
+ "pos": "VERB",
17
+ "sentence": "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.",
18
+ "grammatical_form": "VBG",
19
+ "context_words": [
20
+ "at",
21
+ "a",
22
+ "rather",
23
+ "severe",
24
+ "-",
25
+ "looking",
26
+ "woman",
27
+ "who",
28
+ "was",
29
+ "wearing",
30
+ "square"
31
+ ],
32
+ "dependency": "amod"
33
+ },
34
+ "related_word_groups": [
35
+ {
36
+ "relation_type": "hyponym",
37
+ "definition": "perceive with attention; direct one's gaze towards",
38
+ "examples": [
39
+ "She looked over the expanse of land",
40
+ "Look at your child!"
41
+ ],
42
+ "wordnet_pos": "v",
43
+ "related_words": [
44
+ {
45
+ "base_form": "goggle",
46
+ "inflected_form": "goggling",
47
+ "matches_context": true
48
+ },
49
+ {
50
+ "base_form": "gape",
51
+ "inflected_form": "gaping",
52
+ "matches_context": true
53
+ },
54
+ {
55
+ "base_form": "gawp",
56
+ "inflected_form": "gawping",
57
+ "matches_context": true
58
+ },
59
+ {
60
+ "base_form": "gawk",
61
+ "inflected_form": "gawking",
62
+ "matches_context": true
63
+ },
64
+ {
65
+ "base_form": "gloat",
66
+ "inflected_form": "gloating",
67
+ "matches_context": true
68
+ },
69
+ {
70
+ "base_form": "look around",
71
+ "inflected_form": "looking around",
72
+ "matches_context": true
73
+ },
74
+ {
75
+ "base_form": "ogle",
76
+ "inflected_form": "ogling",
77
+ "matches_context": true
78
+ },
79
+ {
80
+ "base_form": "give the glad eye",
81
+ "inflected_form": "giving the glad eye",
82
+ "matches_context": true
83
+ },
84
+ {
85
+ "base_form": "peep",
86
+ "inflected_form": "peeping",
87
+ "matches_context": true
88
+ },
89
+ {
90
+ "base_form": "look back",
91
+ "inflected_form": "looking back",
92
+ "matches_context": true
93
+ },
94
+ {
95
+ "base_form": "look backward",
96
+ "inflected_form": "looking backward",
97
+ "matches_context": true
98
+ },
99
+ {
100
+ "base_form": "regard",
101
+ "inflected_form": "regarding",
102
+ "matches_context": true
103
+ },
104
+ {
105
+ "base_form": "consider",
106
+ "inflected_form": "considering",
107
+ "matches_context": true
108
+ },
109
+ {
110
+ "base_form": "glance",
111
+ "inflected_form": "glancing",
112
+ "matches_context": true
113
+ },
114
+ {
115
+ "base_form": "peek",
116
+ "inflected_form": "peeking",
117
+ "matches_context": true
118
+ },
119
+ {
120
+ "base_form": "glint",
121
+ "inflected_form": "glinting",
122
+ "matches_context": true
123
+ },
124
+ {
125
+ "base_form": "eye",
126
+ "inflected_form": "eyeing",
127
+ "matches_context": true
128
+ },
129
+ {
130
+ "base_form": "eyeball",
131
+ "inflected_form": "eyeballing",
132
+ "matches_context": true
133
+ },
134
+ {
135
+ "base_form": "peer",
136
+ "inflected_form": "peering",
137
+ "matches_context": true
138
+ },
139
+ {
140
+ "base_form": "admire",
141
+ "inflected_form": "admiring",
142
+ "matches_context": true
143
+ },
144
+ {
145
+ "base_form": "gaze",
146
+ "inflected_form": "gazing",
147
+ "matches_context": true
148
+ },
149
+ {
150
+ "base_form": "stare",
151
+ "inflected_form": "staring",
152
+ "matches_context": true
153
+ },
154
+ {
155
+ "base_form": "look away",
156
+ "inflected_form": "looking away",
157
+ "matches_context": true
158
+ },
159
+ {
160
+ "base_form": "leer",
161
+ "inflected_form": "leering",
162
+ "matches_context": true
163
+ },
164
+ {
165
+ "base_form": "give the eye",
166
+ "inflected_form": "giving the eye",
167
+ "matches_context": true
168
+ },
169
+ {
170
+ "base_form": "give the once over",
171
+ "inflected_form": "giving the once over",
172
+ "matches_context": true
173
+ },
174
+ {
175
+ "base_form": "squint",
176
+ "inflected_form": "squinting",
177
+ "matches_context": true
178
+ },
179
+ {
180
+ "base_form": "take a look",
181
+ "inflected_form": "taking a look",
182
+ "matches_context": true
183
+ },
184
+ {
185
+ "base_form": "have a look",
186
+ "inflected_form": "having a look",
187
+ "matches_context": true
188
+ },
189
+ {
190
+ "base_form": "get a load",
191
+ "inflected_form": "getting a load",
192
+ "matches_context": true
193
+ },
194
+ {
195
+ "base_form": "stare",
196
+ "inflected_form": "staring",
197
+ "matches_context": true
198
+ }
199
+ ]
200
+ },
201
+ {
202
+ "relation_type": "synonym",
203
+ "definition": "give a certain impression or have a certain outward aspect",
204
+ "examples": [
205
+ "She seems to be sleeping",
206
+ "This appears to be a very difficult problem"
207
+ ],
208
+ "wordnet_pos": "v",
209
+ "related_words": [
210
+ {
211
+ "base_form": "appear",
212
+ "inflected_form": "appearing",
213
+ "matches_context": true
214
+ },
215
+ {
216
+ "base_form": "seem",
217
+ "inflected_form": "seeming",
218
+ "matches_context": true
219
+ }
220
+ ]
221
+ },
222
+ {
223
+ "relation_type": "hypernym",
224
+ "definition": "give a certain impression or have a certain outward aspect",
225
+ "examples": [
226
+ "She seems to be sleeping",
227
+ "This appears to be a very difficult problem"
228
+ ],
229
+ "wordnet_pos": "v",
230
+ "related_words": [
231
+ {
232
+ "base_form": "be",
233
+ "inflected_form": "being",
234
+ "matches_context": true
235
+ }
236
+ ]
237
+ },
238
+ {
239
+ "relation_type": "hyponym",
240
+ "definition": "give a certain impression or have a certain outward aspect",
241
+ "examples": [
242
+ "She seems to be sleeping",
243
+ "This appears to be a very difficult problem"
244
+ ],
245
+ "wordnet_pos": "v",
246
+ "related_words": [
247
+ {
248
+ "base_form": "glow",
249
+ "inflected_form": "glowing",
250
+ "matches_context": true
251
+ },
252
+ {
253
+ "base_form": "beam",
254
+ "inflected_form": "beaming",
255
+ "matches_context": true
256
+ },
257
+ {
258
+ "base_form": "radiate",
259
+ "inflected_form": "radiating",
260
+ "matches_context": true
261
+ },
262
+ {
263
+ "base_form": "shine",
264
+ "inflected_form": "shining",
265
+ "matches_context": true
266
+ },
267
+ {
268
+ "base_form": "rise",
269
+ "inflected_form": "rising",
270
+ "matches_context": true
271
+ },
272
+ {
273
+ "base_form": "lift",
274
+ "inflected_form": "lifting",
275
+ "matches_context": true
276
+ },
277
+ {
278
+ "base_form": "rear",
279
+ "inflected_form": "rearing",
280
+ "matches_context": true
281
+ },
282
+ {
283
+ "base_form": "glitter",
284
+ "inflected_form": "glittering",
285
+ "matches_context": true
286
+ },
287
+ {
288
+ "base_form": "glisten",
289
+ "inflected_form": "glistening",
290
+ "matches_context": true
291
+ },
292
+ {
293
+ "base_form": "glint",
294
+ "inflected_form": "glinting",
295
+ "matches_context": true
296
+ },
297
+ {
298
+ "base_form": "gleam",
299
+ "inflected_form": "gleaming",
300
+ "matches_context": true
301
+ },
302
+ {
303
+ "base_form": "shine",
304
+ "inflected_form": "shining",
305
+ "matches_context": true
306
+ },
307
+ {
308
+ "base_form": "leap out",
309
+ "inflected_form": "leaping out",
310
+ "matches_context": true
311
+ },
312
+ {
313
+ "base_form": "jump out",
314
+ "inflected_form": "jumping out",
315
+ "matches_context": true
316
+ },
317
+ {
318
+ "base_form": "jump",
319
+ "inflected_form": "jumping",
320
+ "matches_context": true
321
+ },
322
+ {
323
+ "base_form": "stand out",
324
+ "inflected_form": "standing out",
325
+ "matches_context": true
326
+ },
327
+ {
328
+ "base_form": "stick out",
329
+ "inflected_form": "sticking out",
330
+ "matches_context": true
331
+ },
332
+ {
333
+ "base_form": "make",
334
+ "inflected_form": "making",
335
+ "matches_context": true
336
+ },
337
+ {
338
+ "base_form": "loom",
339
+ "inflected_form": "looming",
340
+ "matches_context": true
341
+ },
342
+ {
343
+ "base_form": "sound",
344
+ "inflected_form": "sounding",
345
+ "matches_context": true
346
+ },
347
+ {
348
+ "base_form": "cut",
349
+ "inflected_form": "cutting",
350
+ "matches_context": true
351
+ },
352
+ {
353
+ "base_form": "pass off",
354
+ "inflected_form": "passing off",
355
+ "matches_context": true
356
+ },
357
+ {
358
+ "base_form": "come across",
359
+ "inflected_form": "coming across",
360
+ "matches_context": true
361
+ },
362
+ {
363
+ "base_form": "feel",
364
+ "inflected_form": "feeling",
365
+ "matches_context": true
366
+ },
367
+ {
368
+ "base_form": "feel",
369
+ "inflected_form": "feeling",
370
+ "matches_context": true
371
+ }
372
+ ]
373
+ },
374
+ {
375
+ "relation_type": "hypernym",
376
+ "definition": "have a certain outward or facial expression",
377
+ "examples": [
378
+ "How does she look?",
379
+ "The child looks unhappy"
380
+ ],
381
+ "wordnet_pos": "v",
382
+ "related_words": [
383
+ {
384
+ "base_form": "be",
385
+ "inflected_form": "being",
386
+ "matches_context": true
387
+ }
388
+ ]
389
+ },
390
+ {
391
+ "relation_type": "hyponym",
392
+ "definition": "have a certain outward or facial expression",
393
+ "examples": [
394
+ "How does she look?",
395
+ "The child looks unhappy"
396
+ ],
397
+ "wordnet_pos": "v",
398
+ "related_words": [
399
+ {
400
+ "base_form": "squint",
401
+ "inflected_form": "squinting",
402
+ "matches_context": true
403
+ }
404
+ ]
405
+ },
406
+ {
407
+ "relation_type": "synonym",
408
+ "definition": "search or seek",
409
+ "examples": [
410
+ "We looked all day and finally found the child in the forest",
411
+ "Look elsewhere for the perfect gift!"
412
+ ],
413
+ "wordnet_pos": "v",
414
+ "related_words": [
415
+ {
416
+ "base_form": "search",
417
+ "inflected_form": "searching",
418
+ "matches_context": true
419
+ }
420
+ ]
421
+ },
422
+ {
423
+ "relation_type": "hypernym",
424
+ "definition": "search or seek",
425
+ "examples": [
426
+ "We looked all day and finally found the child in the forest",
427
+ "Look elsewhere for the perfect gift!"
428
+ ],
429
+ "wordnet_pos": "v",
430
+ "related_words": [
431
+ {
432
+ "base_form": "examine",
433
+ "inflected_form": "examining",
434
+ "matches_context": true
435
+ },
436
+ {
437
+ "base_form": "see",
438
+ "inflected_form": "seeing",
439
+ "matches_context": true
440
+ }
441
+ ]
442
+ },
443
+ {
444
+ "relation_type": "hyponym",
445
+ "definition": "search or seek",
446
+ "examples": [
447
+ "We looked all day and finally found the child in the forest",
448
+ "Look elsewhere for the perfect gift!"
449
+ ],
450
+ "wordnet_pos": "v",
451
+ "related_words": [
452
+ {
453
+ "base_form": "hunt",
454
+ "inflected_form": "hunting",
455
+ "matches_context": true
456
+ },
457
+ {
458
+ "base_form": "cruise",
459
+ "inflected_form": "cruising",
460
+ "matches_context": true
461
+ },
462
+ {
463
+ "base_form": "prospect",
464
+ "inflected_form": "prospecting",
465
+ "matches_context": true
466
+ },
467
+ {
468
+ "base_form": "intrude",
469
+ "inflected_form": "intruding",
470
+ "matches_context": true
471
+ },
472
+ {
473
+ "base_form": "horn in",
474
+ "inflected_form": "horning in",
475
+ "matches_context": true
476
+ },
477
+ {
478
+ "base_form": "pry",
479
+ "inflected_form": "prying",
480
+ "matches_context": true
481
+ },
482
+ {
483
+ "base_form": "nose",
484
+ "inflected_form": "nosing",
485
+ "matches_context": true
486
+ },
487
+ {
488
+ "base_form": "poke",
489
+ "inflected_form": "poking",
490
+ "matches_context": true
491
+ }
492
+ ]
493
+ },
494
+ {
495
+ "relation_type": "synonym",
496
+ "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
497
+ "examples": [
498
+ "The house looks north",
499
+ "My backyard look onto the pond"
500
+ ],
501
+ "wordnet_pos": "v",
502
+ "related_words": [
503
+ {
504
+ "base_form": "front",
505
+ "inflected_form": "fronting",
506
+ "matches_context": true
507
+ },
508
+ {
509
+ "base_form": "face",
510
+ "inflected_form": "facing",
511
+ "matches_context": true
512
+ }
513
+ ]
514
+ },
515
+ {
516
+ "relation_type": "antonym",
517
+ "definition": "",
518
+ "examples": [],
519
+ "related_words": [
520
+ {
521
+ "base_form": "back",
522
+ "inflected_form": "backing",
523
+ "matches_context": true
524
+ }
525
+ ]
526
+ },
527
+ {
528
+ "relation_type": "hypernym",
529
+ "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
530
+ "examples": [
531
+ "The house looks north",
532
+ "My backyard look onto the pond"
533
+ ],
534
+ "wordnet_pos": "v",
535
+ "related_words": [
536
+ {
537
+ "base_form": "lie",
538
+ "inflected_form": "lying",
539
+ "matches_context": true
540
+ }
541
+ ]
542
+ },
543
+ {
544
+ "relation_type": "hyponym",
545
+ "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
546
+ "examples": [
547
+ "The house looks north",
548
+ "My backyard look onto the pond"
549
+ ],
550
+ "wordnet_pos": "v",
551
+ "related_words": [
552
+ {
553
+ "base_form": "confront",
554
+ "inflected_form": "confronting",
555
+ "matches_context": true
556
+ }
557
+ ]
558
+ },
559
+ {
560
+ "relation_type": "synonym",
561
+ "definition": "take charge of or deal with",
562
+ "examples": [
563
+ "Could you see about lunch?",
564
+ "I must attend to this matter"
565
+ ],
566
+ "wordnet_pos": "v",
567
+ "related_words": [
568
+ {
569
+ "base_form": "attend",
570
+ "inflected_form": "attending",
571
+ "matches_context": true
572
+ },
573
+ {
574
+ "base_form": "take care",
575
+ "inflected_form": "taking care",
576
+ "matches_context": true
577
+ },
578
+ {
579
+ "base_form": "see",
580
+ "inflected_form": "seeing",
581
+ "matches_context": true
582
+ }
583
+ ]
584
+ },
585
+ {
586
+ "relation_type": "hypernym",
587
+ "definition": "take charge of or deal with",
588
+ "examples": [
589
+ "Could you see about lunch?",
590
+ "I must attend to this matter"
591
+ ],
592
+ "wordnet_pos": "v",
593
+ "related_words": [
594
+ {
595
+ "base_form": "care",
596
+ "inflected_form": "caring",
597
+ "matches_context": true
598
+ },
599
+ {
600
+ "base_form": "give care",
601
+ "inflected_form": "giving care",
602
+ "matches_context": true
603
+ }
604
+ ]
605
+ },
606
+ {
607
+ "relation_type": "hyponym",
608
+ "definition": "take charge of or deal with",
609
+ "examples": [
610
+ "Could you see about lunch?",
611
+ "I must attend to this matter"
612
+ ],
613
+ "wordnet_pos": "v",
614
+ "related_words": [
615
+ {
616
+ "base_form": "tend",
617
+ "inflected_form": "tending",
618
+ "matches_context": true
619
+ },
620
+ {
621
+ "base_form": "minister",
622
+ "inflected_form": "ministering",
623
+ "matches_context": true
624
+ }
625
+ ]
626
+ },
627
+ {
628
+ "relation_type": "hypernym",
629
+ "definition": "convey by one's expression",
630
+ "examples": [
631
+ "She looked her devotion to me"
632
+ ],
633
+ "wordnet_pos": "v",
634
+ "related_words": [
635
+ {
636
+ "base_form": "convey",
637
+ "inflected_form": "conveying",
638
+ "matches_context": true
639
+ }
640
+ ]
641
+ },
642
+ {
643
+ "relation_type": "synonym",
644
+ "definition": "look forward to the probable occurrence of",
645
+ "examples": [
646
+ "We were expecting a visit from our relatives",
647
+ "She is looking to a promotion"
648
+ ],
649
+ "wordnet_pos": "v",
650
+ "related_words": [
651
+ {
652
+ "base_form": "expect",
653
+ "inflected_form": "expecting",
654
+ "matches_context": true
655
+ },
656
+ {
657
+ "base_form": "await",
658
+ "inflected_form": "awaiting",
659
+ "matches_context": true
660
+ },
661
+ {
662
+ "base_form": "wait",
663
+ "inflected_form": "waiting",
664
+ "matches_context": true
665
+ }
666
+ ]
667
+ },
668
+ {
669
+ "relation_type": "hyponym",
670
+ "definition": "look forward to the probable occurrence of",
671
+ "examples": [
672
+ "We were expecting a visit from our relatives",
673
+ "She is looking to a promotion"
674
+ ],
675
+ "wordnet_pos": "v",
676
+ "related_words": [
677
+ {
678
+ "base_form": "look forward",
679
+ "inflected_form": "looking forward",
680
+ "matches_context": true
681
+ },
682
+ {
683
+ "base_form": "anticipate",
684
+ "inflected_form": "anticipating",
685
+ "matches_context": true
686
+ },
687
+ {
688
+ "base_form": "look for",
689
+ "inflected_form": "looking for",
690
+ "matches_context": true
691
+ },
692
+ {
693
+ "base_form": "look to",
694
+ "inflected_form": "looking to",
695
+ "matches_context": true
696
+ },
697
+ {
698
+ "base_form": "hang on",
699
+ "inflected_form": "hanging on",
700
+ "matches_context": true
701
+ },
702
+ {
703
+ "base_form": "hold the line",
704
+ "inflected_form": "holding the line",
705
+ "matches_context": true
706
+ },
707
+ {
708
+ "base_form": "hold on",
709
+ "inflected_form": "holding on",
710
+ "matches_context": true
711
+ },
712
+ {
713
+ "base_form": "expect",
714
+ "inflected_form": "expecting",
715
+ "matches_context": true
716
+ },
717
+ {
718
+ "base_form": "look to",
719
+ "inflected_form": "looking to",
720
+ "matches_context": true
721
+ }
722
+ ]
723
+ },
724
+ {
725
+ "relation_type": "hypernym",
726
+ "definition": "accord in appearance with",
727
+ "examples": [
728
+ "You don't look your age!"
729
+ ],
730
+ "wordnet_pos": "v",
731
+ "related_words": [
732
+ {
733
+ "base_form": "match",
734
+ "inflected_form": "matching",
735
+ "matches_context": true
736
+ },
737
+ {
738
+ "base_form": "fit",
739
+ "inflected_form": "fitting",
740
+ "matches_context": true
741
+ },
742
+ {
743
+ "base_form": "correspond",
744
+ "inflected_form": "corresponding",
745
+ "matches_context": true
746
+ },
747
+ {
748
+ "base_form": "check",
749
+ "inflected_form": "checking",
750
+ "matches_context": true
751
+ },
752
+ {
753
+ "base_form": "jibe",
754
+ "inflected_form": "jibing",
755
+ "matches_context": true
756
+ },
757
+ {
758
+ "base_form": "gibe",
759
+ "inflected_form": "gibing",
760
+ "matches_context": true
761
+ },
762
+ {
763
+ "base_form": "tally",
764
+ "inflected_form": "tallying",
765
+ "matches_context": true
766
+ },
767
+ {
768
+ "base_form": "agree",
769
+ "inflected_form": "agreeing",
770
+ "matches_context": true
771
+ }
772
+ ]
773
+ },
774
+ {
775
+ "relation_type": "synonym",
776
+ "definition": "have faith or confidence in",
777
+ "examples": [
778
+ "you can count on me to help you any time",
779
+ "Look to your friends for support"
780
+ ],
781
+ "wordnet_pos": "v",
782
+ "related_words": [
783
+ {
784
+ "base_form": "count",
785
+ "inflected_form": "counting",
786
+ "matches_context": true
787
+ },
788
+ {
789
+ "base_form": "bet",
790
+ "inflected_form": "betting",
791
+ "matches_context": true
792
+ },
793
+ {
794
+ "base_form": "depend",
795
+ "inflected_form": "depending",
796
+ "matches_context": true
797
+ },
798
+ {
799
+ "base_form": "swear",
800
+ "inflected_form": "swearing",
801
+ "matches_context": true
802
+ },
803
+ {
804
+ "base_form": "rely",
805
+ "inflected_form": "relying",
806
+ "matches_context": true
807
+ },
808
+ {
809
+ "base_form": "bank",
810
+ "inflected_form": "banking",
811
+ "matches_context": true
812
+ },
813
+ {
814
+ "base_form": "calculate",
815
+ "inflected_form": "calculating",
816
+ "matches_context": true
817
+ },
818
+ {
819
+ "base_form": "reckon",
820
+ "inflected_form": "reckoning",
821
+ "matches_context": true
822
+ }
823
+ ]
824
+ },
825
+ {
826
+ "relation_type": "hypernym",
827
+ "definition": "have faith or confidence in",
828
+ "examples": [
829
+ "you can count on me to help you any time",
830
+ "Look to your friends for support"
831
+ ],
832
+ "wordnet_pos": "v",
833
+ "related_words": [
834
+ {
835
+ "base_form": "trust",
836
+ "inflected_form": "trusting",
837
+ "matches_context": true
838
+ }
839
+ ]
840
+ }
841
+ ],
842
+ "debug_info": {
843
+ "spacy_token_indices": {
844
+ "start": 42,
845
+ "end": 49
846
+ },
847
+ "lemma": "look"
848
+ }
849
+ },
850
+ {
851
+ "original_word": "woman",
852
+ "original_indices": {
853
+ "start": 22,
854
+ "end": 27
855
+ },
856
+ "context_info": {
857
+ "pos": "NOUN",
858
+ "sentence": "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.",
859
+ "grammatical_form": "NN",
860
+ "context_words": [
861
+ "a",
862
+ "rather",
863
+ "severe",
864
+ "-",
865
+ "looking",
866
+ "woman",
867
+ "who",
868
+ "was",
869
+ "wearing",
870
+ "square",
871
+ "glasses"
872
+ ],
873
+ "dependency": "pobj"
874
+ },
875
+ "related_word_groups": [
876
+ {
877
+ "relation_type": "synonym",
878
+ "definition": "an adult female person (as opposed to a man)",
879
+ "examples": [
880
+ "the woman kept house while the man hunted"
881
+ ],
882
+ "wordnet_pos": "n",
883
+ "related_words": [
884
+ {
885
+ "base_form": "adult female",
886
+ "inflected_form": "adult female",
887
+ "matches_context": false
888
+ }
889
+ ]
890
+ },
891
+ {
892
+ "relation_type": "antonym",
893
+ "definition": "",
894
+ "examples": [],
895
+ "related_words": [
896
+ {
897
+ "base_form": "man",
898
+ "inflected_form": "man",
899
+ "matches_context": false
900
+ }
901
+ ]
902
+ },
903
+ {
904
+ "relation_type": "hypernym",
905
+ "definition": "an adult female person (as opposed to a man)",
906
+ "examples": [
907
+ "the woman kept house while the man hunted"
908
+ ],
909
+ "wordnet_pos": "n",
910
+ "related_words": [
911
+ {
912
+ "base_form": "female",
913
+ "inflected_form": "female",
914
+ "matches_context": false
915
+ },
916
+ {
917
+ "base_form": "female person",
918
+ "inflected_form": "female person",
919
+ "matches_context": false
920
+ },
921
+ {
922
+ "base_form": "adult",
923
+ "inflected_form": "adult",
924
+ "matches_context": false
925
+ },
926
+ {
927
+ "base_form": "grownup",
928
+ "inflected_form": "grownup",
929
+ "matches_context": false
930
+ }
931
+ ]
932
+ },
933
+ {
934
+ "relation_type": "hyponym",
935
+ "definition": "an adult female person (as opposed to a man)",
936
+ "examples": [
937
+ "the woman kept house while the man hunted"
938
+ ],
939
+ "wordnet_pos": "n",
940
+ "related_words": [
941
+ {
942
+ "base_form": "jezebel",
943
+ "inflected_form": "jezebel",
944
+ "matches_context": false
945
+ },
946
+ {
947
+ "base_form": "mother figure",
948
+ "inflected_form": "mother figure",
949
+ "matches_context": false
950
+ },
951
+ {
952
+ "base_form": "smasher",
953
+ "inflected_form": "smasher",
954
+ "matches_context": false
955
+ },
956
+ {
957
+ "base_form": "stunner",
958
+ "inflected_form": "stunner",
959
+ "matches_context": false
960
+ },
961
+ {
962
+ "base_form": "knockout",
963
+ "inflected_form": "knockout",
964
+ "matches_context": false
965
+ },
966
+ {
967
+ "base_form": "beauty",
968
+ "inflected_form": "beauty",
969
+ "matches_context": false
970
+ },
971
+ {
972
+ "base_form": "ravisher",
973
+ "inflected_form": "ravisher",
974
+ "matches_context": false
975
+ },
976
+ {
977
+ "base_form": "sweetheart",
978
+ "inflected_form": "sweetheart",
979
+ "matches_context": false
980
+ },
981
+ {
982
+ "base_form": "peach",
983
+ "inflected_form": "peach",
984
+ "matches_context": false
985
+ },
986
+ {
987
+ "base_form": "lulu",
988
+ "inflected_form": "lulu",
989
+ "matches_context": false
990
+ },
991
+ {
992
+ "base_form": "looker",
993
+ "inflected_form": "looker",
994
+ "matches_context": false
995
+ },
996
+ {
997
+ "base_form": "mantrap",
998
+ "inflected_form": "mantrap",
999
+ "matches_context": false
1000
+ },
1001
+ {
1002
+ "base_form": "dish",
1003
+ "inflected_form": "dish",
1004
+ "matches_context": false
1005
+ },
1006
+ {
1007
+ "base_form": "B-girl",
1008
+ "inflected_form": "b-girl",
1009
+ "matches_context": false
1010
+ },
1011
+ {
1012
+ "base_form": "bar girl",
1013
+ "inflected_form": "bar girl",
1014
+ "matches_context": false
1015
+ },
1016
+ {
1017
+ "base_form": "heroine",
1018
+ "inflected_form": "heroine",
1019
+ "matches_context": false
1020
+ },
1021
+ {
1022
+ "base_form": "prostitute",
1023
+ "inflected_form": "prostitute",
1024
+ "matches_context": false
1025
+ },
1026
+ {
1027
+ "base_form": "cocotte",
1028
+ "inflected_form": "cocotte",
1029
+ "matches_context": false
1030
+ },
1031
+ {
1032
+ "base_form": "whore",
1033
+ "inflected_form": "whore",
1034
+ "matches_context": false
1035
+ },
1036
+ {
1037
+ "base_form": "harlot",
1038
+ "inflected_form": "harlot",
1039
+ "matches_context": false
1040
+ },
1041
+ {
1042
+ "base_form": "bawd",
1043
+ "inflected_form": "bawd",
1044
+ "matches_context": false
1045
+ },
1046
+ {
1047
+ "base_form": "tart",
1048
+ "inflected_form": "tart",
1049
+ "matches_context": false
1050
+ },
1051
+ {
1052
+ "base_form": "cyprian",
1053
+ "inflected_form": "cyprian",
1054
+ "matches_context": false
1055
+ },
1056
+ {
1057
+ "base_form": "fancy woman",
1058
+ "inflected_form": "fancy woman",
1059
+ "matches_context": false
1060
+ },
1061
+ {
1062
+ "base_form": "working girl",
1063
+ "inflected_form": "working girl",
1064
+ "matches_context": false
1065
+ },
1066
+ {
1067
+ "base_form": "sporting lady",
1068
+ "inflected_form": "sporting lady",
1069
+ "matches_context": false
1070
+ },
1071
+ {
1072
+ "base_form": "lady of pleasure",
1073
+ "inflected_form": "lady of pleasure",
1074
+ "matches_context": false
1075
+ },
1076
+ {
1077
+ "base_form": "woman of the street",
1078
+ "inflected_form": "woman of the street",
1079
+ "matches_context": false
1080
+ },
1081
+ {
1082
+ "base_form": "inamorata",
1083
+ "inflected_form": "inamorata",
1084
+ "matches_context": false
1085
+ },
1086
+ {
1087
+ "base_form": "cat",
1088
+ "inflected_form": "cat",
1089
+ "matches_context": false
1090
+ },
1091
+ {
1092
+ "base_form": "girl",
1093
+ "inflected_form": "girl",
1094
+ "matches_context": false
1095
+ },
1096
+ {
1097
+ "base_form": "miss",
1098
+ "inflected_form": "miss",
1099
+ "matches_context": false
1100
+ },
1101
+ {
1102
+ "base_form": "missy",
1103
+ "inflected_form": "missy",
1104
+ "matches_context": false
1105
+ },
1106
+ {
1107
+ "base_form": "young lady",
1108
+ "inflected_form": "young lady",
1109
+ "matches_context": false
1110
+ },
1111
+ {
1112
+ "base_form": "young woman",
1113
+ "inflected_form": "young woman",
1114
+ "matches_context": false
1115
+ },
1116
+ {
1117
+ "base_form": "fille",
1118
+ "inflected_form": "fille",
1119
+ "matches_context": false
1120
+ },
1121
+ {
1122
+ "base_form": "matriarch",
1123
+ "inflected_form": "matriarch",
1124
+ "matches_context": false
1125
+ },
1126
+ {
1127
+ "base_form": "wife",
1128
+ "inflected_form": "wife",
1129
+ "matches_context": false
1130
+ },
1131
+ {
1132
+ "base_form": "married woman",
1133
+ "inflected_form": "married woman",
1134
+ "matches_context": false
1135
+ },
1136
+ {
1137
+ "base_form": "girlfriend",
1138
+ "inflected_form": "girlfriend",
1139
+ "matches_context": false
1140
+ },
1141
+ {
1142
+ "base_form": "Cinderella",
1143
+ "inflected_form": "cinderella",
1144
+ "matches_context": false
1145
+ },
1146
+ {
1147
+ "base_form": "bachelor girl",
1148
+ "inflected_form": "bachelor girl",
1149
+ "matches_context": false
1150
+ },
1151
+ {
1152
+ "base_form": "bachelorette",
1153
+ "inflected_form": "bachelorette",
1154
+ "matches_context": false
1155
+ },
1156
+ {
1157
+ "base_form": "vestal",
1158
+ "inflected_form": "vestal",
1159
+ "matches_context": false
1160
+ },
1161
+ {
1162
+ "base_form": "widow",
1163
+ "inflected_form": "widow",
1164
+ "matches_context": false
1165
+ },
1166
+ {
1167
+ "base_form": "widow woman",
1168
+ "inflected_form": "widow woman",
1169
+ "matches_context": false
1170
+ },
1171
+ {
1172
+ "base_form": "Wave",
1173
+ "inflected_form": "wave",
1174
+ "matches_context": false
1175
+ },
1176
+ {
1177
+ "base_form": "nymphet",
1178
+ "inflected_form": "nymphet",
1179
+ "matches_context": false
1180
+ },
1181
+ {
1182
+ "base_form": "bluestocking",
1183
+ "inflected_form": "bluestocking",
1184
+ "matches_context": false
1185
+ },
1186
+ {
1187
+ "base_form": "bas bleu",
1188
+ "inflected_form": "bas bleu",
1189
+ "matches_context": false
1190
+ },
1191
+ {
1192
+ "base_form": "coquette",
1193
+ "inflected_form": "coquette",
1194
+ "matches_context": false
1195
+ },
1196
+ {
1197
+ "base_form": "flirt",
1198
+ "inflected_form": "flirt",
1199
+ "matches_context": false
1200
+ },
1201
+ {
1202
+ "base_form": "vamp",
1203
+ "inflected_form": "vamp",
1204
+ "matches_context": false
1205
+ },
1206
+ {
1207
+ "base_form": "vamper",
1208
+ "inflected_form": "vamper",
1209
+ "matches_context": false
1210
+ },
1211
+ {
1212
+ "base_form": "minx",
1213
+ "inflected_form": "minx",
1214
+ "matches_context": false
1215
+ },
1216
+ {
1217
+ "base_form": "tease",
1218
+ "inflected_form": "tease",
1219
+ "matches_context": false
1220
+ },
1221
+ {
1222
+ "base_form": "prickteaser",
1223
+ "inflected_form": "prickteaser",
1224
+ "matches_context": false
1225
+ },
1226
+ {
1227
+ "base_form": "bridesmaid",
1228
+ "inflected_form": "bridesmaid",
1229
+ "matches_context": false
1230
+ },
1231
+ {
1232
+ "base_form": "maid of honor",
1233
+ "inflected_form": "maid of honor",
1234
+ "matches_context": false
1235
+ },
1236
+ {
1237
+ "base_form": "white woman",
1238
+ "inflected_form": "white woman",
1239
+ "matches_context": false
1240
+ },
1241
+ {
1242
+ "base_form": "wonder woman",
1243
+ "inflected_form": "wonder woman",
1244
+ "matches_context": false
1245
+ },
1246
+ {
1247
+ "base_form": "Wac",
1248
+ "inflected_form": "wac",
1249
+ "matches_context": false
1250
+ },
1251
+ {
1252
+ "base_form": "mestiza",
1253
+ "inflected_form": "mestiza",
1254
+ "matches_context": false
1255
+ },
1256
+ {
1257
+ "base_form": "nanny",
1258
+ "inflected_form": "nanny",
1259
+ "matches_context": false
1260
+ },
1261
+ {
1262
+ "base_form": "nursemaid",
1263
+ "inflected_form": "nursemaid",
1264
+ "matches_context": false
1265
+ },
1266
+ {
1267
+ "base_form": "nurse",
1268
+ "inflected_form": "nurse",
1269
+ "matches_context": false
1270
+ },
1271
+ {
1272
+ "base_form": "debutante",
1273
+ "inflected_form": "debutante",
1274
+ "matches_context": false
1275
+ },
1276
+ {
1277
+ "base_form": "deb",
1278
+ "inflected_form": "deb",
1279
+ "matches_context": false
1280
+ },
1281
+ {
1282
+ "base_form": "dominatrix",
1283
+ "inflected_form": "dominatrix",
1284
+ "matches_context": false
1285
+ },
1286
+ {
1287
+ "base_form": "girlfriend",
1288
+ "inflected_form": "girlfriend",
1289
+ "matches_context": false
1290
+ },
1291
+ {
1292
+ "base_form": "girl",
1293
+ "inflected_form": "girl",
1294
+ "matches_context": false
1295
+ },
1296
+ {
1297
+ "base_form": "lady friend",
1298
+ "inflected_form": "lady friend",
1299
+ "matches_context": false
1300
+ },
1301
+ {
1302
+ "base_form": "unmarried woman",
1303
+ "inflected_form": "unmarried woman",
1304
+ "matches_context": false
1305
+ },
1306
+ {
1307
+ "base_form": "donna",
1308
+ "inflected_form": "donna",
1309
+ "matches_context": false
1310
+ },
1311
+ {
1312
+ "base_form": "eyeful",
1313
+ "inflected_form": "eyeful",
1314
+ "matches_context": false
1315
+ },
1316
+ {
1317
+ "base_form": "divorcee",
1318
+ "inflected_form": "divorcee",
1319
+ "matches_context": false
1320
+ },
1321
+ {
1322
+ "base_form": "grass widow",
1323
+ "inflected_form": "grass widow",
1324
+ "matches_context": false
1325
+ },
1326
+ {
1327
+ "base_form": "amazon",
1328
+ "inflected_form": "amazon",
1329
+ "matches_context": false
1330
+ },
1331
+ {
1332
+ "base_form": "virago",
1333
+ "inflected_form": "virago",
1334
+ "matches_context": false
1335
+ },
1336
+ {
1337
+ "base_form": "lady",
1338
+ "inflected_form": "lady",
1339
+ "matches_context": false
1340
+ },
1341
+ {
1342
+ "base_form": "maenad",
1343
+ "inflected_form": "maenad",
1344
+ "matches_context": false
1345
+ },
1346
+ {
1347
+ "base_form": "mistress",
1348
+ "inflected_form": "mistress",
1349
+ "matches_context": false
1350
+ },
1351
+ {
1352
+ "base_form": "kept woman",
1353
+ "inflected_form": "kept woman",
1354
+ "matches_context": false
1355
+ },
1356
+ {
1357
+ "base_form": "fancy woman",
1358
+ "inflected_form": "fancy woman",
1359
+ "matches_context": false
1360
+ },
1361
+ {
1362
+ "base_form": "nymph",
1363
+ "inflected_form": "nymph",
1364
+ "matches_context": false
1365
+ },
1366
+ {
1367
+ "base_form": "houri",
1368
+ "inflected_form": "houri",
1369
+ "matches_context": false
1370
+ },
1371
+ {
1372
+ "base_form": "geisha",
1373
+ "inflected_form": "geisha",
1374
+ "matches_context": false
1375
+ },
1376
+ {
1377
+ "base_form": "geisha girl",
1378
+ "inflected_form": "geisha girl",
1379
+ "matches_context": false
1380
+ },
1381
+ {
1382
+ "base_form": "matriarch",
1383
+ "inflected_form": "matriarch",
1384
+ "matches_context": false
1385
+ },
1386
+ {
1387
+ "base_form": "materfamilias",
1388
+ "inflected_form": "materfamilias",
1389
+ "matches_context": false
1390
+ },
1391
+ {
1392
+ "base_form": "matron",
1393
+ "inflected_form": "matron",
1394
+ "matches_context": false
1395
+ },
1396
+ {
1397
+ "base_form": "baggage",
1398
+ "inflected_form": "baggage",
1399
+ "matches_context": false
1400
+ },
1401
+ {
1402
+ "base_form": "broad",
1403
+ "inflected_form": "broad",
1404
+ "matches_context": false
1405
+ },
1406
+ {
1407
+ "base_form": "girl",
1408
+ "inflected_form": "girl",
1409
+ "matches_context": false
1410
+ },
1411
+ {
1412
+ "base_form": "enchantress",
1413
+ "inflected_form": "enchantress",
1414
+ "matches_context": false
1415
+ },
1416
+ {
1417
+ "base_form": "temptress",
1418
+ "inflected_form": "temptress",
1419
+ "matches_context": false
1420
+ },
1421
+ {
1422
+ "base_form": "siren",
1423
+ "inflected_form": "siren",
1424
+ "matches_context": false
1425
+ },
1426
+ {
1427
+ "base_form": "Delilah",
1428
+ "inflected_form": "delilah",
1429
+ "matches_context": false
1430
+ },
1431
+ {
1432
+ "base_form": "femme fatale",
1433
+ "inflected_form": "femme fatale",
1434
+ "matches_context": false
1435
+ },
1436
+ {
1437
+ "base_form": "gravida",
1438
+ "inflected_form": "gravida",
1439
+ "matches_context": false
1440
+ },
1441
+ {
1442
+ "base_form": "jilt",
1443
+ "inflected_form": "jilt",
1444
+ "matches_context": false
1445
+ },
1446
+ {
1447
+ "base_form": "maenad",
1448
+ "inflected_form": "maenad",
1449
+ "matches_context": false
1450
+ },
1451
+ {
1452
+ "base_form": "nullipara",
1453
+ "inflected_form": "nullipara",
1454
+ "matches_context": false
1455
+ },
1456
+ {
1457
+ "base_form": "shiksa",
1458
+ "inflected_form": "shiksa",
1459
+ "matches_context": false
1460
+ },
1461
+ {
1462
+ "base_form": "shikse",
1463
+ "inflected_form": "shikse",
1464
+ "matches_context": false
1465
+ },
1466
+ {
1467
+ "base_form": "ex-wife",
1468
+ "inflected_form": "ex-wife",
1469
+ "matches_context": false
1470
+ },
1471
+ {
1472
+ "base_form": "ex",
1473
+ "inflected_form": "ex",
1474
+ "matches_context": false
1475
+ },
1476
+ {
1477
+ "base_form": "gold digger",
1478
+ "inflected_form": "gold digger",
1479
+ "matches_context": false
1480
+ },
1481
+ {
1482
+ "base_form": "old woman",
1483
+ "inflected_form": "old woman",
1484
+ "matches_context": false
1485
+ },
1486
+ {
1487
+ "base_form": "dame",
1488
+ "inflected_form": "dame",
1489
+ "matches_context": false
1490
+ },
1491
+ {
1492
+ "base_form": "madam",
1493
+ "inflected_form": "madam",
1494
+ "matches_context": false
1495
+ },
1496
+ {
1497
+ "base_form": "ma'am",
1498
+ "inflected_form": "ma'am",
1499
+ "matches_context": false
1500
+ },
1501
+ {
1502
+ "base_form": "lady",
1503
+ "inflected_form": "lady",
1504
+ "matches_context": false
1505
+ },
1506
+ {
1507
+ "base_form": "gentlewoman",
1508
+ "inflected_form": "gentlewoman",
1509
+ "matches_context": false
1510
+ },
1511
+ {
1512
+ "base_form": "sylph",
1513
+ "inflected_form": "sylph",
1514
+ "matches_context": false
1515
+ },
1516
+ {
1517
+ "base_form": "ball-buster",
1518
+ "inflected_form": "ball-buster",
1519
+ "matches_context": false
1520
+ },
1521
+ {
1522
+ "base_form": "ball-breaker",
1523
+ "inflected_form": "ball-breaker",
1524
+ "matches_context": false
1525
+ }
1526
+ ]
1527
+ },
1528
+ {
1529
+ "relation_type": "meronym",
1530
+ "definition": "an adult female person (as opposed to a man)",
1531
+ "examples": [
1532
+ "the woman kept house while the man hunted"
1533
+ ],
1534
+ "wordnet_pos": "n",
1535
+ "related_words": [
1536
+ {
1537
+ "base_form": "adult female body",
1538
+ "inflected_form": "adult female body",
1539
+ "matches_context": false
1540
+ },
1541
+ {
1542
+ "base_form": "woman's body",
1543
+ "inflected_form": "woman's body",
1544
+ "matches_context": false
1545
+ }
1546
+ ]
1547
+ },
1548
+ {
1549
+ "relation_type": "antonym",
1550
+ "definition": "",
1551
+ "examples": [],
1552
+ "related_words": [
1553
+ {
1554
+ "base_form": "man",
1555
+ "inflected_form": "man",
1556
+ "matches_context": false
1557
+ }
1558
+ ]
1559
+ },
1560
+ {
1561
+ "relation_type": "hypernym",
1562
+ "definition": "a female person who plays a significant role (wife or mistress or girlfriend) in the life of a particular man",
1563
+ "examples": [
1564
+ "he was faithful to his woman"
1565
+ ],
1566
+ "wordnet_pos": "n",
1567
+ "related_words": [
1568
+ {
1569
+ "base_form": "female",
1570
+ "inflected_form": "female",
1571
+ "matches_context": false
1572
+ },
1573
+ {
1574
+ "base_form": "female person",
1575
+ "inflected_form": "female person",
1576
+ "matches_context": false
1577
+ }
1578
+ ]
1579
+ },
1580
+ {
1581
+ "relation_type": "synonym",
1582
+ "definition": "a human female employed to do housework",
1583
+ "examples": [
1584
+ "the char will clean the carpet",
1585
+ "I have a woman who comes in four hours a day while I write"
1586
+ ],
1587
+ "wordnet_pos": "n",
1588
+ "related_words": [
1589
+ {
1590
+ "base_form": "charwoman",
1591
+ "inflected_form": "charwoman",
1592
+ "matches_context": false
1593
+ },
1594
+ {
1595
+ "base_form": "char",
1596
+ "inflected_form": "char",
1597
+ "matches_context": false
1598
+ },
1599
+ {
1600
+ "base_form": "cleaning woman",
1601
+ "inflected_form": "cleaning woman",
1602
+ "matches_context": false
1603
+ },
1604
+ {
1605
+ "base_form": "cleaning lady",
1606
+ "inflected_form": "cleaning lady",
1607
+ "matches_context": false
1608
+ }
1609
+ ]
1610
+ },
1611
+ {
1612
+ "relation_type": "hypernym",
1613
+ "definition": "a human female employed to do housework",
1614
+ "examples": [
1615
+ "the char will clean the carpet",
1616
+ "I have a woman who comes in four hours a day while I write"
1617
+ ],
1618
+ "wordnet_pos": "n",
1619
+ "related_words": [
1620
+ {
1621
+ "base_form": "cleaner",
1622
+ "inflected_form": "cleaner",
1623
+ "matches_context": false
1624
+ }
1625
+ ]
1626
+ },
1627
+ {
1628
+ "relation_type": "synonym",
1629
+ "definition": "women as a class",
1630
+ "examples": [
1631
+ "it's an insult to American womanhood",
1632
+ "woman is the glory of creation"
1633
+ ],
1634
+ "wordnet_pos": "n",
1635
+ "related_words": [
1636
+ {
1637
+ "base_form": "womanhood",
1638
+ "inflected_form": "womanhood",
1639
+ "matches_context": false
1640
+ },
1641
+ {
1642
+ "base_form": "fair sex",
1643
+ "inflected_form": "fair sex",
1644
+ "matches_context": false
1645
+ }
1646
+ ]
1647
+ },
1648
+ {
1649
+ "relation_type": "hypernym",
1650
+ "definition": "women as a class",
1651
+ "examples": [
1652
+ "it's an insult to American womanhood",
1653
+ "woman is the glory of creation"
1654
+ ],
1655
+ "wordnet_pos": "n",
1656
+ "related_words": [
1657
+ {
1658
+ "base_form": "class",
1659
+ "inflected_form": "class",
1660
+ "matches_context": false
1661
+ },
1662
+ {
1663
+ "base_form": "stratum",
1664
+ "inflected_form": "stratum",
1665
+ "matches_context": false
1666
+ },
1667
+ {
1668
+ "base_form": "social class",
1669
+ "inflected_form": "social class",
1670
+ "matches_context": false
1671
+ },
1672
+ {
1673
+ "base_form": "socio-economic class",
1674
+ "inflected_form": "socio-economic class",
1675
+ "matches_context": false
1676
+ }
1677
+ ]
1678
+ },
1679
+ {
1680
+ "relation_type": "holonym",
1681
+ "definition": "women as a class",
1682
+ "examples": [
1683
+ "it's an insult to American womanhood",
1684
+ "woman is the glory of creation"
1685
+ ],
1686
+ "wordnet_pos": "n",
1687
+ "related_words": [
1688
+ {
1689
+ "base_form": "womankind",
1690
+ "inflected_form": "womankind",
1691
+ "matches_context": false
1692
+ }
1693
+ ]
1694
+ }
1695
+ ],
1696
+ "debug_info": {
1697
+ "spacy_token_indices": {
1698
+ "start": 50,
1699
+ "end": 55
1700
+ },
1701
+ "lemma": "woman"
1702
+ }
1703
+ }
1704
+ ],
1705
+ "message": "Got 2 synonym groups.",
1706
+ "duration": 0.0003
1707
+ }
tests/events/response_thesaurus_phrase_inflated_structure.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"$[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 2}, "$[*].context_info": {"types": ["dict"], "primary_type": "dict", "is_array": false, "samples": [], "sample_count": 0}, "$[*].context_info.context_words": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$[*].context_info.context_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 11}, "$[*].context_info.dependency": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["amod", "pobj"], "sample_count": 2}, "$[*].context_info.grammatical_form": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["VBG", "NN"], "sample_count": 2}, "$[*].context_info.pos": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["VERB", "NOUN"], "sample_count": 2}, "$[*].context_info.sentence": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.", "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes."], "sample_count": 2}, "$[*].debug_info": {"types": ["dict"], "primary_type": "dict", "is_array": false, "samples": [], "sample_count": 0}, "$[*].debug_info.lemma": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["look", "woman"], "sample_count": 2}, "$[*].debug_info.spacy_token_indices": {"types": ["dict"], "primary_type": "dict", "is_array": false, "samples": [], "sample_count": 0}, "$[*].debug_info.spacy_token_indices.end": {"types": ["int"], "primary_type": "int", "is_array": false, "samples": ["49", "55"], "sample_count": 2}, "$[*].debug_info.spacy_token_indices.start": {"types": 
["int"], "primary_type": "int", "is_array": false, "samples": ["42", "50"], "sample_count": 2}, "$[*].original_indices": {"types": ["dict"], "primary_type": "dict", "is_array": false, "samples": [], "sample_count": 0}, "$[*].original_indices.end": {"types": ["int"], "primary_type": "int", "is_array": false, "samples": ["21", "27"], "sample_count": 2}, "$[*].original_indices.start": {"types": ["int"], "primary_type": "int", "is_array": false, "samples": ["14", "22"], "sample_count": 2}, "$[*].original_word": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["looking", "woman"], "sample_count": 2}, "$[*].related_word_groups": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$[*].related_word_groups[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 12}, "$[*].related_word_groups[*].definition": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["perceive with attention; direct one's gaze towards", "give a certain impression or have a certain outward aspect", "give a certain impression or have a certain outward aspect"], "sample_count": 3}, "$[*].related_word_groups[*].examples": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$[*].related_word_groups[*].examples[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 2}, "$[*].related_word_groups[*].related_words": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$[*].related_word_groups[*].related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 1}, "$[*].related_word_groups[*].related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["look away", "admire", "gaze"], 
"sample_count": 3}, "$[*].related_word_groups[*].related_words[*].inflected_form": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["looking away", "admiring", "gazing"], "sample_count": 3}, "$[*].related_word_groups[*].related_words[*].matches_context": {"types": ["bool"], "primary_type": "bool", "is_array": false, "samples": ["True", "True", "True"], "sample_count": 3}, "$[*].related_word_groups[*].relation_type": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["hyponym", "synonym", "hypernym"], "sample_count": 3}, "$[*].related_word_groups[*].wordnet_pos": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["v", "v", "v"], "sample_count": 3}}
tests/my_ghost_writer/helpers_tests.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ def analyze_detailed_report_lists(cls, detailed_report: dict, expected_detailed_report: dict):
4
+ for (row_k, row_v), (expected_row_k, expected_row_v) in zip(detailed_report.items(), expected_detailed_report.items()):
5
+ cls.assertEqual(row_k, expected_row_k)
6
+ del row_v["samples"]
7
+ del expected_row_v["samples"]
8
+ del row_v["sample_count"]
9
+ del expected_row_v["sample_count"]
10
+ cls.assertDictEqual(row_v, expected_row_v)
tests/my_ghost_writer/test_app.py CHANGED
@@ -12,6 +12,9 @@ from my_ghost_writer import __version__ as version_module
12
  from my_ghost_writer.app import app, mongo_health_check_background_task, lifespan
13
  from my_ghost_writer.constants import app_logger
14
  from tests import EVENTS_FOLDER
 
 
 
15
 
16
 
17
  class TestVersion(unittest.TestCase):
@@ -210,11 +213,12 @@ class TestAppEndpoints(unittest.TestCase):
210
  # --- /thesaurus-inflated-phrase Endpoint ---
211
  def test_get_synonyms_for_phrase_success(self):
212
  """Tests the success case for /thesaurus-inflated-phrase."""
213
- # Load the expected response from JSON file
214
  import json
215
- with open(EVENTS_FOLDER / "response_thesaurus_phrase_inflated.json", "r") as f:
216
- expected_response = json.load(f)
217
-
 
 
218
  body = {
219
  "word": "rather severe-looking woman",
220
  "text": "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.",
@@ -223,13 +227,32 @@ class TestAppEndpoints(unittest.TestCase):
223
  }
224
  response = self.client.post("/thesaurus-inflated-phrase", json=body)
225
  self.assertEqual(response.status_code, 200)
226
- json_response = response.json()
227
- self.assertEqual(json_response["success"], expected_response["success"])
228
- self.assertEqual(json_response["original_phrase"], expected_response["original_phrase"])
229
- self.assertEqual(json_response["original_indices"], expected_response["original_indices"])
230
- self.assertEqual(json_response["message"], expected_response["message"])
 
 
 
 
 
 
231
  # check only the first result
232
- self.assertEqual(json_response["results"][0], expected_response["results"][0])
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
  def test_get_synonyms_for_phrase_no_synonyms(self):
235
  """Tests the case where no synonyms are found for the phrase."""
@@ -240,8 +263,13 @@ class TestAppEndpoints(unittest.TestCase):
240
  "end": 18
241
  }
242
  response = self.client.post("/thesaurus-inflated-phrase", json=body)
 
243
  self.assertEqual(response.status_code, 200)
244
- self.assertEqual(response.json(), {
 
 
 
 
245
  "success": True,
246
  "original_phrase": "some phrase",
247
  "original_indices": {
@@ -262,9 +290,17 @@ class TestAppEndpoints(unittest.TestCase):
262
  }
263
  response = self.client.post("/thesaurus-inflated-phrase", json=body)
264
  self.assertEqual(response.status_code, 200)
 
 
 
 
 
265
  self.assertDictEqual(
266
- response.json(),
267
- {'message': 'No words with synonyms found in the selected phrase.', 'original_indices': {'end': 18, 'start': 20}, 'original_phrase': 'some phrase', 'results': [], 'success': True}
 
 
 
268
  )
269
 
270
  def test_get_synonyms_for_phrase_error_validation(self):
 
12
  from my_ghost_writer.app import app, mongo_health_check_background_task, lifespan
13
  from my_ghost_writer.constants import app_logger
14
  from tests import EVENTS_FOLDER
15
+ from my_ghost_writer.jsonpath_comparator import JSONPathComparator
16
+ from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer
17
+ from tests.my_ghost_writer.helpers_tests import analyze_detailed_report_lists
18
 
19
 
20
  class TestVersion(unittest.TestCase):
 
213
  # --- /thesaurus-inflated-phrase Endpoint ---
214
  def test_get_synonyms_for_phrase_success(self):
215
  """Tests the success case for /thesaurus-inflated-phrase."""
 
216
  import json
217
+ with open(EVENTS_FOLDER / "response_thesaurus_phrase_inflated.json", "r") as src:
218
+ expected_response = json.load(src)
219
+ with open(EVENTS_FOLDER / "response_thesaurus_phrase_inflated_structure.json", "r") as src:
220
+ expected_detailed_report = json.load(src)
221
+ self.maxDiff = None
222
  body = {
223
  "word": "rather severe-looking woman",
224
  "text": "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.",
 
227
  }
228
  response = self.client.post("/thesaurus-inflated-phrase", json=body)
229
  self.assertEqual(response.status_code, 200)
230
+ response_json = response.json()
231
+ duration = response_json["duration"]
232
+ self.assertIsInstance(duration, float)
233
+ self.assertGreater(duration, 0)
234
+ del response_json["duration"]
235
+ del expected_response["duration"]
236
+ app_logger.info(f"response_json.keys():{response_json.keys()}.")
237
+ self.assertEqual(response_json["success"], expected_response["success"])
238
+ self.assertEqual(response_json["original_phrase"], expected_response["original_phrase"])
239
+ self.assertEqual(response_json["original_indices"], expected_response["original_indices"])
240
+ self.assertEqual(response_json["message"], expected_response["message"])
241
  # check only the first result
242
+ comparator = JSONPathComparator()
243
+ comparison = comparator.compare_structures(response_json["results"], expected_response["results"])
244
+ assert comparison is not None
245
+ added = comparison.get('added_paths')
246
+ removed = comparison.get("removed_paths")
247
+ self.assertEqual(added, set())
248
+ self.assertEqual(removed, set())
249
+ analyzer = JSONPathStructureAnalyzer()
250
+ analyzer.extract_all_paths(response_json["results"])
251
+ detailed_report = analyzer.get_detailed_type_report()
252
+ analyze_detailed_report_lists(self, detailed_report, expected_detailed_report)
253
+
254
+ # with open(EVENTS_FOLDER / "response_thesaurus_phrase_inflated_structure.json", "w") as src:
255
+ # json.dump(detailed_report, src)
256
 
257
  def test_get_synonyms_for_phrase_no_synonyms(self):
258
  """Tests the case where no synonyms are found for the phrase."""
 
263
  "end": 18
264
  }
265
  response = self.client.post("/thesaurus-inflated-phrase", json=body)
266
+ response_json = response.json()
267
  self.assertEqual(response.status_code, 200)
268
+ duration = response_json["duration"]
269
+ self.assertIsInstance(duration, float)
270
+ self.assertGreater(duration, 0)
271
+ del response_json["duration"]
272
+ self.assertEqual(response_json, {
273
  "success": True,
274
  "original_phrase": "some phrase",
275
  "original_indices": {
 
290
  }
291
  response = self.client.post("/thesaurus-inflated-phrase", json=body)
292
  self.assertEqual(response.status_code, 200)
293
+ response_json = response.json()
294
+ duration = response_json["duration"]
295
+ self.assertIsInstance(duration, float)
296
+ self.assertGreater(duration, 0)
297
+ del response_json["duration"]
298
  self.assertDictEqual(
299
+ response_json,
300
+ {
301
+ 'success': True, 'original_phrase': 'some phrase', 'original_indices': {'start': 20, 'end': 18},
302
+ 'results': [], 'message': 'No words with synonyms found in the selected phrase.'
303
+ }
304
  )
305
 
306
  def test_get_synonyms_for_phrase_error_validation(self):
tests/my_ghost_writer/test_custom_synonym_handler.py CHANGED
@@ -1,16 +1,20 @@
1
  import unittest
2
 
3
  from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
 
4
 
5
 
6
  class TestCustomSynonymHandler(unittest.TestCase):
7
  def test_custom_synonym_handler_add_entry_ok1(self):
8
  word_input = "happy"
9
- related_input = [
10
- {'definition': 'definition of happy', 'type': 'synonym', 'words': ['joy', 'cheer']},
11
- {'definition': 'definition of sad', 'type': 'antonym', 'words': ['sad', 'sadness']},
12
- {'definition': 'another definition of happy', 'type': 'synonym', 'words': ['content', 'cheerful', 'joyful']}
13
- ]
 
 
 
14
  test_custom_synonym_handler = CustomSynonymHandler()
15
  self.assertEqual(test_custom_synonym_handler.inverted_index, {})
16
  self.assertEqual(test_custom_synonym_handler.lexicon, {})
@@ -48,16 +52,16 @@ class TestCustomSynonymHandler(unittest.TestCase):
48
  self.assertEqual(test_custom_synonym_handler.lexicon, expected_lexicon)
49
  self.assertEqual(test_custom_synonym_handler.inverted_index, expected_inverted_index)
50
 
51
- synonyms_related = test_custom_synonym_handler.get_related("happy", "synonym")
52
  self.assertListEqual(synonyms_related, [
53
  {'definition': 'definition of happy', 'words': ['joy', 'cheer']},
54
  {'definition': 'another definition of happy', 'words': ['content', 'cheerful', 'joyful']}
55
  ])
56
- antonyms_related = test_custom_synonym_handler.get_related("happy", "antonym")
57
  self.assertListEqual(antonyms_related, [{'definition': 'definition of sad', 'words': ['sad', 'sadness']}])
58
 
59
  test_custom_synonym_handler.add_entry("text", [
60
- {'definition': 'definition of text', 'type': 'synonym', 'words': ['word', 'sentence']}
61
  ])
62
  self.assertEqual(test_custom_synonym_handler.lexicon, {
63
  **{"text": {'synonym': [{'definition': 'definition of text', 'words': ['word', 'sentence']}]}},
 
1
  import unittest
2
 
3
  from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
4
+ from my_ghost_writer.type_hints import RelatedEntry, TermRelationships
5
 
6
 
7
  class TestCustomSynonymHandler(unittest.TestCase):
8
  def test_custom_synonym_handler_add_entry_ok1(self):
9
  word_input = "happy"
10
+ related_input = []
11
+ for rel in [
12
+ {'definition': 'definition of happy', 'type': 'synonym', 'words': ['joy', 'cheer']},
13
+ {'definition': 'definition of sad', 'type': 'antonym', 'words': ['sad', 'sadness']},
14
+ {'definition': 'another definition of happy', 'type': 'synonym', 'words': ['content', 'cheerful', 'joyful']}
15
+ ]:
16
+ tmp = RelatedEntry(**rel)
17
+ related_input.append(tmp)
18
  test_custom_synonym_handler = CustomSynonymHandler()
19
  self.assertEqual(test_custom_synonym_handler.inverted_index, {})
20
  self.assertEqual(test_custom_synonym_handler.lexicon, {})
 
52
  self.assertEqual(test_custom_synonym_handler.lexicon, expected_lexicon)
53
  self.assertEqual(test_custom_synonym_handler.inverted_index, expected_inverted_index)
54
 
55
+ synonyms_related = test_custom_synonym_handler.get_related("happy", TermRelationships.SYNONYM)
56
  self.assertListEqual(synonyms_related, [
57
  {'definition': 'definition of happy', 'words': ['joy', 'cheer']},
58
  {'definition': 'another definition of happy', 'words': ['content', 'cheerful', 'joyful']}
59
  ])
60
+ antonyms_related = test_custom_synonym_handler.get_related("happy", TermRelationships.ANTONYM)
61
  self.assertListEqual(antonyms_related, [{'definition': 'definition of sad', 'words': ['sad', 'sadness']}])
62
 
63
  test_custom_synonym_handler.add_entry("text", [
64
+ RelatedEntry(**{'definition': 'definition of text', 'type': 'synonym', 'words': ['word', 'sentence']})
65
  ])
66
  self.assertEqual(test_custom_synonym_handler.lexicon, {
67
  **{"text": {'synonym': [{'definition': 'definition of text', 'words': ['word', 'sentence']}]}},
tests/my_ghost_writer/test_extract_jsonpaths.py ADDED
@@ -0,0 +1,1440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ import json
3
+
4
+ from my_ghost_writer.constants import app_logger
5
+ from my_ghost_writer.jsonpath_comparator import (compare_json_with_jsonpath, compare_json_with_jsonpath_and_types,
6
+ JSONPathComparator, extract_structure_paths, extract_structure_paths_with_types)
7
+ from my_ghost_writer.jsonpath_extractor import (JSONPathStructureAnalyzer, analyze_with_jsonpath_detailed,
8
+ analyze_with_jsonpath_types, analyze_with_jsonpath, compare_json_with_jsonpath_structures, analyze_dict_list_simple)
9
+
10
+
11
+ old_json = {
12
+ "key1": "row 1",
13
+ "key2": 22,
14
+ "key_nested1": {
15
+ "key_nested2": "row 3",
16
+ "key_nested3": "row 4",
17
+ "array_nested_4": [
18
+ "row 5",
19
+ "row 6",
20
+ "row 7 nested",
21
+ {
22
+ "key_nested4": "row 8",
23
+ "array_nested_5": ["row 9", "row 10"]
24
+ }
25
+ ]
26
+ }
27
+ }
28
+
29
+ new_json = {
30
+ "key1": "row 1",
31
+ "key2": 22,
32
+ "key_nested1": {
33
+ "key_nested2": "row 3",
34
+ "key_nested3": "row 4",
35
+ "array_changed_4": [
36
+ "row 5",
37
+ "row changed 6",
38
+ "row 7 nested",
39
+ {
40
+ "last_change": "row 8",
41
+ "array_nested_5": ["row 9", "row 10"]
42
+ }
43
+ ]
44
+ }
45
+ }
46
+
47
+
48
+ class TestJSONPathStructureAnalyzer(unittest.TestCase):
49
+ """
50
+ Test JSONPath structure analysis with the provided nested JSON data
51
+ """
52
+
53
+ def test_get_paths_with_types_basic(self):
54
+ """
55
+ Test get_paths_with_types with basic data types
56
+ """
57
+ test_data = {
58
+ "string_field": "hello",
59
+ "int_field": 42,
60
+ "float_field": 3.14,
61
+ "bool_field": True,
62
+ "null_field": None
63
+ }
64
+
65
+ analyzer = JSONPathStructureAnalyzer()
66
+ analyzer.extract_all_paths(test_data)
67
+ paths_with_types = analyzer.get_paths_with_types()
68
+
69
+ # Verify basic types
70
+ self.assertEqual(paths_with_types["$.string_field"], "str")
71
+ self.assertEqual(paths_with_types["$.int_field"], "int")
72
+ self.assertEqual(paths_with_types["$.float_field"], "float")
73
+ self.assertEqual(paths_with_types["$.bool_field"], "bool")
74
+ self.assertEqual(paths_with_types["$.null_field"], "NoneType")
75
+
76
+ def test_get_paths_with_types_arrays(self):
77
+ """
78
+ Test get_paths_with_types with arrays
79
+ """
80
+ test_data = {
81
+ "simple_array": [1, 2, 3],
82
+ "empty_array": [],
83
+ "mixed_array": ["string", 42, True],
84
+ "nested_array": [[1, 2], [3, 4]]
85
+ }
86
+
87
+ analyzer = JSONPathStructureAnalyzer()
88
+ analyzer.extract_all_paths(test_data)
89
+ paths_with_types = analyzer.get_paths_with_types()
90
+
91
+ self.assertEqual(paths_with_types["$.simple_array[*]"], "array")
92
+ self.assertEqual(paths_with_types["$.empty_array[*]"], "array")
93
+ self.assertEqual(paths_with_types["$.mixed_array[*]"], "array")
94
+ self.assertEqual(paths_with_types["$.nested_array[*]"], "array")
95
+
96
+ def test_get_paths_with_types_with_old_json(self):
97
+ """
98
+ Test get_paths_with_types with the old_json test data
99
+ """
100
+ analyzer = JSONPathStructureAnalyzer()
101
+ analyzer.extract_all_paths(old_json)
102
+ paths_with_types = analyzer.get_paths_with_types()
103
+
104
+ # Test specific paths from old_json
105
+ self.assertEqual(paths_with_types["$.key1"], "str")
106
+ self.assertEqual(paths_with_types["$.key2"], "int")
107
+ self.assertEqual(paths_with_types["$.key_nested1"], "dict")
108
+ self.assertEqual(paths_with_types["$.key_nested1.array_nested_4[*]"], "array")
109
+ self.assertEqual(paths_with_types["$.key_nested1.key_nested2"], "str")
110
+
111
+ # Verify all expected paths are present
112
+ expected_paths = [
113
+ "$.key1", "$.key2", "$.key_nested1",
114
+ "$.key_nested1.key_nested2", "$.key_nested1.key_nested3",
115
+ "$.key_nested1.array_nested_4[*]"
116
+ ]
117
+
118
+ for path in expected_paths:
119
+ self.assertIn(path, paths_with_types, f"Path {path} should be in paths_with_types")
120
+
121
+ def test_get_detailed_type_report_basic(self):
122
+ """
123
+ Test get_detailed_type_report with basic data
124
+ """
125
+ test_data = {
126
+ "test_field": "sample_value",
127
+ "array_field": [1, 2, 3]
128
+ }
129
+
130
+ analyzer = JSONPathStructureAnalyzer()
131
+ analyzer.extract_all_paths(test_data)
132
+ detailed_report = analyzer.get_detailed_type_report()
133
+
134
+ # Test structure of the detailed report
135
+ self.assertIn("$.test_field", detailed_report)
136
+ self.assertIn("$.array_field[*]", detailed_report)
137
+
138
+ # Test field details
139
+ field_info = detailed_report["$.test_field"]
140
+ self.assertIn("types", field_info)
141
+ self.assertIn("primary_type", field_info)
142
+ self.assertIn("is_array", field_info)
143
+ self.assertIn("samples", field_info)
144
+ self.assertIn("sample_count", field_info)
145
+
146
+ # Verify field values
147
+ self.assertEqual(field_info["primary_type"], "str")
148
+ self.assertFalse(field_info["is_array"])
149
+ self.assertIn("sample_value", field_info["samples"])
150
+ self.assertGreater(field_info["sample_count"], 0)
151
+
152
+ # Test array field details
153
+ array_info = detailed_report["$.array_field[*]"]
154
+ self.assertTrue(array_info["is_array"])
155
+ self.assertEqual(array_info["primary_type"], "array")
156
+ self.assertEqual(array_info['array_length'], 3)
157
+
158
+ def test_get_detailed_type_report_with_old_json(self):
159
+ """
160
+ Test get_detailed_type_report with old_json data
161
+ """
162
+ analyzer = JSONPathStructureAnalyzer()
163
+ analyzer.extract_all_paths(old_json)
164
+ detailed_report = analyzer.get_detailed_type_report()
165
+
166
+ # Test specific fields from old_json
167
+ key1_info = detailed_report["$.key1"]
168
+ self.assertEqual(key1_info["primary_type"], "str")
169
+ self.assertFalse(key1_info["is_array"])
170
+ self.assertIn("row 1", key1_info["samples"])
171
+
172
+ key2_info = detailed_report["$.key2"]
173
+ self.assertEqual(key2_info["primary_type"], "int")
174
+ self.assertFalse(key2_info["is_array"])
175
+ self.assertIn("22", key2_info["samples"])
176
+
177
+ # Test array field
178
+ array_info = detailed_report["$.key_nested1.array_nested_4[*]"]
179
+ self.assertTrue(array_info["is_array"])
180
+ self.assertEqual(array_info["primary_type"], "array")
181
+ self.assertEqual(array_info["array_length"], 4)
182
+
183
+ def test_get_detailed_type_report_mixed_types(self):
184
+ """
185
+ Test get_detailed_type_report with mixed types (hypothetical case)
186
+ """
187
+ # Create a scenario where a path might have mixed types
188
+ analyzer = JSONPathStructureAnalyzer()
189
+
190
+ # Manually add mixed type data to test the logic
191
+ analyzer.paths.add("$.mixed_field")
192
+ analyzer.types["$.mixed_field"].add("str")
193
+ analyzer.types["$.mixed_field"].add("int")
194
+ analyzer.samples["$.mixed_field"] = ["hello", "42"]
195
+
196
+ detailed_report = analyzer.get_detailed_type_report()
197
+
198
+ mixed_info = detailed_report["$.mixed_field"]
199
+ self.assertIn("mixed(", mixed_info["primary_type"])
200
+ self.assertFalse(mixed_info["is_array"])
201
+ self.assertEqual(len(mixed_info["types"]), 2)
202
+
203
+ def test_analyze_with_jsonpath_types_function(self):
204
+ """
205
+ Test the convenience function analyze_with_jsonpath_types
206
+ """
207
+ test_data = {
208
+ "name": "test",
209
+ "count": 5,
210
+ "items": ["a", "b", "c"]
211
+ }
212
+
213
+ paths_with_types = analyze_with_jsonpath_types(test_data)
214
+
215
+ # Verify function returns expected structure
216
+ self.assertIsInstance(paths_with_types, dict)
217
+ self.assertIn("$.name", paths_with_types)
218
+ self.assertIn("$.count", paths_with_types)
219
+ self.assertIn("$.items[*]", paths_with_types)
220
+
221
+ # Verify types
222
+ self.assertEqual(paths_with_types["$.name"], "str")
223
+ self.assertEqual(paths_with_types["$.count"], "int")
224
+ self.assertEqual(paths_with_types["$.items[*]"], "array")
225
+
226
+ def test_analyze_with_jsonpath_detailed_function(self):
227
+ """
228
+ Test the convenience function analyze_with_jsonpath_detailed
229
+ """
230
+ test_data = {
231
+ "description": "test description",
232
+ "tags": ["tag1", "tag2"]
233
+ }
234
+
235
+ detailed_info = analyze_with_jsonpath_detailed(test_data)
236
+
237
+ # Verify function returns expected structure
238
+ self.assertIsInstance(detailed_info, dict)
239
+ self.assertIn("$.description", detailed_info)
240
+ self.assertIn("$.tags[*]", detailed_info)
241
+
242
+ # Verify detailed structure
243
+ desc_info = detailed_info["$.description"]
244
+ self.assertIn("types", desc_info)
245
+ self.assertIn("primary_type", desc_info)
246
+ self.assertIn("samples", desc_info)
247
+ self.assertEqual(desc_info["primary_type"], "str")
248
+
249
+ tags_info = detailed_info["$.tags[*]"]
250
+ self.assertTrue(tags_info["is_array"])
251
+ self.assertEqual(tags_info["primary_type"], "array")
252
+ self.assertEqual(tags_info["array_length"], 2)
253
+
254
+ def test_get_paths_with_types_empty_data(self):
255
+ """
256
+ Test get_paths_with_types with empty data
257
+ """
258
+ analyzer = JSONPathStructureAnalyzer()
259
+ analyzer.extract_all_paths({})
260
+ paths_with_types = analyzer.get_paths_with_types()
261
+
262
+ # Should return empty dict for empty input
263
+ self.assertEqual(len(paths_with_types), 0)
264
+
265
+ def test_get_detailed_type_report_empty_data(self):
266
+ """
267
+ Test get_detailed_type_report with empty data
268
+ """
269
+ analyzer = JSONPathStructureAnalyzer()
270
+ analyzer.extract_all_paths({})
271
+ detailed_report = analyzer.get_detailed_type_report()
272
+
273
+ # Should return empty dict for empty input
274
+ self.assertEqual(len(detailed_report), 0)
275
+
276
+ def test_paths_with_types_comparison_old_vs_new(self):
277
+ """
278
+ Test comparing paths with types between old and new JSON
279
+ """
280
+ analyzer_old = JSONPathStructureAnalyzer()
281
+ analyzer_old.extract_all_paths(old_json)
282
+ old_paths_with_types = analyzer_old.get_paths_with_types()
283
+
284
+ analyzer_new = JSONPathStructureAnalyzer()
285
+ analyzer_new.extract_all_paths(new_json)
286
+ new_paths_with_types = analyzer_new.get_paths_with_types()
287
+
288
+ # Find differences
289
+ old_only = set(old_paths_with_types.keys()) - set(new_paths_with_types.keys())
290
+ new_only = set(new_paths_with_types.keys()) - set(old_paths_with_types.keys())
291
+ common = set(old_paths_with_types.keys()) & set(new_paths_with_types.keys())
292
+
293
+ # Verify expected differences
294
+ self.assertIn("$.key_nested1.array_nested_4[*]", old_only)
295
+ self.assertIn("$.key_nested1.array_changed_4[*]", new_only)
296
+
297
+ # Verify common paths have same types
298
+ for path in common:
299
+ self.assertEqual(old_paths_with_types[path], new_paths_with_types[path],
300
+ f"Type mismatch for common path {path}")
301
+
302
+ def test_detailed_report_comparison_old_vs_new(self):
303
+ """
304
+ Test comparing detailed reports between old and new JSON
305
+ """
306
+ old_detailed = analyze_with_jsonpath_detailed(old_json)
307
+ new_detailed = analyze_with_jsonpath_detailed(new_json)
308
+
309
+ # Check that common fields have consistent detailed info
310
+ common_paths = set(old_detailed.keys()) & set(new_detailed.keys())
311
+
312
+ for path in common_paths:
313
+ old_info = old_detailed[path]
314
+ new_info = new_detailed[path]
315
+
316
+ # Primary types should match for common paths
317
+ self.assertEqual(old_info["primary_type"], new_info["primary_type"],
318
+ f"Primary type mismatch for {path}")
319
+
320
+ # Array status should match
321
+ self.assertEqual(old_info["is_array"], new_info["is_array"],
322
+ f"Array status mismatch for {path}")
323
+
324
+ def test_integration_all_new_methods(self):
325
+ """
326
+ Integration test using all new methods together
327
+ """
328
+ test_data = {
329
+ "user": {
330
+ "name": "John Doe",
331
+ "age": 30,
332
+ "hobbies": ["reading", "coding", "gaming"],
333
+ "profile": {
334
+ "active": True,
335
+ "settings": {
336
+ "theme": "dark",
337
+ "notifications": False
338
+ }
339
+ }
340
+ }
341
+ }
342
+
343
+ # Test all three approaches
344
+ structure_report = analyze_with_jsonpath(test_data)
345
+ paths_with_types = analyze_with_jsonpath_types(test_data)
346
+ detailed_info = analyze_with_jsonpath_detailed(test_data)
347
+
348
+ # Verify all methods found the same paths
349
+ report_paths = set()
350
+ for line in structure_report.split('\n'):
351
+ if ' -- ' in line:
352
+ path = line.split(' -- ')[0]
353
+ report_paths.add(path)
354
+
355
+ types_paths = set(paths_with_types.keys())
356
+ detailed_paths = set(detailed_info.keys())
357
+
358
+ # All methods should find the same paths
359
+ self.assertEqual(report_paths, types_paths)
360
+ self.assertEqual(types_paths, detailed_paths)
361
+
362
+ # Verify specific expected paths exist
363
+ expected_paths = [
364
+ "$.user",
365
+ "$.user.name",
366
+ "$.user.age",
367
+ "$.user.hobbies[*]",
368
+ "$.user.profile",
369
+ "$.user.profile.active",
370
+ "$.user.profile.settings",
371
+ "$.user.profile.settings.theme",
372
+ "$.user.profile.settings.notifications"
373
+ ]
374
+
375
+ for path in expected_paths:
376
+ self.assertIn(path, types_paths, f"Path {path} should be found by all methods")
377
+ self.assertIn(path, detailed_paths, f"Path {path} should be in detailed info")
378
+
379
+ def test_type_consistency_across_methods(self):
380
+ """
381
+ Test that type information is consistent across different methods
382
+ """
383
+ analyzer = JSONPathStructureAnalyzer()
384
+ analyzer.extract_all_paths(old_json)
385
+
386
+ # Get data using different methods
387
+ # structure_report = analyzer.get_structure_report()
388
+ paths_with_types = analyzer.get_paths_with_types()
389
+ detailed_report = analyzer.get_detailed_type_report()
390
+
391
+ # For each path, verify consistency
392
+ for path in paths_with_types:
393
+ # Detailed report should have the same primary type
394
+ if path in detailed_report:
395
+ detailed_type = detailed_report[path]["primary_type"]
396
+ simple_type = paths_with_types[path]
397
+
398
+ # They should match (detailed might have more info for mixed types)
399
+ if not detailed_type.startswith("mixed("):
400
+ self.assertEqual(simple_type, detailed_type,
401
+ f"Type inconsistency for {path}: {simple_type} vs {detailed_type}")
402
+
403
+ def test_extract_all_paths_from_old_structure(self):
404
+ """
405
+ Test that analyzer correctly extracts all paths from the old JSON structure
406
+ """
407
+ analyzer = JSONPathStructureAnalyzer()
408
+ paths = analyzer.extract_all_paths(old_json)
409
+
410
+ # Verify the top-level paths
411
+ self.assertIn("$.key1", paths)
412
+ self.assertIn("$.key2", paths)
413
+ self.assertIn("$.key_nested1", paths)
414
+
415
+ # Verify the nested object paths
416
+ self.assertIn("$.key_nested1.key_nested2", paths)
417
+ self.assertIn("$.key_nested1.key_nested3", paths)
418
+ self.assertIn("$.key_nested1.array_nested_4[*]", paths)
419
+
420
+ # Verify the deeply nested paths (3-4 levels deep)
421
+ self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", paths)
422
+ self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*]", paths)
423
+
424
+ def test_extract_all_paths_from_new_structure(self):
425
+ """
426
+ Test that analyzer correctly extracts paths from the new JSON structure
427
+ """
428
+ analyzer = JSONPathStructureAnalyzer()
429
+ paths = analyzer.extract_all_paths(new_json)
430
+
431
+ # Verify renamed the array path
432
+ self.assertIn("$.key_nested1.array_changed_4[*]", paths)
433
+
434
+ # Verify renamed the nested key
435
+ self.assertIn("$.key_nested1.array_changed_4[*].last_change", paths)
436
+
437
+ # Verify unchanged the nested array
438
+ self.assertIn("$.key_nested1.array_changed_4[*].array_nested_5[*]", paths)
439
+
440
+ def test_structure_report_format_old_json(self):
441
+ """
442
+ Test structure report format for old JSON
443
+ """
444
+ analyzer = JSONPathStructureAnalyzer()
445
+ analyzer.extract_all_paths(old_json)
446
+ report = analyzer.get_structure_report()
447
+
448
+ # Check specific format elements
449
+ self.assertIn("$.key1 -- row 1", report)
450
+ self.assertIn("$.key2 -- 22", report)
451
+ self.assertIn("$.key_nested1.array_nested_4[*] -- array[4]", report)
452
+ self.assertIn("$.key_nested1.array_nested_4[*].key_nested4 -- row 8", report)
453
+ self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*] -- array[2]", report)
454
+
455
+ def test_structure_report_format_new_json(self):
456
+ """
457
+ Test structure report format for new JSON
458
+ """
459
+ analyzer = JSONPathStructureAnalyzer()
460
+ analyzer.extract_all_paths(new_json)
461
+ report = analyzer.get_structure_report()
462
+
463
+ # Check renamed elements appear correctly
464
+ self.assertIn("$.key_nested1.array_changed_4[*] -- array[4]", report)
465
+ self.assertIn("$.key_nested1.array_changed_4[*].last_change -- row 8", report)
466
+
467
+ # Check unchanged elements
468
+ self.assertIn("$.key1 -- row 1", report)
469
+ self.assertIn("$.key2 -- 22", report)
470
+
471
+ def test_analyze_with_jsonpath_function(self):
472
+ """
473
+ Test the convenience function for structure analysis
474
+ """
475
+ old_report = analyze_with_jsonpath(old_json)
476
+ new_report = analyze_with_jsonpath(new_json)
477
+
478
+ # Verify both reports are valid strings
479
+ self.assertIsInstance(old_report, str)
480
+ self.assertGreater(len(old_report), 0)
481
+ self.assertIsInstance(new_report, str)
482
+ self.assertGreater(len(new_report), 0)
483
+
484
+ # Verify key differences
485
+ self.assertIn("array_nested_4", old_report)
486
+ self.assertIn("array_changed_4", new_report)
487
+ self.assertIn("key_nested4", old_report)
488
+ self.assertIn("last_change", new_report)
489
+
490
+ def test_compare_json_structures_method(self):
491
+ """
492
+ Test the compare_json_structures method directly
493
+ """
494
+ analyzer = JSONPathStructureAnalyzer()
495
+ analyzer.extract_all_paths(old_json)
496
+
497
+ comparison = analyzer.compare_json_structures(new_json)
498
+
499
+ # Test all expected keys within the comparison result
500
+ expected_keys = [
501
+ "added_paths", "removed_paths", "common_paths",
502
+ "type_changes", "value_differences", "array_size_changes",
503
+ "array_lengths_old", "array_lengths_new", "summary"
504
+ ]
505
+
506
+ for key in expected_keys:
507
+ self.assertIn(key, comparison, f"Key {key} should be in comparison result")
508
+
509
+ # Test summary statistics
510
+ summary = comparison["summary"]
511
+ self.assertGreater(summary["total_paths_old"], 0)
512
+ self.assertGreater(summary["total_paths_new"], 0)
513
+ self.assertGreater(summary["paths_removed"], 0)
514
+ self.assertGreater(summary["paths_added"], 0)
515
+
516
+ # Test array length tracking
517
+ self.assertIn("$.key_nested1.array_nested_4[*]", comparison["array_lengths_old"])
518
+ self.assertIn("$.key_nested1.array_changed_4[*]", comparison["array_lengths_new"])
519
+ self.assertEqual(comparison["array_lengths_old"]["$.key_nested1.array_nested_4[*]"], 4)
520
+ self.assertEqual(comparison["array_lengths_new"]["$.key_nested1.array_changed_4[*]"], 4)
521
+
522
+ def test_get_array_lengths_method(self):
523
+ """
524
+ Test the get_array_lengths method
525
+ """
526
+ test_data = {
527
+ "empty_array": [],
528
+ "small_array": [1, 2],
529
+ "large_array": list(range(100)),
530
+ "nested": {
531
+ "inner_array": ["a", "b", "c", "d", "e"]
532
+ }
533
+ }
534
+
535
+ analyzer = JSONPathStructureAnalyzer()
536
+ analyzer.extract_all_paths(test_data)
537
+ array_lengths = analyzer.get_array_lengths()
538
+
539
+ # Test all array lengths are captured
540
+ self.assertEqual(array_lengths["$.empty_array[*]"], 0)
541
+ self.assertEqual(array_lengths["$.small_array[*]"], 2)
542
+ self.assertEqual(array_lengths["$.large_array[*]"], 100)
543
+ self.assertEqual(array_lengths["$.nested.inner_array[*]"], 5)
544
+
545
+ # Test that non-array paths are not in array_lengths
546
+ for path in array_lengths.keys():
547
+ self.assertTrue(path.endswith("[*]"), f"Array length path {path} should end with [*]")
548
+
549
+ def test_value_differences_detection(self):
550
+ """
551
+ Test detection of value changes in compare_json_structures
552
+ """
553
+ old_data = {
554
+ "name": "John",
555
+ "age": 25,
556
+ "city": "New York"
557
+ }
558
+
559
+ new_data = {
560
+ "name": "John", # unchanged
561
+ "age": 26, # changed
562
+ "city": "Boston" # changed
563
+ }
564
+
565
+ analyzer = JSONPathStructureAnalyzer()
566
+ analyzer.extract_all_paths(old_data)
567
+ comparison = analyzer.compare_json_structures(new_data)
568
+
569
+ # Should detect value changes
570
+ self.assertIn("$.age", comparison["value_differences"])
571
+ self.assertIn("$.city", comparison["value_differences"])
572
+ self.assertNotIn("$.name", comparison["value_differences"])
573
+
574
+ # Test change details
575
+ age_change = comparison["value_differences"]["$.age"]
576
+ self.assertEqual(age_change["old_value"], "25")
577
+ self.assertEqual(age_change["new_value"], "26")
578
+
579
+ city_change = comparison["value_differences"]["$.city"]
580
+ self.assertEqual(city_change["old_value"], "New York")
581
+ self.assertEqual(city_change["new_value"], "Boston")
582
+
583
+ def test_array_size_changes_detection(self):
584
+ """
585
+ Test detection of array size changes
586
+ """
587
+ old_data = {
588
+ "items": [1, 2, 3],
589
+ "tags": ["a", "b"]
590
+ }
591
+
592
+ new_data = {
593
+ "items": [1, 2, 3, 4, 5], # size increased
594
+ "tags": ["a"] # size decreased
595
+ }
596
+
597
+ analyzer = JSONPathStructureAnalyzer()
598
+ analyzer.extract_all_paths(old_data)
599
+ comparison = analyzer.compare_json_structures(new_data)
600
+
601
+ # Should detect array size changes
602
+ self.assertIn("$.items[*]", comparison["array_size_changes"])
603
+ self.assertIn("$.tags[*]", comparison["array_size_changes"])
604
+
605
+ # Test size change details
606
+ items_change = comparison["array_size_changes"]["$.items[*]"]
607
+ self.assertEqual(items_change["old_size"], 3)
608
+ self.assertEqual(items_change["new_size"], 5)
609
+ self.assertEqual(items_change["size_change"], 2)
610
+
611
+ tags_change = comparison["array_size_changes"]["$.tags[*]"]
612
+ self.assertEqual(tags_change["old_size"], 2)
613
+ self.assertEqual(tags_change["new_size"], 1)
614
+ self.assertEqual(tags_change["size_change"], -1)
615
+
616
+ def test_compare_json_with_jsonpath_structures_function(self):
617
+ """
618
+ Test the compare_json_with_jsonpath_structures convenience function
619
+ """
620
+ # Test with print_report=False
621
+ comparison = compare_json_with_jsonpath_structures(old_json, new_json, print_report=False)
622
+
623
+ # Should return the same structure as the method
624
+ self.assertIn("summary", comparison)
625
+ self.assertIn("added_paths", comparison)
626
+ self.assertIn("removed_paths", comparison)
627
+
628
+ # Test that it works without printing (no exception thrown)
629
+ self.assertIsInstance(comparison, dict)
630
+
631
+ # Test specific changes
632
+ self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"])
633
+ self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"])
634
+
635
+ def test_nested_arrays_length_tracking(self):
636
+ """
637
+ Test array length tracking for deeply nested arrays
638
+ """
639
+ test_data = {
640
+ "level1": [
641
+ {"level2": [1, 2, 3]},
642
+ {"level2": [4, 5]},
643
+ {"level2": [6, 7, 8, 9]}
644
+ ]
645
+ }
646
+
647
+ analyzer = JSONPathStructureAnalyzer()
648
+ analyzer.extract_all_paths(test_data)
649
+ array_lengths = analyzer.get_array_lengths()
650
+
651
+ # Should track both levels of arrays
652
+ self.assertIn("$.level1[*]", array_lengths)
653
+ self.assertIn("$.level1[*].level2[*]", array_lengths)
654
+
655
+ # Check lengths
656
+ self.assertEqual(array_lengths["$.level1[*]"], 3)
657
+ # Note: The nested array length will be from the last item processed (current implementation)
658
+ self.assertEqual(array_lengths["$.level1[*].level2[*]"], 4)
659
+
660
+ def test_type_changes_detection_in_comparison(self):
661
+ """
662
+ Test detection of type changes in compare_json_structures
663
+ """
664
+ old_data = {
665
+ "field1": "string_value",
666
+ "field2": 42,
667
+ "field3": [1, 2, 3]
668
+ }
669
+
670
+ new_data = {
671
+ "field1": 123,
672
+ "field2": 42,
673
+ "field3": "not_array"
674
+ }
675
+
676
+ analyzer = JSONPathStructureAnalyzer()
677
+ analyzer.extract_all_paths(old_data)
678
+ comparison = analyzer.compare_json_structures(new_data)
679
+
680
+ # Should detect type changes
681
+ self.assertIn("$.field1", comparison["type_changes"])
682
+ self.assertIn("$.field3", comparison["type_changes"])
683
+ self.assertNotIn("$.field2", comparison["type_changes"])
684
+
685
+ # Test change details
686
+ field1_change = comparison["type_changes"]["$.field1"]
687
+ self.assertEqual(field1_change["old_type"], "str")
688
+ self.assertEqual(field1_change["new_type"], "int")
689
+
690
+ field3_change = comparison["type_changes"]["$.field3"]
691
+ self.assertEqual(field3_change["new_type"], "str")
692
+ # This will check the type and expect a list
693
+ self.assertEqual(field3_change["old_type"], "list")
694
+
695
+ def test_analyze_dict_list_simple(self):
696
+ """
697
+ Test analyze_dict_list_simple function with a list of dictionaries
698
+ """
699
+ dict_list = [
700
+ {
701
+ "user": "john",
702
+ "age": 25,
703
+ "tags": ["admin", "user"]
704
+ },
705
+ {
706
+ "user": "jane",
707
+ "age": 30,
708
+ "tags": ["user"],
709
+ "active": True
710
+ },
711
+ {
712
+ "user": "bob",
713
+ "score": 95.5,
714
+ "tags": ["guest", "temp", "new"]
715
+ }
716
+ ]
717
+
718
+ # Test the function
719
+ results = analyze_dict_list_simple(dict_list)
720
+
721
+ # Basic structure tests
722
+ self.assertEqual(len(results), 3)
723
+ self.assertIsInstance(results, list)
724
+
725
+ # Test each result has expected keys
726
+ for i, result in enumerate(results):
727
+ self.assertEqual(result["index"], i)
728
+ self.assertIn("paths_with_types", result)
729
+ self.assertIn("detailed_report", result)
730
+ self.assertIn("array_lengths", result)
731
+ self.assertIn("structure_report", result)
732
+
733
+ # Test first dict analysis
734
+ first_result = results[0]
735
+ self.assertIn("$.user", first_result["paths_with_types"])
736
+ self.assertIn("$.age", first_result["paths_with_types"])
737
+ self.assertIn("$.tags[*]", first_result["paths_with_types"])
738
+ self.assertEqual(first_result["paths_with_types"]["$.user"], "str")
739
+ self.assertEqual(first_result["paths_with_types"]["$.age"], "int")
740
+ self.assertEqual(first_result["paths_with_types"]["$.tags[*]"], "array")
741
+ self.assertEqual(first_result["array_lengths"]["$.tags[*]"], 2)
742
+
743
+ # Test second dict has additional field
744
+ second_result = results[1]
745
+ self.assertIn("$.active", second_result["paths_with_types"])
746
+ self.assertEqual(second_result["paths_with_types"]["$.active"], "bool")
747
+ self.assertEqual(second_result["array_lengths"]["$.tags[*]"], 1)
748
+
749
+ # Test third dict differences
750
+ third_result = results[2]
751
+ self.assertIn("$.score", third_result["paths_with_types"])
752
+ self.assertNotIn("$.age", third_result["paths_with_types"]) # age missing in third dict
753
+ self.assertEqual(third_result["paths_with_types"]["$.score"], "float")
754
+ self.assertEqual(third_result["array_lengths"]["$.tags[*]"], 3)
755
+
756
+ # Test structure reports are strings
757
+ for result in results:
758
+ self.assertIsInstance(result["structure_report"], str)
759
+ self.assertGreater(len(result["structure_report"]), 0)
760
+
761
+ # Test detailed reports have proper structure
762
+ for result in results:
763
+ detailed = result["detailed_report"]
764
+ for path, info in detailed.items():
765
+ self.assertIn("types", info)
766
+ self.assertIn("primary_type", info)
767
+ self.assertIn("is_array", info)
768
+ self.assertIn("samples", info)
769
+ self.assertIn("sample_count", info)
770
+
771
+ def test_filter_paths_excluding_keys(self):
772
+ """
773
+ Test filtering paths to exclude specific keys
774
+ """
775
+ test_data = {
776
+ 'definition': 'enjoying or showing or marked by joy or pleasure',
777
+ 'examples': ['a happy smile', 'spent many happy days on the beach'],
778
+ 'related_words': [{'base_form': 'euphoric'}, {'base_form': 'elated'}],
779
+ 'relation_type': 'also_see',
780
+ 'source': 'wordnet',
781
+ 'wordnet_pos': 'a'
782
+ }
783
+
784
+ analyzer = JSONPathStructureAnalyzer()
785
+ analyzer.extract_all_paths(test_data)
786
+
787
+ # Test without exclusion
788
+ all_paths = analyzer.paths
789
+ self.assertIn("$.examples[*]", all_paths)
790
+ self.assertIn("$.definition", all_paths)
791
+
792
+ # Test with exclusion
793
+ filtered_paths = analyzer.filter_paths_excluding_keys({'examples'})
794
+ self.assertNotIn("$.examples[*]", filtered_paths)
795
+ self.assertIn("$.definition", filtered_paths)
796
+ self.assertIn("$.related_words[*]", filtered_paths)
797
+ self.assertIn("$.related_words[*].base_form", filtered_paths)
798
+
799
+ # Test excluding multiple keys
800
+ filtered_paths_multi = analyzer.filter_paths_excluding_keys({'examples', 'source'})
801
+ self.assertNotIn("$.examples[*]", filtered_paths_multi)
802
+ self.assertNotIn("$.source", filtered_paths_multi)
803
+ self.assertIn("$.definition", filtered_paths_multi)
804
+
805
+
806
+ def test_get_filtered_structure_report(self):
807
+ """
808
+ Test filtered structure report generation
809
+ """
810
+ test_data = {
811
+ 'definition': 'test definition',
812
+ 'examples': ['example1', 'example2'],
813
+ 'metadata': {'source': 'test', 'version': 1},
814
+ 'tags': ['tag1', 'tag2', 'tag3']
815
+ }
816
+
817
+ analyzer = JSONPathStructureAnalyzer()
818
+ analyzer.extract_all_paths(test_data)
819
+
820
+ # Test filtered report
821
+ filtered_report = analyzer.get_filtered_structure_report({'examples'})
822
+
823
+ # Should not contain examples
824
+ self.assertNotIn("examples", filtered_report)
825
+
826
+ # Should contain other fields
827
+ self.assertIn("$.definition", filtered_report)
828
+ self.assertIn("$.metadata", filtered_report)
829
+ self.assertIn("$.tags[*]", filtered_report)
830
+
831
+ # Test structure
832
+ lines = filtered_report.split('\n')
833
+ self.assertGreater(len(lines), 0)
834
+
835
+ # Verify specific content
836
+ self.assertIn("$.definition -- test definition", filtered_report)
837
+ self.assertIn("$.tags[*] -- array[3]", filtered_report)
838
+
839
+ def test_get_filtered_paths_with_types(self):
840
+ """
841
+ Test filtered paths with types
842
+ """
843
+ test_data = {
844
+ 'name': 'test',
845
+ 'count': 42,
846
+ 'items': [1, 2, 3],
847
+ 'exclude_me': {'nested': 'value'}
848
+ }
849
+
850
+ analyzer = JSONPathStructureAnalyzer()
851
+ analyzer.extract_all_paths(test_data)
852
+
853
+ # Test filtered paths with types
854
+ filtered_paths_types = analyzer.get_filtered_paths_with_types({'exclude_me'})
855
+
856
+ # Should not contain excluded paths
857
+ self.assertNotIn("$.exclude_me", filtered_paths_types)
858
+ self.assertNotIn("$.exclude_me.nested", filtered_paths_types)
859
+
860
+ # Should contain other paths
861
+ self.assertIn("$.name", filtered_paths_types)
862
+ self.assertIn("$.count", filtered_paths_types)
863
+ self.assertIn("$.items[*]", filtered_paths_types)
864
+
865
+ # Test types
866
+ self.assertEqual(filtered_paths_types["$.name"], "str")
867
+ self.assertEqual(filtered_paths_types["$.count"], "int")
868
+ self.assertEqual(filtered_paths_types["$.items[*]"], "array")
869
+
870
+ def test_get_filtered_detailed_type_report(self):
871
+ """
872
+ Test filtered detailed type report
873
+ """
874
+ test_data = {
875
+ 'title': 'Sample Title',
876
+ 'description': 'Sample Description',
877
+ 'private_data': {'secret': 'hidden'},
878
+ 'public_list': ['item1', 'item2']
879
+ }
880
+
881
+ analyzer = JSONPathStructureAnalyzer()
882
+ analyzer.extract_all_paths(test_data)
883
+
884
+ # Test filtered detailed report
885
+ filtered_detailed = analyzer.get_filtered_detailed_type_report({'private_data'})
886
+
887
+ # Should not contain excluded paths
888
+ self.assertNotIn("$.private_data", filtered_detailed)
889
+ self.assertNotIn("$.private_data.secret", filtered_detailed)
890
+
891
+ # Should contain other paths
892
+ self.assertIn("$.title", filtered_detailed)
893
+ self.assertIn("$.public_list[*]", filtered_detailed)
894
+
895
+ # Test structure of remaining items
896
+ title_info = filtered_detailed["$.title"]
897
+ self.assertEqual(title_info["primary_type"], "str")
898
+ self.assertFalse(title_info["is_array"])
899
+ self.assertIn("Sample Title", title_info["samples"])
900
+
901
+ list_info = filtered_detailed["$.public_list[*]"]
902
+ self.assertEqual(list_info["primary_type"], "array")
903
+ self.assertTrue(list_info["is_array"])
904
+ self.assertEqual(list_info["array_length"], 2)
905
+
906
+ def test_analyze_dict_list_simple_with_exclusion(self):
907
+ """
908
+ Test analyze_dict_list_simple with key exclusion
909
+ """
910
+ dict_list = [
911
+ {
912
+ "name": "John",
913
+ "age": 25,
914
+ "private_info": {"ssn": "123-45-6789"},
915
+ "tags": ["user", "admin"]
916
+ },
917
+ {
918
+ "name": "Jane",
919
+ "age": 30,
920
+ "private_info": {"ssn": "987-65-4321"},
921
+ "tags": ["user"],
922
+ "active": True
923
+ }
924
+ ]
925
+
926
+ # Test with exclusion
927
+ results = analyze_dict_list_simple(dict_list, exclude_keys={'private_info'})
928
+
929
+ # Basic structure tests
930
+ self.assertEqual(len(results), 2)
931
+
932
+ # Test that private_info is excluded from all results
933
+ for result in results:
934
+ paths_with_types = result["paths_with_types"]
935
+ detailed_report = result["detailed_report"]
936
+
937
+ # Should not contain private_info paths
938
+ private_paths = [path for path in paths_with_types.keys() if 'private_info' in path]
939
+ self.assertEqual(len(private_paths), 0, "private_info paths should be excluded")
940
+
941
+ private_detailed = [path for path in detailed_report.keys() if 'private_info' in path]
942
+ self.assertEqual(len(private_detailed), 0, "private_info should be excluded from detailed report")
943
+
944
+ # Should contain other paths
945
+ self.assertIn("$.name", paths_with_types)
946
+ self.assertIn("$.age", paths_with_types)
947
+ self.assertIn("$.tags[*]", paths_with_types)
948
+
949
+ # Test second dict has additional field (but not private_info)
950
+ second_result = results[1]
951
+ self.assertIn("$.active", second_result["paths_with_types"])
952
+ self.assertEqual(second_result["paths_with_types"]["$.active"], "bool")
953
+
954
+ # Test structure reports don't contain excluded keys
955
+ for result in results:
956
+ structure_report = result["structure_report"]
957
+ self.assertNotIn("private_info", structure_report)
958
+ self.assertIn("$.name", structure_report)
959
+
960
+ def test_exclusion_with_nested_arrays(self):
961
+ """
962
+ Test exclusion works with nested arrays and complex structures
963
+ """
964
+ test_data = {
965
+ "valid_data": {
966
+ "items": [
967
+ {"id": 1, "name": "item1"},
968
+ {"id": 2, "name": "item2"}
969
+ ]
970
+ },
971
+ "sensitive_data": {
972
+ "secrets": [
973
+ {"key": "secret1", "value": "hidden1"},
974
+ {"key": "secret2", "value": "hidden2"}
975
+ ]
976
+ }
977
+ }
978
+
979
+ analyzer = JSONPathStructureAnalyzer()
980
+ analyzer.extract_all_paths(test_data)
981
+
982
+ # Test exclusion of nested structure
983
+ filtered_paths = analyzer.filter_paths_excluding_keys({'sensitive_data'})
984
+
985
+ # Should exclude all sensitive_data paths
986
+ sensitive_paths = [path for path in analyzer.paths if 'sensitive_data' in path]
987
+ self.assertGreater(len(sensitive_paths), 0, "Should have sensitive_data paths in original")
988
+
989
+ for sensitive_path in sensitive_paths:
990
+ self.assertNotIn(sensitive_path, filtered_paths, f"Should exclude {sensitive_path}")
991
+
992
+ # Should keep valid_data paths
993
+ self.assertIn("$.valid_data", filtered_paths)
994
+ self.assertIn("$.valid_data.items[*]", filtered_paths)
995
+ self.assertIn("$.valid_data.items[*].id", filtered_paths)
996
+ self.assertIn("$.valid_data.items[*].name", filtered_paths)
997
+
998
+
999
+ class TestJSONPathComparator(unittest.TestCase):
1000
+ """
1001
+ Test JSONPath structure comparison functionality
1002
+ """
1003
+
1004
+ def test_extract_structure_paths_comparison(self):
1005
+ """
1006
+ Test extraction of structure paths from both JSON structures
1007
+ """
1008
+ old_paths = extract_structure_paths(old_json)
1009
+ new_paths = extract_structure_paths(new_json)
1010
+
1011
+ # Verify we get a reasonable number of paths
1012
+ self.assertGreaterEqual(len(old_paths), 7)
1013
+ self.assertGreaterEqual(len(new_paths), 7)
1014
+
1015
+ # Verify specific differences
1016
+ self.assertIn("$.key_nested1.array_nested_4[*]", old_paths)
1017
+ self.assertIn("$.key_nested1.array_changed_4[*]", new_paths)
1018
+ self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", old_paths)
1019
+ self.assertIn("$.key_nested1.array_changed_4[*].last_change", new_paths)
1020
+
1021
+ def test_extract_structure_paths_with_types(self):
1022
+ """
1023
+ Test extraction of structure paths with type information
1024
+ """
1025
+ old_paths_with_types = extract_structure_paths_with_types(old_json)
1026
+ new_paths_with_types = extract_structure_paths_with_types(new_json)
1027
+
1028
+ # Verify we get type information
1029
+ self.assertEqual(old_paths_with_types["$.key1"], "string")
1030
+ self.assertEqual(old_paths_with_types["$.key2"], "integer")
1031
+ self.assertEqual(old_paths_with_types["$.key_nested1"], "object")
1032
+
1033
+ # Check the actual array field, not the [*] path
1034
+ self.assertIn("array", old_paths_with_types["$.key_nested1.array_nested_4"])
1035
+
1036
+ # The [*] path represents the type of array elements (first element)
1037
+ self.assertEqual(old_paths_with_types["$.key_nested1.array_nested_4[*]"], "string")
1038
+
1039
+ # Verify type differences between old and new
1040
+ self.assertIn("$.key_nested1.array_nested_4", old_paths_with_types)
1041
+ self.assertIn("$.key_nested1.array_changed_4", new_paths_with_types)
1042
+ self.assertNotIn("$.key_nested1.array_nested_4", new_paths_with_types)
1043
+
1044
+ def test_compare_structures_array_rename(self):
1045
+ """
1046
+ Test comparison detects array field rename
1047
+ """
1048
+ comparator = JSONPathComparator()
1049
+ comparison = comparator.compare_structures(old_json, new_json)
1050
+
1051
+ # Should detect removed paths (old structure)
1052
+ self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"])
1053
+ self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", comparison["removed_paths"])
1054
+
1055
+ # Should detect added paths (new structure)
1056
+ self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"])
1057
+ self.assertIn("$.key_nested1.array_changed_4[*].last_change", comparison["added_paths"])
1058
+
1059
+ def test_compare_structures_with_types(self):
1060
+ """
1061
+ Test comparison with type information
1062
+ """
1063
+ comparator = JSONPathComparator()
1064
+ comparison = comparator.compare_structures_with_types(old_json, new_json)
1065
+
1066
+ # Should detect added paths with types
1067
+ self.assertIn("$.key_nested1.array_changed_4", comparison["added_paths"])
1068
+
1069
+ # Check for the actual existing paths in the comparison
1070
+ # The deeply nested paths might not be included in the type comparison
1071
+ if "$.key_nested1.array_changed_4[*].last_change" in comparison["added_paths"]:
1072
+ self.assertIn("string", comparison["added_paths"]["$.key_nested1.array_changed_4[*].last_change"])
1073
+
1074
+ # Should detect removed paths with types
1075
+ self.assertIn("$.key_nested1.array_nested_4", comparison["removed_paths"])
1076
+
1077
+ # Should detect common paths with types
1078
+ self.assertIn("$.key1", comparison["common_paths"])
1079
+ self.assertEqual(comparison["common_paths"]["$.key1"], "string")
1080
+
1081
+ # Should not detect type changes for this example (same types, different paths)
1082
+ self.assertEqual(len(comparison["type_changes"]), 0)
1083
+
1084
+ def test_type_changes_detection(self):
1085
+ """
1086
+ Test detection of type changes in paths
1087
+ """
1088
+ # Create test data with type changes
1089
+ json_with_string = {"test_field": "hello"}
1090
+ json_with_number = {"test_field": 42}
1091
+
1092
+ comparator = JSONPathComparator()
1093
+ comparison = comparator.compare_structures_with_types(json_with_string, json_with_number)
1094
+
1095
+ # Should detect type change
1096
+ self.assertIn("$.test_field", comparison["type_changes"])
1097
+ self.assertEqual(comparison["type_changes"]["$.test_field"]["old_type"], "string")
1098
+ self.assertEqual(comparison["type_changes"]["$.test_field"]["new_type"], "integer")
1099
+
1100
+ def test_compare_structures_unchanged_paths(self):
1101
+ """
1102
+ Test that unchanged paths are correctly identified
1103
+ """
1104
+ comparator = JSONPathComparator()
1105
+ comparison = comparator.compare_structures(old_json, new_json)
1106
+
1107
+ # These paths should remain unchanged
1108
+ unchanged_paths = [
1109
+ "$.key1",
1110
+ "$.key2",
1111
+ "$.key_nested1",
1112
+ "$.key_nested1.key_nested2",
1113
+ "$.key_nested1.key_nested3"
1114
+ ]
1115
+
1116
+ for path in unchanged_paths:
1117
+ self.assertIn(path, comparison["common_paths"], f"Path {path} should be in common paths")
1118
+ self.assertNotIn(path, comparison["added_paths"], f"Path {path} should not be added")
1119
+ self.assertNotIn(path, comparison["removed_paths"], f"Path {path} should not be removed")
1120
+
1121
+ def test_compare_structures_nested_array_preserved(self):
1122
+ """
1123
+ Test the deeply nested array structure is preserved despite parent changes
1124
+ """
1125
+ comparator = JSONPathComparator()
1126
+ comparison = comparator.compare_structures(old_json, new_json)
1127
+
1128
+ # The nested array should exist in both (though path changed due to parent rename)
1129
+ old_nested_array = "$.key_nested1.array_nested_4[*].array_nested_5[*]"
1130
+ new_nested_array = "$.key_nested1.array_changed_4[*].array_nested_5[*]"
1131
+
1132
+ self.assertIn(old_nested_array, comparison["removed_paths"])
1133
+ self.assertIn(new_nested_array, comparison["added_paths"])
1134
+
1135
+ def test_path_validations_with_specific_paths(self):
1136
+ """
1137
+ Test validation of specific paths between old and new structures
1138
+ """
1139
+ common_paths = [
1140
+ "$.key1", # Should exist in both
1141
+ "$.key2", # Should exist in both
1142
+ "$.key_nested1.array_nested_4[*]", # Exists only in old
1143
+ "$.key_nested1.array_changed_4[*]", # Exists only in new
1144
+ "$.key_nested1.key_nested2" # Should exist in both
1145
+ ]
1146
+
1147
+ comparator = JSONPathComparator(common_paths)
1148
+ comparison = comparator.compare_structures(old_json, new_json)
1149
+
1150
+ validations = comparison["path_validations"]
1151
+
1152
+ # Test paths that exist in both
1153
+ self.assertEqual(validations["$.key1"]["status"], "✅")
1154
+ self.assertTrue(validations["$.key1"]["old_found"])
1155
+ self.assertTrue(validations["$.key1"]["new_found"])
1156
+
1157
+ # Test paths that exist only in old
1158
+ self.assertEqual(validations["$.key_nested1.array_nested_4[*]"]["status"], "❌")
1159
+ self.assertTrue(validations["$.key_nested1.array_nested_4[*]"]["old_found"])
1160
+ self.assertFalse(validations["$.key_nested1.array_nested_4[*]"]["new_found"])
1161
+
1162
+ # Test paths that exist only in new
1163
+ self.assertEqual(validations["$.key_nested1.array_changed_4[*]"]["status"], "❌")
1164
+ self.assertFalse(validations["$.key_nested1.array_changed_4[*]"]["old_found"])
1165
+ self.assertTrue(validations["$.key_nested1.array_changed_4[*]"]["new_found"])
1166
+
1167
+
1168
+ class TestJSONPathIntegration(unittest.TestCase):
1169
+ """
1170
+ Integration tests for the complete JSONPath diff workflow
1171
+ """
1172
+
1173
+ def test_complete_diff_workflow(self):
1174
+ """
1175
+ Test the complete workflow from analysis to comparison using the provided data
1176
+ """
1177
+ # Step 1: Analyze the old structure
1178
+ old_report = analyze_with_jsonpath(old_json)
1179
+ self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", old_report)
1180
+
1181
+ # Step 2: Analyze the new structure
1182
+ new_report = analyze_with_jsonpath(new_json)
1183
+ self.assertIn("$.key_nested1.array_changed_4[*].last_change", new_report)
1184
+
1185
+ # Step 3: Compare structures
1186
+ critical_paths = [
1187
+ "$.key1",
1188
+ "$.key2",
1189
+ "$.key_nested1.key_nested2",
1190
+ "$.key_nested1.key_nested3"
1191
+ ]
1192
+
1193
+ comparison = compare_json_with_jsonpath(old_json, new_json, critical_paths)
1194
+
1195
+ # Verify comparison results
1196
+ self.assertIsInstance(comparison, dict)
1197
+ self.assertGreater(len(comparison["added_paths"]), 0)
1198
+ self.assertGreater(len(comparison["removed_paths"]), 0)
1199
+ self.assertGreater(len(comparison["common_paths"]), 0)
1200
+
1201
+ def test_complete_diff_workflow_with_types(self):
1202
+ """
1203
+ Test the complete workflow with type information
1204
+ """
1205
+ # Step 1: Compare structures with types
1206
+ critical_paths = [
1207
+ "$.key1",
1208
+ "$.key2",
1209
+ "$.key_nested1.key_nested2"
1210
+ ]
1211
+
1212
+ comparison = compare_json_with_jsonpath_and_types(old_json, new_json, critical_paths)
1213
+
1214
+ # Verify comparison results include type information
1215
+ self.assertIsInstance(comparison, dict)
1216
+ self.assertIn("added_paths", comparison)
1217
+ self.assertIn("removed_paths", comparison)
1218
+ self.assertIn("type_changes", comparison)
1219
+
1220
+ # Verify type information is included
1221
+ if comparison["added_paths"]:
1222
+ for path, type_info in comparison["added_paths"].items():
1223
+ self.assertIsInstance(type_info, str)
1224
+ self.assertGreater(len(type_info), 0)
1225
+
1226
+ def test_detect_specific_changes(self):
1227
+ """
1228
+ Test detection of the specific changes between old and new JSON
1229
+ """
1230
+ comparison = compare_json_with_jsonpath(old_json, new_json)
1231
+
1232
+ # Key changes that should be detected:
1233
+ # 1. array_nested_4 -> array_changed_4
1234
+ # 2. key_nested4 -> last_change
1235
+
1236
+ expected_removed = [
1237
+ "$.key_nested1.array_nested_4[*]",
1238
+ "$.key_nested1.array_nested_4[*].key_nested4"
1239
+ ]
1240
+
1241
+ expected_added = [
1242
+ "$.key_nested1.array_changed_4[*]",
1243
+ "$.key_nested1.array_changed_4[*].last_change"
1244
+ ]
1245
+
1246
+ for path in expected_removed:
1247
+ self.assertIn(path, comparison["removed_paths"], f"Expected removed path {path} not found")
1248
+
1249
+ for path in expected_added:
1250
+ self.assertIn(path, comparison["added_paths"], f"Expected added path {path} not found")
1251
+
1252
+ def test_structure_variations_old(self):
1253
+ """
1254
+ Test that old JSON structure contains expected array name
1255
+ """
1256
+ analyzer = JSONPathStructureAnalyzer()
1257
+ paths = analyzer.extract_all_paths(old_json)
1258
+
1259
+ expected_path = "$.key_nested1.array_nested_4[*]"
1260
+ self.assertIn(expected_path, paths, f"Expected path {expected_path} not found")
1261
+
1262
+ def test_structure_variations_new(self):
1263
+ """
1264
+ Test that new JSON structure contains expected array name
1265
+ """
1266
+ analyzer = JSONPathStructureAnalyzer()
1267
+ paths = analyzer.extract_all_paths(new_json)
1268
+
1269
+ expected_path = "$.key_nested1.array_changed_4[*]"
1270
+ self.assertIn(expected_path, paths, f"Expected path {expected_path} not found")
1271
+
1272
+ def test_json_string_compatibility(self):
1273
+ """
1274
+ Test that the tools work with JSON strings (serialized/deserialized)
1275
+ """
1276
+ # Convert to JSON string and back
1277
+ old_string = json.dumps(old_json)
1278
+ new_string = json.dumps(new_json)
1279
+
1280
+ old_parsed = json.loads(old_string)
1281
+ new_parsed = json.loads(new_string)
1282
+
1283
+ # Should work the same as with dict objects
1284
+ comparison = compare_json_with_jsonpath(old_parsed, new_parsed)
1285
+
1286
+ self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"])
1287
+ self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"])
1288
+
1289
+
1290
+ class TestEdgeCases(unittest.TestCase):
1291
+ """
1292
+ Test edge cases with the provided data structure
1293
+ """
1294
+
1295
+ def test_empty_json_comparison(self):
1296
+ """
1297
+ Test comparison with empty JSON
1298
+ """
1299
+ empty_json = {}
1300
+
1301
+ comparison = compare_json_with_jsonpath(old_json, empty_json)
1302
+
1303
+ # All old paths should be removed
1304
+ self.assertGreater(len(comparison["removed_paths"]), 0)
1305
+ self.assertEqual(len(comparison["added_paths"]), 0)
1306
+ self.assertEqual(len(comparison["common_paths"]), 0)
1307
+
1308
+ def test_empty_json_comparison_with_types(self):
1309
+ """
1310
+ Test comparison with empty JSON including types
1311
+ """
1312
+ empty_json = {}
1313
+
1314
+ comparison = compare_json_with_jsonpath_and_types(old_json, empty_json)
1315
+
1316
+ # All old paths should be removed with type info
1317
+ self.assertGreater(len(comparison["removed_paths"]), 0)
1318
+ self.assertEqual(len(comparison["added_paths"]), 0)
1319
+
1320
+ # Empty JSON still has the root path "$" in common
1321
+ # The comparison includes the root "$" path in both structures
1322
+ self.assertLessEqual(len(comparison["common_paths"]), 1)
1323
+
1324
+ # Verify type information is preserved
1325
+ for path, type_info in comparison["removed_paths"].items():
1326
+ self.assertIsInstance(type_info, str)
1327
+
1328
+ def test_identical_json_comparison(self):
1329
+ """
1330
+ Test comparison of identical JSON structures
1331
+ """
1332
+ comparison = compare_json_with_jsonpath(old_json, old_json)
1333
+
1334
+ # Should have no changes
1335
+ self.assertEqual(len(comparison["added_paths"]), 0)
1336
+ self.assertEqual(len(comparison["removed_paths"]), 0)
1337
+ self.assertGreater(len(comparison["common_paths"]), 0)
1338
+
1339
+ def test_identical_json_comparison_with_types(self):
1340
+ """
1341
+ Test comparison of identical JSON structures with types
1342
+ """
1343
+ comparison = compare_json_with_jsonpath_and_types(old_json, old_json)
1344
+
1345
+ # Should have no changes
1346
+ self.assertEqual(len(comparison["added_paths"]), 0)
1347
+ self.assertEqual(len(comparison["removed_paths"]), 0)
1348
+ self.assertEqual(len(comparison["type_changes"]), 0)
1349
+ self.assertGreater(len(comparison["common_paths"]), 0)
1350
+
1351
+ def test_deep_nested_array_analysis(self):
1352
+ """
1353
+ Test analysis of the deepest nested array (array_nested_5)
1354
+ """
1355
+ analyzer = JSONPathStructureAnalyzer()
1356
+ analyzer.extract_all_paths(old_json)
1357
+ report = analyzer.get_structure_report()
1358
+
1359
+ # Should properly analyze the deeply nested array
1360
+ self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*] -- array[2]", report)
1361
+
1362
+ def test_array_type_detection(self):
1363
+ """
1364
+ Test proper detection of array types in extract_structure_paths_with_types
1365
+ """
1366
+ paths_with_types = extract_structure_paths_with_types(old_json)
1367
+
1368
+ # Test array type detection on the actual array field, not the [*] path
1369
+ # The array field itself should have "array" in its type
1370
+ self.assertIn("array", paths_with_types["$.key_nested1.array_nested_4"])
1371
+ # The [*] path represents the type of array elements (the first element is a string)
1372
+ self.assertEqual(paths_with_types["$.key_nested1.array_nested_4[*]"], "string")
1373
+
1374
+
1375
+ class TestSimpleUsageExamples(unittest.TestCase):
1376
+ """
1377
+ Simple examples showing how to use the JSONPath diff tool
1378
+ """
1379
+
1380
+ def test_basic_structure_analysis_old(self):
1381
+ """
1382
+ Analyze the structure of the old JSON
1383
+ """
1384
+ report = analyze_with_jsonpath(old_json)
1385
+ app_logger.info("\nOLD JSON STRUCTURE:")
1386
+ app_logger.info(report)
1387
+
1388
+ # Basic assertions
1389
+ self.assertIn("$.key1 -- row 1", report)
1390
+ self.assertIn("$.key2 -- 22", report)
1391
+ self.assertIn("array_nested_4", report)
1392
+ self.assertIn("key_nested4", report)
1393
+
1394
+ def test_basic_structure_analysis_new(self):
1395
+ """
1396
+ Analyze the structure of the new JSON
1397
+ """
1398
+ report = analyze_with_jsonpath(new_json)
1399
+ app_logger.info("\nNEW JSON STRUCTURE:")
1400
+ app_logger.info(report)
1401
+
1402
+ # Basic assertions
1403
+ self.assertIn("$.key1 -- row 1", report)
1404
+ self.assertIn("$.key2 -- 22", report)
1405
+ self.assertIn("array_changed_4", report)
1406
+ self.assertIn("last_change", report)
1407
+
1408
+ def test_basic_comparison(self):
1409
+ """
1410
+ Compare old versus new JSON structures
1411
+ """
1412
+ app_logger.info("\nCOMPARISON RESULTS:")
1413
+ comparison = compare_json_with_jsonpath(old_json, new_json)
1414
+
1415
+ # Verify the main changes
1416
+ self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"])
1417
+ self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"])
1418
+
1419
+ # Verify unchanged elements
1420
+ self.assertIn("$.key1", comparison["common_paths"])
1421
+ self.assertIn("$.key2", comparison["common_paths"])
1422
+
1423
+ def test_basic_comparison_with_types(self):
1424
+ """
1425
+ Compare old versus new JSON structures with type information
1426
+ """
1427
+ app_logger.info("\nCOMPARISON RESULTS WITH TYPES:")
1428
+ comparison = compare_json_with_jsonpath_and_types(old_json, new_json)
1429
+
1430
+ # Verify the main changes with types
1431
+ self.assertIn("$.key_nested1.array_nested_4", comparison["removed_paths"])
1432
+ self.assertIn("$.key_nested1.array_changed_4", comparison["added_paths"])
1433
+
1434
+ # Verify type information is included
1435
+ self.assertEqual(comparison["common_paths"]["$.key1"], "string")
1436
+ self.assertEqual(comparison["common_paths"]["$.key2"], "integer")
1437
+
1438
+
1439
+ if __name__ == '__main__':
1440
+ unittest.main()
tests/my_ghost_writer/test_text_parsers2.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import unittest
2
  from unittest.mock import patch, MagicMock
3
 
@@ -5,6 +6,11 @@ from fastapi import HTTPException
5
 
6
  from my_ghost_writer.text_parsers2 import (extract_contextual_info_by_indices, get_wordnet_synonyms, inflect_synonym,
7
  is_nlp_available, process_synonym_groups)
 
 
 
 
 
8
 
9
 
10
  class TestTextParsers2(unittest.TestCase):
@@ -77,42 +83,45 @@ class TestTextParsers2(unittest.TestCase):
77
 
78
  def test_get_wordnet_synonyms(self):
79
  # Test with a word that has known synonyms
 
 
80
  word = "piano"
81
- synonyms = get_wordnet_synonyms(word)
 
 
 
 
 
82
 
83
- self.assertGreater(len(synonyms), 0)
84
- first_result = synonyms[0]
85
- self.assertIsInstance(first_result, dict)
86
- self.assertIn('definition', first_result)
87
- self.assertIn('examples', first_result)
88
- self.assertIn('pos', first_result)
89
- self.assertIn('synonyms', first_result)
90
- self.assertIsInstance(first_result['synonyms'], list)
91
 
92
  def test_get_wordnet_synonyms_custom_entry(self):
93
  word = "happy"
94
  pos = "ADJ"
95
- synonyms_list = get_wordnet_synonyms(word, pos)
96
- for synonym_by_sense in synonyms_list:
97
- self.assertIsInstance(synonym_by_sense, dict)
98
- self.assertIsInstance(synonym_by_sense["definition"], str)
99
- self.assertEqual(synonym_by_sense["pos"], pos)
100
- self.assertIsInstance(synonym_by_sense["examples"], list)
101
- synonyms = synonym_by_sense["synonyms"]
102
- for synonym_dict in synonyms:
103
- self.assertIsInstance(synonym_dict, dict)
104
- self.assertIsInstance(synonym_dict["definition"], str)
105
- self.assertIsInstance(synonym_dict["synonym"], str)
106
- self.assertIsInstance(synonym_dict["is_custom"], bool)
 
 
107
 
108
  def test_get_wordnet_synonyms_pos_filter(self):
109
  # Test with POS filtering
110
  word = "hunt"
111
- synonyms_verbs = get_wordnet_synonyms(word, pos_tag="VERB")
112
 
113
- self.assertGreater(len(synonyms_verbs), 0)
114
- for sense in synonyms_verbs:
115
- self.assertEqual(sense['pos'], 'v') # 'v' is the WordNet tag for VERB
116
 
117
  @patch("my_ghost_writer.text_parsers2.wn.synsets")
118
  def test_get_wordnet_synonyms_generic_exception(self, mock_synsets):
@@ -122,7 +131,7 @@ class TestTextParsers2(unittest.TestCase):
122
 
123
  # 400 Exception intercepted and relaunched as 500
124
  self.assertEqual(context.exception.status_code, 500)
125
- self.assertIn("Error retrieving synonyms: test exception", context.exception.detail)
126
 
127
  def test_inflect_synonym_noun_plural(self):
128
  # Test noun pluralization
@@ -263,11 +272,11 @@ class TestTextParsers2(unittest.TestCase):
263
  self.assertIsInstance(result, list)
264
  self.assertGreater(len(result), 0)
265
 
266
- first_sense = result[0]
267
  self.assertIn('definition', first_sense)
268
- self.assertIn('synonyms', first_sense)
269
 
270
- first_synonym_info = first_sense['synonyms'][0]
271
  self.assertIn('base_form', first_synonym_info)
272
  self.assertIn('inflected_form', first_synonym_info)
273
  # For a past-tense verb, the inflected form should be different from the base
@@ -283,19 +292,24 @@ class TestTextParsers2(unittest.TestCase):
283
  'original_indices': {'end': 60, 'start': 55}, 'pos': 'ADJ', 'sentence_position': 9,
284
  'tag': 'JJ', 'word': 'happy'
285
  }
286
- result_synonym_groups_list = process_synonym_groups(word, context_info)
287
- self.assertIsInstance(result_synonym_groups_list, list)
288
- for expected_synonym_group in result_synonym_groups_list:
289
- self.assertIsInstance(expected_synonym_group, dict)
290
- self.assertIsInstance(expected_synonym_group["definition"], str)
291
- self.assertEqual(expected_synonym_group["wordnet_pos"], context_info["pos"])
292
- self.assertIsInstance(expected_synonym_group["examples"], list)
293
- synonyms = expected_synonym_group["synonyms"]
294
- for synonym_dict in synonyms:
295
- self.assertIsInstance(synonym_dict, dict)
296
- self.assertIsInstance(synonym_dict["base_form"], str)
297
- self.assertIsInstance(synonym_dict["inflected_form"], str)
298
- self.assertIsInstance(synonym_dict["matches_context"], bool)
 
 
 
 
 
299
 
300
  @patch("my_ghost_writer.text_parsers2.wn.synsets")
301
  def test_process_synonym_groups_not_synonyms_by_sense(self, mock_synsets):
 
1
+ import json
2
  import unittest
3
  from unittest.mock import patch, MagicMock
4
 
 
6
 
7
  from my_ghost_writer.text_parsers2 import (extract_contextual_info_by_indices, get_wordnet_synonyms, inflect_synonym,
8
  is_nlp_available, process_synonym_groups)
9
+ from my_ghost_writer.jsonpath_comparator import JSONPathComparator
10
+ from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer, analyze_dict_list_simple
11
+ from my_ghost_writer.type_hints import TermRelationships
12
+ from tests import EVENTS_FOLDER
13
+ from tests.my_ghost_writer.helpers_tests import analyze_detailed_report_lists
14
 
15
 
16
  class TestTextParsers2(unittest.TestCase):
 
83
 
84
  def test_get_wordnet_synonyms(self):
85
  # Test with a word that has known synonyms
86
+ with open(EVENTS_FOLDER / "get_wordnet_synonyms_piano_ok1.json", "r") as src:
87
+ expected_detailed_report = json.load(src)
88
  word = "piano"
89
+ related_words = get_wordnet_synonyms(word)
90
+ first_related_words = related_words[0]
91
+ analyzer = JSONPathStructureAnalyzer()
92
+ analyzer.extract_all_paths(first_related_words)
93
+ detailed_report = analyzer.get_detailed_type_report()
94
+ analyze_detailed_report_lists(self, detailed_report, expected_detailed_report)
95
 
96
+ # with open(EVENTS_FOLDER / "get_wordnet_synonyms_piano_ok1.json", "w") as src:
97
+ # json.dump(detailed_report, src)
 
 
 
 
 
 
98
 
99
  def test_get_wordnet_synonyms_custom_entry(self):
100
  word = "happy"
101
  pos = "ADJ"
102
+ with open(EVENTS_FOLDER / "get_wordnet_synonyms_custom_entry_happy.json", "r") as src:
103
+ expected_report_dict_list = json.load(src)
104
+ # expected_detailed_report = []
105
+ related_word_groups = get_wordnet_synonyms(word, pos)
106
+ self.assertIsInstance(related_word_groups[0]["examples"], list)
107
+ for related_word_nth, expected_detailed_report_nth in zip(related_word_groups, expected_report_dict_list):
108
+ del related_word_nth['examples']
109
+ analyzer = JSONPathStructureAnalyzer()
110
+ analyzer.extract_all_paths(related_word_nth)
111
+ detailed_report = analyzer.get_detailed_type_report(get_samples=False)
112
+ self.assertDictEqual(detailed_report, expected_detailed_report_nth)
113
+ # expected_detailed_report.append(detailed_report)
114
+ # with open(EVENTS_FOLDER / "get_wordnet_synonyms_custom_entry_happy.json", "w") as src:
115
+ # json.dump(expected_detailed_report, src)
116
 
117
  def test_get_wordnet_synonyms_pos_filter(self):
118
  # Test with POS filtering
119
  word = "hunt"
120
+ related_words_verbs = get_wordnet_synonyms(word, pos_tag="VERB")
121
 
122
+ self.assertGreater(len(related_words_verbs), 0)
123
+ for sense in related_words_verbs:
124
+ self.assertEqual(sense['wordnet_pos'], 'v') # 'v' is the WordNet tag for VERB
125
 
126
  @patch("my_ghost_writer.text_parsers2.wn.synsets")
127
  def test_get_wordnet_synonyms_generic_exception(self, mock_synsets):
 
131
 
132
  # 400 Exception intercepted and relaunched as 500
133
  self.assertEqual(context.exception.status_code, 500)
134
+ self.assertIn("Error retrieving related words: 'test exception'", context.exception.detail)
135
 
136
  def test_inflect_synonym_noun_plural(self):
137
  # Test noun pluralization
 
272
  self.assertIsInstance(result, list)
273
  self.assertGreater(len(result), 0)
274
 
275
+ first_sense = dict(result[0])
276
  self.assertIn('definition', first_sense)
277
+ self.assertIn('related_words', first_sense)
278
 
279
+ first_synonym_info = dict(first_sense['related_words'][0])
280
  self.assertIn('base_form', first_synonym_info)
281
  self.assertIn('inflected_form', first_synonym_info)
282
  # For a past-tense verb, the inflected form should be different from the base
 
292
  'original_indices': {'end': 60, 'start': 55}, 'pos': 'ADJ', 'sentence_position': 9,
293
  'tag': 'JJ', 'word': 'happy'
294
  }
295
+ # RelatedWordGroup
296
+ result_related_word_groups_list = process_synonym_groups(word, context_info)
297
+ self.assertIsInstance(result_related_word_groups_list, list)
298
+ for related_words_group in result_related_word_groups_list:
299
+ related_word_group_dict = dict(related_words_group)
300
+ self.assertIsInstance(related_word_group_dict, dict)
301
+ self.assertIsInstance(related_word_group_dict["definition"], str)
302
+ self.assertIn("relation_type", related_word_group_dict)
303
+ self.assertIn(related_word_group_dict["relation_type"], TermRelationships)
304
+ self.assertIsInstance(related_word_group_dict["examples"], list)
305
+ related_words = related_word_group_dict["related_words"]
306
+ for _word_dict in related_words:
307
+ word_dict = dict(_word_dict)
308
+ self.assertIsInstance(word_dict, dict)
309
+ self.assertIsInstance(word_dict["base_form"], str)
310
+ self.assertIsInstance(word_dict["inflected_form"], str)
311
+ self.assertIsInstance(word_dict["matches_context"], bool)
312
+ self.assertIn("is_custom", word_dict)
313
 
314
  @patch("my_ghost_writer.text_parsers2.wn.synsets")
315
  def test_process_synonym_groups_not_synonyms_by_sense(self, mock_synsets):