alessandro trinca tornidor committed
Commit a707261 · Parent(s): 59bceb1

feat: make /thesaurus-inflated-phrase agnostic, not bounded to synonyms - /thesaurus-custom fixed
Browse files
- my_ghost_writer/app.py +13 -8
- my_ghost_writer/custom_synonym_handler.py +79 -11
- my_ghost_writer/jsonpath_comparator.py +317 -0
- my_ghost_writer/jsonpath_extractor.py +558 -0
- my_ghost_writer/text_parsers2.py +241 -158
- my_ghost_writer/type_hints.py +56 -21
- poetry.lock +29 -1
- pyproject.toml +1 -0
- requirements-test.txt +1 -1
- requirements-webserver.txt +1 -0
- requirements.txt +3 -1
- tests/events/get_wordnet_synonyms_custom_entry_happy.json +1 -0
- tests/events/get_wordnet_synonyms_piano_ok1.json +1 -0
- tests/events/request_thesaurus_custom4.json +20 -0
- tests/events/response_thesaurus_phrase_inflated.json +1511 -104
- tests/events/response_thesaurus_phrase_inflated2.json +1707 -0
- tests/events/response_thesaurus_phrase_inflated_structure.json +1 -0
- tests/my_ghost_writer/helpers_tests.py +10 -0
- tests/my_ghost_writer/test_app.py +49 -13
- tests/my_ghost_writer/test_custom_synonym_handler.py +12 -8
- tests/my_ghost_writer/test_extract_jsonpaths.py +1440 -0
- tests/my_ghost_writer/test_text_parsers2.py +56 -42
my_ghost_writer/app.py
CHANGED
@@ -27,7 +27,7 @@ from my_ghost_writer.pymongo_utils import mongodb_health_check
 from my_ghost_writer.text_parsers2 import find_synonyms_for_phrase, custom_synonym_handler
 from my_ghost_writer.thesaurus import get_current_info_wordnet
 from my_ghost_writer.type_hints import (RequestQueryThesaurusInflatedBody, RequestQueryThesaurusWordsapiBody,
-                                        RequestSplitText, RequestTextFrequencyBody,
+                                        RequestSplitText, RequestTextFrequencyBody, MultiRelatedWordResponse, CustomRelatedWordRequest)


 async def mongo_health_check_background_task():
@@ -225,7 +225,7 @@ def get_thesaurus_wordsapi(body: RequestQueryThesaurusWordsapiBody | str) -> JSO
         raise HTTPException(status_code=response.status_code, detail=msg)


-@app.post("/thesaurus-inflated-phrase", response_model=
+@app.post("/thesaurus-inflated-phrase", response_model=MultiRelatedWordResponse)
 async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
     """
     Get contextual synonyms for a selected phrase (one or more words).
@@ -252,7 +252,7 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
     app_logger.info(f"text:{text}!")
     app_logger.info(f"word:{word}!")

-    #
+    # persistence

     try:
         # The new function in text_parsers2 does all the heavy lifting
@@ -266,7 +266,7 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
         app_logger.info(f"got find_synonyms_for_phrase() result in: {duration:.3f}s. ...")
         app_logger.debug(results)

-        #
+        # persistence

         message = f"Got {len(results)} synonym groups." if results else "No words with synonyms found in the selected phrase."

@@ -274,13 +274,16 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
         duration = (t2 - t1).total_seconds()
         app_logger.info(f"got MultiWordSynonymResponse() result in: {duration:.3f}s. ...")
         # Construct the final response using our Pydantic model
-
+        response_object = MultiRelatedWordResponse(
             success=True,
             original_phrase=body.word,
             original_indices={"start": body.start, "end": body.end},
             results=results,
-            message=message
+            message=message,
+            duration=duration
         )
+        response_json = response_object.model_dump_json(exclude_none=True)
+        return JSONResponse(status_code=200, content=json.loads(response_json))

     except HTTPException as http_ex:
         # Re-raise known HTTP exceptions to be handled by FastAPI's handler
@@ -292,10 +295,12 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):


 @app.post("/thesaurus-custom")
-async def add_custom_synonyms(body:
+async def add_custom_synonyms(body: CustomRelatedWordRequest):
     """Adds custom synonyms for a given word to the in-memory store."""
     try:
-
+        word = body.word
+        related_list = body.related
+        custom_synonym_handler.add_entry(word, related_list)
         return {"message": f"Custom entry for '{body.word}' added/updated successfully (in-memory)."}

     except Exception as e:
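A minimal sketch of exercising the fixed /thesaurus-custom endpoint (assumptions: a local server on port 8000, and a payload shape inferred from body.word / body.related above; the exact CustomRelatedWordRequest fields live in type_hints.py):

# hypothetical client call; payload keys are inferred from the handler code
import requests

payload = {
    "word": "ocean",
    "related": [
        {"type": "synonym", "words": ["sea", "deep"], "definition": "a large body of salt water"}
    ],
}
resp = requests.post("http://localhost:8000/thesaurus-custom", json=payload)
print(resp.status_code, resp.json())
# expected: 200 {'message': "Custom entry for 'ocean' added/updated successfully (in-memory)."}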
my_ghost_writer/custom_synonym_handler.py
CHANGED
@@ -1,22 +1,46 @@
 from typing import Any

+from my_ghost_writer.constants import app_logger
+from my_ghost_writer.type_hints import RelatedEntry, TermRelationships
+

 class CustomSynonymHandler:
+    """
+    Handles the storage and retrieval of custom-related words.
+    """
     def __init__(self):
-
-
+        """
+        Initializes the CustomSynonymHandler with an empty lexicon and inverted index.
+        """
+        # {word: {relation_type: [{words: related_word, definition: definition}]}}
+        self.lexicon: dict[str, dict[TermRelationships, list[dict[str, Any]]]] = {}
         # For reverse lookups
         self.inverted_index: dict[str, set[str]] = {}

-    def add_entry(self, word: str, related: list[
+    def add_entry(self, word: str, related: list[RelatedEntry]):
+        """
+        Adds a custom-related word entry to the lexicon.
+        Side Effects: Updates the lexicon and inverted_index with the new related word entry.
+
+        Args:
+            word: The word to add related words for (str).
+            related: A list of RelatedEntry objects representing the related words (list[RelatedEntry]).
+
+        Returns:
+            None
+        """
         word = word.lower()
         if word not in self.lexicon:
             self.lexicon[word] = {}
-        for relation in related:
-
+        for n, relation in enumerate(related):
+            try:
+                relation_type = relation.type
+            except AttributeError as attr_err:
+                app_logger.error(f"AttributeError: {attr_err}, n:{n}.")
+                raise attr_err
             group = {
-                "words": [w.lower().strip() for w in relation
-                "definition": relation.
+                "words": [w.lower().strip() for w in relation.words],
+                "definition": relation.definition
             }
             if relation_type not in self.lexicon[word]:
                 self.lexicon[word][relation_type] = []
@@ -27,9 +51,20 @@ class CustomSynonymHandler:
             self.inverted_index[w].add(word)

     def delete_entry(self, word: str):
+        """
+        Deletes a custom-related word entry from the lexicon.
+        Side Effects: Removes the related word entry from the lexicon and updates the inverted_index.
+        Raises: KeyError: If the word is not found in the lexicon.
+
+        Args:
+            word: The word to delete related words for (str).
+
+        Returns:
+            None
+        """
         word = word.lower()
         if word not in self.lexicon:
-            raise KeyError(f"No custom
+            raise KeyError(f"No custom related words found for word '{word}'.")
         # Remove from inverted index
         for relation_groups in self.lexicon[word].values():
             for group in relation_groups:
@@ -37,18 +72,51 @@ class CustomSynonymHandler:
         del self.lexicon[word]

     def _update_group_words(self, group, word):
+        """
+        Updates the inverted index when a related word entry is deleted.
+        Side Effects: Updates the inverted_index by discarding or deleting entries.
+
+        Args:
+            group: The group of related words (dict).
+            word: The word that the related words are associated with (str).
+
+        Returns:
+            None
+        """
         for w in group["words"]:
             if w in self.inverted_index:
                 self.inverted_index[w].discard(word)
                 if not self.inverted_index[w]:
                     del self.inverted_index[w]

-    def get_related(self, word: str, relation_type:
+    def get_related(self, word: str, relation_type: TermRelationships) -> list[dict[str, Any]]:
+        """
+        Retrieves related words for a given word and relation type.
+
+        Args:
+            word: The word to retrieve related words for (str).
+            relation_type: The type of relationship to retrieve (TermRelationships).
+
+        Returns:
+            A list of dictionaries, where each dictionary represents a related word group (list[dict[str, Any]]).
+            Returns an empty list if no related words are found.
+        """
         word = word.lower()
-        if word in self.lexicon
-
+        if word in self.lexicon:
+            word_lexicon = self.lexicon[word]
+            if relation_type in word_lexicon:
+                return word_lexicon[relation_type]
         return []

     def reverse_lookup(self, related_word: str) -> set[str]:
+        """
+        Performs a reverse lookup to find words that have the given word as a related word.
+
+        Args:
+            related_word: The word to search for (str).
+
+        Returns:
+            A set of words that have the given word as a related word (set[str]).
+        """
         related_word = related_word.lower()
         return self.inverted_index.get(related_word, set())
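For reference, a small sketch of the handler's flow. It assumes RelatedEntry exposes type, words and definition attributes (as the relation.type / relation.words / relation.definition accesses above imply) and that TermRelationships compares equal to the plain string "synonym", which the old get_related(word_lower, "synonym") call in text_parsers2.py suggests:

# illustrative only; the RelatedEntry constructor signature is an assumption
from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
from my_ghost_writer.type_hints import RelatedEntry

handler = CustomSynonymHandler()
entry = RelatedEntry(type="synonym", words=["Sea", "Deep"], definition="large body of water")
handler.add_entry("Ocean", [entry])
# entries are lower-cased on the way in
print(handler.get_related("ocean", "synonym"))
# [{'words': ['sea', 'deep'], 'definition': 'large body of water'}]
print(handler.reverse_lookup("sea"))  # {'ocean'}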
my_ghost_writer/jsonpath_comparator.py
ADDED
@@ -0,0 +1,317 @@
from jsonpath_ng import parse
from jsonpath_ng.ext import parse as parse_ext
from typing import Dict, Set, Any, List


def extract_structure_paths(data: dict) -> Set[str]:
    """
    Extract all available paths from JSON data

    Args:
        data: JSON data to analyze

    Returns:
        Set of all JSONPath expressions found in the data
    """
    paths = set()

    def _collect_paths(obj: Any, path: str = "$"):
        if isinstance(obj, dict):
            for key, value in obj.items():
                current_path = f"{path}.{key}"
                paths.add(current_path)
                _collect_paths(value, current_path)
        elif isinstance(obj, list):
            array_path = f"{path}[*]"
            paths.add(array_path)
            if obj:
                for item in obj:
                    _collect_paths(item, array_path)

    _collect_paths(data)
    return paths


def extract_structure_paths_with_types(data: dict) -> Dict[str, str]:
    """
    Extract all available paths from JSON data with their value types

    Args:
        data: JSON data to analyze

    Returns:
        Dictionary mapping JSONPath expressions to their value types
    """
    paths_with_types = {}

    def _get_type_name(value: Any) -> str:
        """Get a descriptive type name for the value"""
        if value is None:
            return "null"
        elif isinstance(value, bool):
            return "boolean"
        elif isinstance(value, int):
            return "integer"
        elif isinstance(value, float):
            return "number"
        elif isinstance(value, str):
            return "string"
        elif isinstance(value, list):
            if not value:
                return "array(empty)"
            # Get the types of array elements
            element_types = set(_get_type_name(item) for item in value)
            if len(element_types) == 1:
                return f"array({element_types.pop()})"
            else:
                return f"array(mixed: {', '.join(sorted(element_types))})"
        elif isinstance(value, dict):
            return "object"
        else:
            return type(value).__name__

    def _collect_paths(obj: Any, path: str = "$"):
        paths_with_types[path] = _get_type_name(obj)

        if isinstance(obj, dict):
            for key, value in obj.items():
                current_path = f"{path}.{key}"
                _collect_paths(value, current_path)
        elif isinstance(obj, list):
            array_path = f"{path}[*]"
            if obj:
                # Use first item as representative type for array elements
                _collect_paths(obj[0], array_path)

    _collect_paths(data)
    return paths_with_types


def print_comparison_report(comparison: Dict[str, Any]):
    """
    Print formatted comparison report

    Args:
        comparison: Results from compare_structures method
    """

    print("=== JSONPATH STRUCTURE COMPARISON ===\n")

    if comparison["added_paths"]:
        print("➕ ADDED PATHS:")
        for path in sorted(comparison["added_paths"]):
            print(f"  {path}")
        print()

    if comparison["removed_paths"]:
        print("➖ REMOVED PATHS:")
        for path in sorted(comparison["removed_paths"]):
            print(f"  {path}")
        print()

    if comparison.get("path_validations"):
        print("🔍 PATH VALIDATIONS:")
        for path, validation in comparison["path_validations"].items():
            if "error" in validation:
                print(f"  ⚠️ {path}: {validation['error']}")
            else:
                status = validation["status"]
                old_count = validation["old_count"]
                new_count = validation["new_count"]
                print(f"  {status} {path}: {old_count} → {new_count}")


def print_comparison_report_with_types(comparison: Dict[str, Any]):
    """
    Print formatted comparison report including type information

    Args:
        comparison: Results from compare_structures_with_types method
    """
    print("=== JSONPATH STRUCTURE COMPARISON WITH TYPES ===\n")

    if comparison["added_paths"]:
        print("➕ ADDED PATHS:")
        for path, type_info in sorted(comparison["added_paths"].items()):
            print(f"  {path} ({type_info})")
        print()

    if comparison["removed_paths"]:
        print("➖ REMOVED PATHS:")
        for path, type_info in sorted(comparison["removed_paths"].items()):
            print(f"  {path} ({type_info})")
        print()

    if comparison.get("type_changes"):
        print("🔄 TYPE CHANGES:")
        for path, change in sorted(comparison["type_changes"].items()):
            print(f"  {path}: {change['old_type']} → {change['new_type']}")
        print()

    if comparison.get("path_validations"):
        print("🔍 PATH VALIDATIONS:")
        for path, validation in comparison["path_validations"].items():
            if "error" in validation:
                print(f"  ⚠️ {path}: {validation['error']}")
            else:
                status = validation["status"]
                old_count = validation["old_count"]
                new_count = validation["new_count"]
                print(f"  {status} {path}: {old_count} → {new_count}")


class JSONPathComparator:
    """
    Compare JSON structures using JSONPath expressions

    This class allows you to define expected paths and compare different
    JSON responses to detect structural changes during refactoring.
    """

    def __init__(self, common_paths: List[str] = None):
        """
        Initialize comparator with common paths to validate

        Args:
            common_paths: List of JSONPath expressions to validate across responses
        """
        self.common_paths = common_paths or []

    def compare_structures(self, old_data: dict, new_data: dict) -> Dict[str, Any]:
        """
        Compare two JSON structures using JSONPath

        Args:
            old_data: Original JSON structure
            new_data: New JSON structure to compare against

        Returns:
            Dictionary containing comparison results with added/removed paths
            and validation results for common paths
        """

        old_paths = extract_structure_paths(old_data)
        new_paths = extract_structure_paths(new_data)

        comparison = {
            "added_paths": new_paths - old_paths,
            "removed_paths": old_paths - new_paths,
            "common_paths": old_paths & new_paths,
            "path_validations": {}
        }

        # Test common JSONPath expressions
        for path_expr in self.common_paths:
            try:
                jsonpath = parse_ext(path_expr)

                old_matches = [match.value for match in jsonpath.find(old_data)]
                new_matches = [match.value for match in jsonpath.find(new_data)]

                comparison["path_validations"][path_expr] = {
                    "old_found": len(old_matches) > 0,
                    "new_found": len(new_matches) > 0,
                    "old_count": len(old_matches),
                    "new_count": len(new_matches),
                    "status": "✅" if (len(old_matches) > 0) == (len(new_matches) > 0) else "❌"
                }

            except Exception as e:
                comparison["path_validations"][path_expr] = {
                    "error": str(e),
                    "status": "⚠️"
                }

        return comparison

    def compare_structures_with_types(self, old_data: dict, new_data: dict) -> Dict[str, Any]:
        """
        Compare two JSON structures using JSONPath including type information

        Args:
            old_data: Original JSON structure
            new_data: New JSON structure to compare against

        Returns:
            Dictionary containing comparison results with type information
        """
        old_paths = extract_structure_paths_with_types(old_data)
        new_paths = extract_structure_paths_with_types(new_data)

        # Find paths that exist in both but have different types
        type_changes = {}
        common_paths = set(old_paths.keys()) & set(new_paths.keys())
        for path in common_paths:
            if old_paths[path] != new_paths[path]:
                type_changes[path] = {
                    "old_type": old_paths[path],
                    "new_type": new_paths[path]
                }

        comparison = {
            "added_paths": {k: v for k, v in new_paths.items() if k not in old_paths},
            "removed_paths": {k: v for k, v in old_paths.items() if k not in new_paths},
            "common_paths": {k: v for k, v in old_paths.items() if k in new_paths},
            "type_changes": type_changes,
            "path_validations": {}
        }

        # Test common JSONPath expressions
        for path_expr in self.common_paths:
            try:
                jsonpath = parse_ext(path_expr)

                old_matches = [match.value for match in jsonpath.find(old_data)]
                new_matches = [match.value for match in jsonpath.find(new_data)]

                comparison["path_validations"][path_expr] = {
                    "old_found": len(old_matches) > 0,
                    "new_found": len(new_matches) > 0,
                    "old_count": len(old_matches),
                    "new_count": len(new_matches),
                    "status": "✅" if (len(old_matches) > 0) == (len(new_matches) > 0) else "❌"
                }

            except Exception as e:
                comparison["path_validations"][path_expr] = {
                    "error": str(e),
                    "status": "⚠️"
                }

        return comparison


def compare_json_with_jsonpath(old_data: dict, new_data: dict, common_paths: List[str] = None):
    """
    Main function to compare JSON structures

    Args:
        old_data: Original JSON structure
        new_data: New JSON structure to compare
        common_paths: Optional list of JSONPath expressions to validate

    Returns:
        Dictionary containing comparison results
    """
    comparator = JSONPathComparator(common_paths)
    comparison = comparator.compare_structures(old_data, new_data)
    print_comparison_report(comparison)
    return comparison


def compare_json_with_jsonpath_and_types(old_data: dict, new_data: dict, common_paths: List[str] = None):
    """
    Main function to compare JSON structures with type information

    Args:
        old_data: Original JSON structure
        new_data: New JSON structure to compare
        common_paths: Optional list of JSONPath expressions to validate

    Returns:
        Dictionary containing comparison results with type information
    """
    comparator = JSONPathComparator(common_paths)
    comparison = comparator.compare_structures_with_types(old_data, new_data)
    print_comparison_report_with_types(comparison)
    return comparison
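A quick sketch of the comparator in use, on hypothetical data:

from my_ghost_writer.jsonpath_comparator import compare_json_with_jsonpath

old = {"success": True, "results": [{"word": "sea"}]}
new = {"success": True, "results": [{"word": "sea", "score": 0.9}], "duration": 0.12}
report = compare_json_with_jsonpath(old, new, common_paths=["$.results[*].word"])
# prints $.duration and $.results[*].score as added paths, and confirms that
# $.results[*].word still resolves in both structures
assert report["path_validations"]["$.results[*].word"]["status"] == "✅"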
my_ghost_writer/jsonpath_extractor.py
ADDED
@@ -0,0 +1,558 @@
from collections import defaultdict
from typing import Any, Dict, Set


class JSONPathStructureAnalyzer:
    """
    Analyze JSON structure using JSONPath expressions

    Example usage:
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths({"success": True, "data": {"users": [{"id": 1}]}})
        print(analyzer.get_structure_report())
    """

    def __init__(self):
        self.paths = set()
        self.types = defaultdict(set)
        self.samples = defaultdict(list)
        self.array_lengths = {}  # New: Store array lengths separately

    def extract_all_paths(self, data: dict, max_samples: int = 3) -> Set[str]:
        """
        Extract all possible JSONPath expressions from data

        Args:
            data: JSON data to analyze
            max_samples: Maximum number of sample values to collect per path

        Returns:
            Set of JSONPath expressions found in the data
        """

        def _extract_recursive(obj: Any, path: str = "$"):
            if isinstance(obj, dict):
                for key, value in obj.items():
                    current_path = f"{path}.{key}"
                    self.paths.add(current_path)
                    self.types[current_path].add(type(value).__name__)

                    if not isinstance(value, (dict, list)) and len(self.samples[current_path]) < max_samples:
                        self.samples[current_path].append(str(value))

                    _extract_recursive(value, current_path)

            elif isinstance(obj, list):
                array_path = f"{path}[*]"
                self.paths.add(array_path)
                self.types[array_path].add("array")  # Just store "array" as type
                self.array_lengths[array_path] = len(obj)  # Store length separately

                if obj:  # If array is not empty
                    # Process each item in the array to capture all possible structures
                    for item in obj:
                        _extract_recursive(item, array_path)

        _extract_recursive(data)
        return self.paths

    def get_structure_report(self) -> str:
        """
        Generate a structure report using JSONPath notation

        Returns:
            Formatted string showing all paths with their types and sample values
        """
        report = []

        for path in sorted(self.paths):
            types = list(self.types[path])
            samples = self.samples.get(path, [])

            if "array" in types:
                array_length = self.array_lengths.get(path, 0)
                report.append(f"{path} -- array[{array_length}]")
            elif samples:
                if len(samples) > 1:
                    unique_count = len(set(samples))
                    if unique_count > 1:
                        sample_range = f"{samples[0]} .. {samples[-1]} ({unique_count} unique values)"
                    else:
                        sample_range = samples[0]
                else:
                    sample_range = samples[0]
                report.append(f"{path} -- {sample_range}")
            else:
                type_info = "/".join(types)
                report.append(f"{path} -- {type_info}")

        return "\n".join(report)

    def get_paths_with_types(self) -> Dict[str, str]:
        """
        Get all paths with their associated value types

        Returns:
            Dictionary mapping JSONPath expressions to their value types
        """
        paths_with_types = {}

        for path in self.paths:
            types = list(self.types[path])

            if "array" in types:
                paths_with_types[path] = "array"
            elif len(types) == 1:
                # Single type
                paths_with_types[path] = types[0]
            elif len(types) > 1:
                # Multiple types (mixed)
                paths_with_types[path] = f"mixed({', '.join(sorted(types))})"
            else:
                # No type info available
                paths_with_types[path] = "unknown"

        return paths_with_types

    def get_array_lengths(self) -> Dict[str, int]:
        """
        Get array lengths for all array paths

        Returns:
            Dictionary mapping array paths to their lengths
        """
        return self.array_lengths.copy()

    def get_detailed_type_report(self, get_samples: bool = True) -> Dict[str, Dict[str, Any]]:
        """
        Get detailed type information for each path including samples

        Returns:
            Dictionary with detailed type information for each path
        """
        detailed_report = {}

        for path in sorted(self.paths):
            types = list(self.types[path])
            samples = self.samples.get(path, [])

            path_info = {
                "types": types,
                "primary_type": None,
                "is_array": "array" in types,
                # "samples": samples,
                # "sample_count": len(samples)
            }
            if get_samples:
                path_info["samples"] = samples
                path_info["sample_count"] = len(samples)

            # Add array length if it's an array
            if path_info["is_array"]:
                path_info["array_length"] = self.array_lengths.get(path, 0)

            # Determine primary type
            if path_info["is_array"]:
                path_info["primary_type"] = "array"
            elif len(types) == 1:
                path_info["primary_type"] = types[0]
            elif len(types) > 1:
                path_info["primary_type"] = f"mixed({', '.join(sorted(types))})"
            else:
                path_info["primary_type"] = "unknown"

            detailed_report[path] = path_info

        return detailed_report

    def compare_json_structures(self, other_data: dict) -> Dict[str, Any]:
        """
        Compare this analyzer's data with another JSON structure

        Args:
            other_data: JSON data to compare against

        Returns:
            Dictionary containing detailed comparison results
        """
        # Analyze the other data
        other_analyzer = JSONPathStructureAnalyzer()
        other_analyzer.extract_all_paths(other_data)

        # Get paths and types for both
        self_paths_types = self.get_paths_with_types()
        other_paths_types = other_analyzer.get_paths_with_types()

        # Get array lengths
        self_array_lengths = self.get_array_lengths()
        other_array_lengths = other_analyzer.get_array_lengths()

        # Find path differences
        self_only_paths = set(self_paths_types.keys()) - set(other_paths_types.keys())
        other_only_paths = set(other_paths_types.keys()) - set(self_paths_types.keys())
        common_paths = set(self_paths_types.keys()) & set(other_paths_types.keys())

        # Analyze changes
        type_changes = {}
        value_differences = {}
        array_size_changes = {}

        for path in common_paths:
            self_type = self_paths_types[path]
            other_type = other_paths_types[path]

            # Check for type changes
            if self_type != other_type:
                type_changes[path] = {
                    "old_type": self_type,
                    "new_type": other_type
                }

            # Check for array size changes (now much cleaner!)
            if self_type == "array" and other_type == "array":
                self_length = self_array_lengths.get(path, 0)
                other_length = other_array_lengths.get(path, 0)

                if self_length != other_length:
                    array_size_changes[path] = {
                        "old_size": self_length,
                        "new_size": other_length,
                        "size_change": other_length - self_length
                    }

            # Check for value differences (non-array paths)
            if self_type != "array" and other_type != "array":
                self_samples = self.samples.get(path, [])
                other_samples = other_analyzer.samples.get(path, [])

                if self_samples and other_samples:
                    # Compare first sample values
                    if self_samples[0] != other_samples[0]:
                        value_differences[path] = {
                            "old_value": self_samples[0],
                            "new_value": other_samples[0],
                            "old_samples": self_samples,
                            "new_samples": other_samples
                        }

        return {
            "added_paths": {path: other_paths_types[path] for path in other_only_paths},
            "removed_paths": {path: self_paths_types[path] for path in self_only_paths},
            "common_paths": {path: self_paths_types[path] for path in common_paths},
            "type_changes": type_changes,
            "value_differences": value_differences,
            "array_size_changes": array_size_changes,
            "array_lengths_old": {path: length for path, length in self_array_lengths.items() if path in common_paths or path in self_only_paths},
            "array_lengths_new": {path: length for path, length in other_array_lengths.items() if path in common_paths or path in other_only_paths},
            "summary": {
                "total_paths_old": len(self_paths_types),
                "total_paths_new": len(other_paths_types),
                "paths_added": len(other_only_paths),
                "paths_removed": len(self_only_paths),
                "paths_common": len(common_paths),
                "type_changes_count": len(type_changes),
                "value_changes_count": len(value_differences),
                "array_size_changes_count": len(array_size_changes)
            }
        }

    def filter_paths_excluding_keys(self, exclude_keys: set[str]) -> set[str]:
        """
        Filter existing paths to exclude those containing specific keys

        Args:
            exclude_keys: set of keys to exclude

        Returns:
            Filtered set of paths
        """
        filtered_paths = set()

        for path in self.paths:
            # Check if any excluded key appears in the path
            path_contains_excluded = False
            for exclude_key in exclude_keys:
                if f".{exclude_key}" in path or f".{exclude_key}[" in path:
                    path_contains_excluded = True
                    break

            if not path_contains_excluded:
                filtered_paths.add(path)

        return filtered_paths

    def get_filtered_structure_report(self, exclude_keys: set[str] = None) -> str:
        """
        Generate structure report excluding specific keys

        Args:
            exclude_keys: set of keys to exclude from report

        Returns:
            Filtered structure report
        """
        if exclude_keys is None:
            exclude_keys = set()

        filtered_paths = self.filter_paths_excluding_keys(exclude_keys)
        report = []

        for path in sorted(filtered_paths):
            types = list(self.types[path])
            samples = self.samples.get(path, [])

            if "array" in types:
                array_length = self.array_lengths.get(path, 0)
                report.append(f"{path} -- array[{array_length}]")
            elif samples:
                if len(samples) > 1:
                    unique_count = len(set(samples))
                    if unique_count > 1:
                        sample_range = f"{samples[0]} .. {samples[-1]} ({unique_count} unique values)"
                    else:
                        sample_range = samples[0]
                else:
                    sample_range = samples[0]
                report.append(f"{path} -- {sample_range}")
            else:
                type_info = "/".join(types)
                report.append(f"{path} -- {type_info}")

        return "\n".join(report)

    def get_filtered_paths_with_types(self, exclude_keys: set[str] = None) -> dict[str, str]:
        """
        Get paths with types excluding specific keys

        Args:
            exclude_keys: set of keys to exclude

        Returns:
            Dictionary mapping filtered JSONPath expressions to their value types
        """
        if exclude_keys is None:
            exclude_keys = set()

        filtered_paths = self.filter_paths_excluding_keys(exclude_keys)
        paths_with_types = {}

        for path in filtered_paths:
            types = list(self.types[path])

            if "array" in types:
                paths_with_types[path] = "array"
            elif len(types) == 1:
                paths_with_types[path] = types[0]
            elif len(types) > 1:
                paths_with_types[path] = f"mixed({', '.join(sorted(types))})"
            else:
                paths_with_types[path] = "unknown"

        return paths_with_types

    def get_filtered_detailed_type_report(self, exclude_keys: set[str] = None) -> dict[str, dict[str, Any]]:
        """
        Get detailed type information excluding specific keys

        Args:
            exclude_keys: set of keys to exclude

        Returns:
            Dictionary with detailed type information for filtered paths
        """
        if exclude_keys is None:
            exclude_keys = set()

        filtered_paths = self.filter_paths_excluding_keys(exclude_keys)
        detailed_report = {}

        for path in sorted(filtered_paths):
            types = list(self.types[path])
            samples = self.samples.get(path, [])

            path_info = {
                "types": types,
                "primary_type": None,
                "is_array": "array" in types,
                "samples": samples,
                "sample_count": len(samples)
            }

            if path_info["is_array"]:
                path_info["array_length"] = self.array_lengths.get(path, 0)

            if path_info["is_array"]:
                path_info["primary_type"] = "array"
            elif len(types) == 1:
                path_info["primary_type"] = types[0]
            elif len(types) > 1:
                path_info["primary_type"] = f"mixed({', '.join(sorted(types))})"
            else:
                path_info["primary_type"] = "unknown"

            detailed_report[path] = path_info

        return detailed_report


def analyze_with_jsonpath(data: dict) -> str:
    """
    Analyze JSON structure using JSONPath

    Args:
        data: Dictionary containing JSON data to analyze

    Returns:
        Formatted structure report string
    """
    analyzer = JSONPathStructureAnalyzer()
    analyzer.extract_all_paths(data)
    return analyzer.get_structure_report()


def analyze_with_jsonpath_types(data: dict) -> Dict[str, str]:
    """
    Analyze JSON structure and return paths with their types

    Args:
        data: Dictionary containing JSON data to analyze

    Returns:
        Dictionary mapping JSONPath expressions to their value types
    """
    analyzer = JSONPathStructureAnalyzer()
    analyzer.extract_all_paths(data)
    return analyzer.get_paths_with_types()


def analyze_with_jsonpath_detailed(data: dict) -> Dict[str, Dict[str, Any]]:
    """
    Analyze JSON structure and return detailed type information

    Args:
        data: Dictionary containing JSON data to analyze

    Returns:
        Dictionary with detailed type information for each path
    """
    analyzer = JSONPathStructureAnalyzer()
    analyzer.extract_all_paths(data)
    return analyzer.get_detailed_type_report()


def compare_json_with_jsonpath_structures(old_data: dict, new_data: dict, print_report: bool = True) -> Dict[str, Any]:
    """
    Compare two JSON structures using JSONPath analysis

    Args:
        old_data: Original JSON structure
        new_data: New JSON structure to compare against
        print_report: Whether to print the comparison report

    Returns:
        Dictionary containing detailed comparison results
    """
    # Analyze old structure
    old_analyzer = JSONPathStructureAnalyzer()
    old_analyzer.extract_all_paths(old_data)

    # Compare with new structure
    comparison = old_analyzer.compare_json_structures(new_data)

    if print_report:
        print_comparison_report(comparison)

    return comparison


def print_comparison_report(comparison: Dict[str, Any]):
    """
    Print a formatted comparison report

    Args:
        comparison: Result from compare_json_structures method
    """
    print("=== JSON STRUCTURE COMPARISON REPORT ===\n")

    # Summary
    summary = comparison["summary"]
    print(f"📊 SUMMARY:")
    print(f"  Old structure: {summary['total_paths_old']} paths")
    print(f"  New structure: {summary['total_paths_new']} paths")
    print(f"  Added: {summary['paths_added']} paths")
    print(f"  Removed: {summary['paths_removed']} paths")
    print(f"  Common: {summary['paths_common']} paths")
    print(f"  Type changes: {summary['type_changes_count']}")
    print(f"  Value changes: {summary['value_changes_count']}")
    print(f"  Array size changes: {summary['array_size_changes_count']}")
    print()

    # Added paths
    if comparison["added_paths"]:
        print("➕ ADDED PATHS:")
        for path, type_info in sorted(comparison["added_paths"].items()):
            print(f"  {path} ({type_info})")
        print()

    # Removed paths
    if comparison["removed_paths"]:
        print("➖ REMOVED PATHS:")
        for path, type_info in sorted(comparison["removed_paths"].items()):
            print(f"  {path} ({type_info})")
        print()

    # Type changes
    if comparison["type_changes"]:
        print("🔄 TYPE CHANGES:")
        for path, change in sorted(comparison["type_changes"].items()):
            print(f"  {path}: {change['old_type']} → {change['new_type']}")
        print()

    # Array size changes
    if comparison["array_size_changes"]:
        print("📏 ARRAY SIZE CHANGES:")
        for path, change in sorted(comparison["array_size_changes"].items()):
            size_change = change['size_change']
            direction = "📈" if size_change > 0 else "📉"
            print(f"  {direction} {path}: {change['old_size']} → {change['new_size']} (Δ{size_change:+d})")
        print()

    # Value differences
    if comparison["value_differences"]:
        print("💱 VALUE CHANGES:")
        for path, change in sorted(comparison["value_differences"].items()):
            print(f"  {path}: '{change['old_value']}' → '{change['new_value']}'")
        print()


def analyze_dict_list_simple(dict_list: list[dict], exclude_keys: set[str] = None) -> list[dict[str, Any]]:
    """
    Analyze each dict separately and return list of results

    Args:
        dict_list: list of dictionaries to analyze
        exclude_keys: set of keys to exclude from analysis

    Returns:
        list of individual analysis results
    """
    if exclude_keys is None:
        exclude_keys = set()

    results = []

    for i, data_dict in enumerate(dict_list):
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(data_dict)

        result = {
            "index": i,
            "paths_with_types": analyzer.get_filtered_paths_with_types(exclude_keys),
            "detailed_report": analyzer.get_filtered_detailed_type_report(exclude_keys),
            "array_lengths": {k: v for k, v in analyzer.get_array_lengths().items()
                              if k in analyzer.filter_paths_excluding_keys(exclude_keys)},
            "structure_report": analyzer.get_filtered_structure_report(exclude_keys)
        }
        results.append(result)

    return results
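And the analyzer side, on the same kind of hypothetical payload:

from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer

analyzer = JSONPathStructureAnalyzer()
analyzer.extract_all_paths({"success": True, "results": [{"word": "sea"}, {"word": "deep"}]})
print(analyzer.get_structure_report())
# $.results -- list
# $.results[*] -- array[2]
# $.results[*].word -- sea .. deep (2 unique values)
# $.success -- True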
my_ghost_writer/text_parsers2.py
CHANGED
@@ -1,16 +1,17 @@
|
|
1 |
from datetime import datetime
|
|
|
2 |
|
3 |
-
import spacy
|
4 |
import nltk
|
5 |
# pynflect needed to avoid different inflection
|
6 |
import pyinflect
|
7 |
-
|
8 |
from fastapi import HTTPException
|
9 |
|
10 |
-
from my_ghost_writer.constants import
|
11 |
from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
|
12 |
from my_ghost_writer.thesaurus import wn
|
13 |
-
from my_ghost_writer.type_hints import
|
|
|
14 |
|
15 |
|
16 |
custom_synonym_handler = CustomSynonymHandler()
|
@@ -40,11 +41,20 @@ def is_nlp_available() -> bool:
|
|
40 |
return nlp is not None
|
41 |
|
42 |
|
43 |
-
def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[
|
44 |
"""
|
45 |
-
Finds
|
46 |
It analyzes the span, filters for meaningful words (nouns, verbs, etc.),
|
47 |
-
and returns a list of
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
"""
|
49 |
if nlp is None:
|
50 |
app_logger.error(
|
@@ -62,7 +72,7 @@ def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[Wo
|
|
62 |
return []
|
63 |
|
64 |
# Define which POS tags are eligible for synonym lookup
|
65 |
-
results: list[
|
66 |
|
67 |
for token in span:
|
68 |
# Process only if the token is an eligible part of speech and not a stop word or punctuation
|
@@ -73,18 +83,18 @@ def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[Wo
|
|
73 |
text, token.idx, token.idx + len(token.text), token.text
|
74 |
)
|
75 |
|
76 |
-
# 2. Get
|
77 |
-
|
78 |
|
79 |
-
# 3. If we find
|
80 |
-
if
|
81 |
# Restructure dicts into Pydantic models for type safety
|
82 |
context_info_model = ContextInfo(
|
83 |
-
pos=context_info_dict[
|
84 |
-
sentence=context_info_dict[
|
85 |
-
grammatical_form=context_info_dict[
|
86 |
-
context_words=context_info_dict[
|
87 |
-
dependency=context_info_dict[
|
88 |
)
|
89 |
local_start_idx = token.idx - start_idx
|
90 |
local_end_idx = local_start_idx + len(token.text)
|
@@ -92,17 +102,17 @@ def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[Wo
|
|
92 |
sliced_word = sliced_sentence[local_start_idx:local_end_idx]
|
93 |
assert sliced_word == token.text, (f"Mismatch! sliced_word ({sliced_word}) != token.text ({token.text}), but these substrings should be equal.\n"
|
94 |
f" start_idx:{start_idx}, End_word:{end_idx}. local_start_idx:{local_start_idx}, local_end_idx:{local_end_idx}.")
|
95 |
-
word_result =
|
96 |
original_word=token.text,
|
97 |
original_indices={"start": local_start_idx, "end": local_end_idx},
|
98 |
context_info=context_info_model,
|
99 |
-
|
100 |
debug_info={
|
101 |
"spacy_token_indices": {
|
102 |
-
"start": context_info_dict[
|
103 |
-
"end": context_info_dict[
|
104 |
},
|
105 |
-
"lemma": context_info_dict[
|
106 |
}
|
107 |
)
|
108 |
results.append(word_result)
|
@@ -116,7 +126,19 @@ def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[Wo
|
|
116 |
|
117 |
|
118 |
def extract_contextual_info_by_indices(text: str, start_idx: int, end_idx: int, target_word: str) -> dict[str, Any]:
|
119 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
if nlp is None:
|
121 |
raise HTTPException(status_code=500, detail="spaCy model not available")
|
122 |
|
@@ -160,20 +182,20 @@ def extract_contextual_info_by_indices(text: str, start_idx: int, end_idx: int,
|
|
160 |
context_words = [t.text for t in sentence_tokens[context_start:context_end]]
|
161 |
|
162 |
return {
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
}
|
178 |
|
179 |
except Exception as indices_ex:
|
@@ -182,58 +204,115 @@ def extract_contextual_info_by_indices(text: str, start_idx: int, end_idx: int,
|
|
182 |
|
183 |
|
184 |
def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[str, Any]]:
|
185 |
-
"""
|
186 |
-
|
|
|
|
|
|
|
|
|
|
|
187 |
|
188 |
-
|
|
|
|
|
|
|
189 |
word_lower = word.lower()
|
190 |
-
synonyms_by_sense: list[dict[str, Any]] = [] # Initialize the list here
|
191 |
-
|
192 |
-
# 1. Custom Synonym Lookup and Preparation
|
193 |
-
custom_synset = None # Initialize to None
|
194 |
-
# 1. Direct Lookup: Check if the word is directly in custom_synonyms
|
195 |
-
related_synonyms = custom_synonym_handler.get_related(word_lower, "synonym")
|
196 |
-
if related_synonyms:
|
197 |
-
app_logger.info(f"found custom_synonyms:{related_synonyms} by word:{word_lower}!")
|
198 |
-
synonyms_list: list[dict[str, Any]] = []
|
199 |
-
for related in related_synonyms:
|
200 |
-
words = related["words"]
|
201 |
-
for word_from_related_words in words:
|
202 |
-
synonyms_list.append({"synonym": word_from_related_words, "is_custom": True, "definition": related.get("definition")})
|
203 |
-
if synonyms_list:
|
204 |
-
custom_synset = {
|
205 |
-
'definition': 'User-defined synonym.',
|
206 |
-
'examples': [],
|
207 |
-
'synonyms': synonyms_list
|
208 |
-
}
|
209 |
-
if pos_tag:
|
210 |
-
custom_synset["pos"] = pos_tag
|
211 |
-
|
212 |
-
# 2. Reverse Lookup: Check if the word is a *synonym* of any custom word
|
213 |
-
reverse_lookup_words = custom_synonym_handler.reverse_lookup(word_lower)
|
214 |
-
|
215 |
-
if reverse_lookup_words:
|
216 |
-
app_logger.info(f"found reverse match: '{word_lower}' is a synonym of '{reverse_lookup_words}'")
|
217 |
-
# Found a reverse match!
|
218 |
-
# The reverse_lookup return the original word, not a list of synonyms
|
219 |
-
synonyms_list: list[dict[str, Any]] = [{"synonym": reverse_word, "is_custom": True} for reverse_word in reverse_lookup_words]
|
220 |
-
|
221 |
-
custom_synset = {
|
222 |
-
'definition': f'User-defined synonym (reverse match for "{word}").',
|
223 |
-
'examples': [],
|
224 |
-
'synonyms': synonyms_list
|
225 |
-
}
|
226 |
-
if pos_tag:
|
227 |
-
custom_synset["pos"] = pos_tag
|
228 |
|
229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
     try:
         # Map spaCy POS to wn POS
         pos_map = {
-            […]
-            […]
-            […]
-            […]
         }

         # Get all synsets for the word
@@ -243,38 +322,34 @@ def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[
         if pos_tag and pos_tag in pos_map:
             synsets = [s for s in synsets if s.pos() == pos_map[pos_tag]]

         for synset in synsets:
-            […]
-            […]
-            […]
-            […]
-            }
-            # Add pos only if it's available
-            syn_pos = synset.pos()
-            if syn_pos:
-                sense_data['pos'] = syn_pos
-
-            # Use a set to avoid duplicate synonyms from different lemmas in the same synset
-            unique_synonyms = set()
             for lemma in synset.lemmas():
-                […]
-                if […]
-                […]
-                […]
-                […]
-                […]
-                […]

     except Exception as ex1:
-        app_logger.error(f"Error getting wn synonyms: {ex1}")
-        raise HTTPException(status_code=500, detail=f"Error retrieving […]
-
-    # 4. Combine Custom and WordNet Synsets
-    if custom_synset:
-        synonyms_by_sense.insert(0, custom_synset)  # Add custom synset at the beginning

-    return […]


 def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
@@ -283,24 +358,24 @@ def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
     if nlp is None:
         return synonym

-    pos = original_token_info.get([…]
-    tag = original_token_info.get([…]

     # Handle capitalization first using .get() for safety
-    if original_token_info.get([…]
         synonym = synonym.title()  # .title() is better for multi-word phrases
-    elif original_token_info.get([…]
         synonym = synonym.upper()
-    elif original_token_info.get([…]
         synonym = synonym.lower()

     # Handle grammatical inflection
     try:
         # Define all tags that require inflection in one place
         inflection_tags = {
-            […]
-            […]
-            […]
         }

         # Single check for all inflection cases
@@ -320,50 +395,58 @@ def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
     return synonym


-def process_synonym_groups(word: str, context_info: dict[str, Any]) -> list[…]
-    """Process […]
-    […]
     t0 = datetime.now()
-    # Get […]
-    […]
     t1 = datetime.now()
     duration = (t1 - t0).total_seconds()
     app_logger.info(f"# 1/Got get_wordnet_synonyms result with '{word}' word in {duration:.3f}s.")

-    if not […]
         return []

-    # Process each […]
-    for […]
-    […]
-    return […]
 from datetime import datetime
+from typing import Any, Optional

 import nltk
 # pyinflect needed to avoid different inflection
 import pyinflect
+import spacy
 from fastapi import HTTPException

+from my_ghost_writer.constants import ELIGIBLE_POS, NLTK_DATA, SPACY_MODEL_NAME, app_logger
 from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
 from my_ghost_writer.thesaurus import wn
+from my_ghost_writer.type_hints import ContextInfo, RelatedWordGroup, RelatedWordOption, RelatedWordWordResult, \
+    TermRelationships


 custom_synonym_handler = CustomSynonymHandler()
...
     return nlp is not None


+def find_synonyms_for_phrase(text: str, start_idx: int, end_idx: int) -> list[RelatedWordWordResult]:
     """
+    Finds related words for all eligible words within a selected text span.
     It analyzes the span, filters for meaningful words (nouns, verbs, etc.),
+    and returns a list of related word results for each.
+    Raises: HTTPException: If the spaCy model is unavailable.
+
+    Args:
+        text: The input text (str).
+        start_idx: The start index of the phrase within the text (int).
+        end_idx: The end index of the phrase within the text (int).
+
+    Returns:
+        A list of RelatedWordWordResult objects, representing the related words for each eligible word (list[RelatedWordWordResult]).
     """
     if nlp is None:
         app_logger.error(
...
         return []

     # Define which POS tags are eligible for synonym lookup
+    results: list[RelatedWordWordResult] = []

     for token in span:
         # Process only if the token is an eligible part of speech and not a stop word or punctuation
...
             text, token.idx, token.idx + len(token.text), token.text
         )

+        # 2. Get related word groups using the token's lemma for a better search
+        related_word_groups_list = process_synonym_groups(context_info_dict["lemma"], context_info_dict)

+        # 3. If we find related words, build the result object for this word
+        if related_word_groups_list:
             # Restructure dicts into Pydantic models for type safety
             context_info_model = ContextInfo(
+                pos=context_info_dict["pos"],
+                sentence=context_info_dict["context_sentence"],
+                grammatical_form=context_info_dict["tag"],
+                context_words=context_info_dict["context_words"],
+                dependency=context_info_dict["dependency"],
             )
             local_start_idx = token.idx - start_idx
             local_end_idx = local_start_idx + len(token.text)
...
             sliced_word = sliced_sentence[local_start_idx:local_end_idx]
             assert sliced_word == token.text, (f"Mismatch! sliced_word ({sliced_word}) != token.text ({token.text}), but these substrings should be equal.\n"
                                                f" start_idx:{start_idx}, End_word:{end_idx}. local_start_idx:{local_start_idx}, local_end_idx:{local_end_idx}.")
+            word_result = RelatedWordWordResult(
                 original_word=token.text,
                 original_indices={"start": local_start_idx, "end": local_end_idx},
                 context_info=context_info_model,
+                related_word_groups=related_word_groups_list,
                 debug_info={
                     "spacy_token_indices": {
+                        "start": context_info_dict["char_start"],
+                        "end": context_info_dict["char_end"],
                     },
+                    "lemma": context_info_dict["lemma"]
                 }
             )
             results.append(word_result)
...
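A quick sketch (not part of the commit) of driving the new function directly; it assumes the spaCy model and the NLTK WordNet data are installed, and the indices below are chosen to cover the word "looking":

```python
# Minimal usage sketch for find_synonyms_for_phrase (illustrative values).
from my_ghost_writer.text_parsers2 import find_synonyms_for_phrase

text = "He was looking at the cat."
# characters 7..14 select "looking" inside `text`
results = find_synonyms_for_phrase(text, start_idx=7, end_idx=14)
for word_result in results:
    print(word_result.original_word,
          [group.relation_type for group in word_result.related_word_groups])
```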
 def extract_contextual_info_by_indices(text: str, start_idx: int, end_idx: int, target_word: str) -> dict[str, Any]:
+    """
+    Extract grammatical and contextual information using character indices.
+    Raises: HTTPException: If the spaCy model is unavailable or if the indices are invalid.
+
+    Args:
+        text: The input text (str).
+        start_idx: The start index of the word within the text (int).
+        end_idx: The end index of the word within the text (int).
+        target_word: The target word (str).
+
+    Returns:
+        A dictionary containing contextual information about the word (dict[str, Any]).
+    """
     if nlp is None:
         raise HTTPException(status_code=500, detail="spaCy model not available")
...
         context_words = [t.text for t in sentence_tokens[context_start:context_end]]

         return {
+            "word": target_token.text,
+            "lemma": target_token.lemma_,
+            "pos": target_token.pos_,
+            "tag": target_token.tag_,
+            "is_title": target_token.is_title,
+            "is_upper": target_token.is_upper,
+            "is_lower": target_token.is_lower,
+            "dependency": target_token.dep_,
+            "context_sentence": target_token.sent.text,
+            "context_words": context_words,
+            "sentence_position": target_position_in_sentence,
+            "char_start": target_token.idx,
+            "char_end": target_token.idx + len(target_token.text),
+            "original_indices": {"start": start_idx, "end": end_idx},
         }

     except Exception as indices_ex:
...
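The function above resolves character indices to a spaCy token and then reads its attributes. The core idea in isolation, as a sketch assuming the `en_core_web_sm` model (the project's SPACY_MODEL_NAME may differ):

```python
# Sketch: map a character span onto a spaCy token and read its context info.
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("She was wearing square glasses.")
span = doc.char_span(16, 22, alignment_mode="expand")  # characters 16..22 -> "square"
token = span[0]
print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_, token.sent.text)
```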
 def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[str, Any]]:
+    """
+    Gets related words from WordNet and custom synonym handler,
+    returning a list of dictionaries containing the raw data, grouped by relation type.
+
+    Args:
+        word: The word to get related words for (str).
+        pos_tag: An optional part-of-speech tag to filter WordNet results (Optional[str]).
+
+    Returns:
+        A list of dictionaries, where each dictionary represents a group of related words (list[dict[str, Any]]).
+    """
+    related_word_groups_raw: list[dict[str, Any]] = []
     word_lower = word.lower()

+    def _get_related_words(related_object, relation_type: TermRelationships, inner_word_lower: str):
+        related_words = []
+
+        if relation_type == TermRelationships.SYNONYM:
+            # related_object is a Synset
+            for local_lemma in related_object.lemmas():
+                lemma_name = local_lemma.name().replace("_", " ")
+                if lemma_name.lower() != inner_word_lower:
+                    related_words.append({
+                        "base_form": lemma_name
+                    })
+        elif relation_type == TermRelationships.ANTONYM:
+            # related_object is a Lemma
+            for ant in related_object.antonyms():
+                ant_name = ant.name().replace("_", " ")
+                if ant_name.lower() != inner_word_lower:
+                    related_words.append({
+                        "base_form": ant_name
+                    })
+        else:
+            # related_object is a Synset
+            # Get related synsets from the appropriate method
+            relation_methods = {
+                TermRelationships.HYPERNYM: related_object.hypernyms,
+                TermRelationships.HYPONYM: related_object.hyponyms,
+                TermRelationships.MERONYM: lambda: related_object.member_meronyms() + related_object.substance_meronyms() + related_object.part_meronyms(),
+                TermRelationships.HOLONYM: lambda: related_object.member_holonyms() + related_object.substance_holonyms() + related_object.part_holonyms(),
+                TermRelationships.ALSO_SEE: related_object.also_sees,
+                TermRelationships.CAUSE: related_object.causes,
+                # TermRelationships.DERIVATIONALLY_RELATED_FORM: related_object.derivationally_related_forms,
+                # TermRelationships.ENTAILMENT: related_object.entails,
+                # TermRelationships.PERTAINYM: related_object.pertainyms,
+                TermRelationships.SIMILAR_TO: related_object.similar_tos,
+            }
+            get_words_fn = relation_methods.get(relation_type)
+            if get_words_fn:
+                for related_synset in get_words_fn():
+                    # Some methods return Lemma objects, handle both cases
+                    if hasattr(related_synset, "lemmas"):
+                        for local_lemma in related_synset.lemmas():
+                            lemma_name = local_lemma.name().replace("_", " ")
+                            if lemma_name.lower() != inner_word_lower:
+                                related_words.append({
+                                    "base_form": lemma_name,
+                                    # "is_custom": False,
+                                })
+                    elif hasattr(related_synset, "name"):
+                        lemma_name = related_synset.name().replace("_", " ")
+                        if lemma_name.lower() != inner_word_lower:
+                            related_words.append({
+                                "base_form": lemma_name,
+                                # "is_custom": False,
+                            })
+
+        if related_words:
+            return {
+                "relation_type": relation_type,
+                "source": "wordnet",
+                "definition": related_object.definition() if hasattr(related_object, "definition") else "",
+                "examples": related_object.examples()[:2] if hasattr(related_object, "examples") else [],
+                "wordnet_pos": related_object.pos() if hasattr(related_object, "pos") else None,
+                "related_words": related_words,
+            }
+        return None
+
+    # 1. Custom Related Word Lookup (all relationships)
+    for rel_type in TermRelationships:
+        custom_groups = custom_synonym_handler.get_related(word_lower, rel_type)
+        if custom_groups:
+            for related in custom_groups:
+                words = related["words"]
+                definition = related.get("definition", "")
+                related_word_options = []
+                for word_from_related_words in words:
+                    related_word_options.append({
+                        "base_form": word_from_related_words,
+                        "is_custom": True,
+                        "definition": definition,
+                    })
+                related_word_groups_raw.append({
+                    "relation_type": rel_type,
+                    "source": "custom",
+                    "definition": definition,
+                    "examples": [],
+                    "wordnet_pos": None,
+                    "related_words": related_word_options,
+                })
+    # 2. WordNet Lookup
     try:
         # Map spaCy POS to wn POS
         pos_map = {
+            "NOUN": wn.NOUN,
+            "VERB": wn.VERB,
+            "ADJ": wn.ADJ,
+            "ADV": wn.ADV,
         }

         # Get all synsets for the word
...
         if pos_tag and pos_tag in pos_map:
             synsets = [s for s in synsets if s.pos() == pos_map[pos_tag]]

+        # Process each synset and its relations
         for synset in synsets:
+            result = _get_related_words(synset, TermRelationships.SYNONYM, word_lower)
+            if result:
+                related_word_groups_raw.append(result)
+            # todo: check if it's possible to remove the first 'IF result:...' and move the second one
             for lemma in synset.lemmas():
+                result = _get_related_words(lemma, TermRelationships.ANTONYM, word_lower)
+                if result:
+                    related_word_groups_raw.append(result)
+            for rel_type in [
+                TermRelationships.HYPERNYM, TermRelationships.HYPONYM, TermRelationships.MERONYM,
+                TermRelationships.HOLONYM, TermRelationships.ALSO_SEE, TermRelationships.CAUSE,
+                # todo: try to understand how to fix the related missing methods
+                # TermRelationships.DERIVATIONALLY_RELATED_FORM,
+                # TermRelationships.ENTAILMENT,
+                # TermRelationships.PERTAINYM,
+                TermRelationships.SIMILAR_TO
+            ]:
+                result = _get_related_words(synset, rel_type, word_lower)
+                if result:
+                    related_word_groups_raw.append(result)

     except Exception as ex1:
+        app_logger.error(f"Error getting wn synonyms: '{ex1}' with: word:{type(word)}, '{word}', pos_tag: {type(pos_tag)}, '{pos_tag}'")
+        raise HTTPException(status_code=500, detail=f"Error retrieving related words: '{str(ex1)}'")

+    return related_word_groups_raw
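A minimal NLTK WordNet sketch of the relation calls `_get_related_words` relies on, assuming the `wordnet` corpus has been downloaded (e.g. via `nltk.download("wordnet")`):

```python
# Sketch: the same Synset/Lemma accessors used by _get_related_words above.
from nltk.corpus import wordnet as wn

synset = wn.synsets("look", pos=wn.VERB)[0]
print([lemma.name() for lemma in synset.lemmas()])   # synonym lemmas of this sense
print([h.name() for h in synset.hypernyms()])        # hypernym synsets
antonyms = [ant.name() for lemma in synset.lemmas() for ant in lemma.antonyms()]
print(antonyms)                                      # antonym lemmas (may be empty)
```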
 def inflect_synonym(synonym: str, original_token_info: dict[str, Any]) -> str:
...
     if nlp is None:
         return synonym

+    pos = original_token_info.get("pos")
+    tag = original_token_info.get("tag")

     # Handle capitalization first using .get() for safety
+    if original_token_info.get("is_title"):
         synonym = synonym.title()  # .title() is better for multi-word phrases
+    elif original_token_info.get("is_upper"):
         synonym = synonym.upper()
+    elif original_token_info.get("is_lower", True):  # Default to lower
         synonym = synonym.lower()

     # Handle grammatical inflection
     try:
         # Define all tags that require inflection in one place
         inflection_tags = {
+            "NOUN": ["NNS", "NNPS"],
+            "VERB": ["VBD", "VBN", "VBZ", "VBG"],
+            "ADJ": ["JJR", "JJS"],
         }

         # Single check for all inflection cases
...
     return synonym
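The tag-driven inflection itself comes from pyinflect, the library imported at the top of the module for exactly this purpose. A quick sketch of the underlying call; `getInflection` returns a tuple of candidate forms, or `None` when no rule applies:

```python
# Sketch: inflecting a base form toward a Penn Treebank tag with pyinflect.
from pyinflect import getInflection

print(getInflection("look", "VBG"))  # expected ('looking',)
print(getInflection("glass", "NNS"))  # plural noun form
print(getInflection("good", "JJS"))   # superlative adjective form, if available
```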
+def process_synonym_groups(word: str, context_info: dict[str, Any]) -> list[RelatedWordGroup]:
+    """Process given related word groups with inflection matching
+
+    Args:
+        word (str): the word
+        context_info (dict[str, Any]): the original form of data
+
+    Returns:
+        list[RelatedWordGroup]: List of the processed related words
+    """
+    # Get related words from wn using the lemma
     t0 = datetime.now()
+    related_words_raw = get_wordnet_synonyms(context_info["lemma"], context_info["pos"])
     t1 = datetime.now()
     duration = (t1 - t0).total_seconds()
     app_logger.info(f"# 1/Got get_wordnet_synonyms result with '{word}' word in {duration:.3f}s.")

+    if not related_words_raw:
         return []

+    # Process each related word group
+    processed_groups: list[RelatedWordGroup] = []
+    for related_group in related_words_raw:
+        app_logger.info(f"related_group:'{related_group}'")
+        relation_type = related_group["relation_type"]
+        definition = related_group.get("definition", "")
+        examples = related_group.get("examples", [])
+        wordnet_pos = related_group.get("wordnet_pos")
+        related_words = related_group["related_words"]
+        processed_options: list[RelatedWordOption] = []
+
+        for related_word in related_words:
+            base_form = related_word["base_form"]
+            inflected_form = inflect_synonym(base_form, context_info)
+
+            related_word_option = RelatedWordOption(
+                base_form=base_form,
+                inflected_form=inflected_form,
+                matches_context=inflected_form.lower() != base_form.lower()
+            )
+            if "is_custom" in related_word:
+                related_word_option.is_custom = related_word["is_custom"]
+            processed_options.append(related_word_option)
+        app_logger.info(f"wordnet_pos:{type(wordnet_pos)}, '{wordnet_pos}'")
+        processed_groups.append(
+            RelatedWordGroup(
+                relation_type=relation_type,
+                definition=definition,
+                examples=examples,
+                related_words=processed_options,
+                wordnet_pos=wordnet_pos
+            )
+        )
+    return processed_groups
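A sketch of driving `process_synonym_groups` directly with a minimal context dict containing only the keys the functions above actually read (illustrative values; assumes the spaCy model and WordNet data are available at import time):

```python
# Sketch: process_synonym_groups needs "lemma"/"pos" for the lookup and
# "tag"/"is_title"/"is_upper"/"is_lower" for inflection and casing.
from my_ghost_writer.text_parsers2 import process_synonym_groups

context_info = {
    "lemma": "look", "pos": "VERB", "tag": "VBG",
    "is_title": False, "is_upper": False, "is_lower": True,
}
groups = process_synonym_groups("looking", context_info)
for group in groups:
    print(group.relation_type, [opt.inflected_form for opt in group.related_words])
```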
my_ghost_writer/type_hints.py
CHANGED
@@ -1,23 +1,41 @@
-from […]
-from […]


 class RelatedEntry(BaseModel):
-    […]
     words: list[str]
-    definition: Optional[str] = None


-class […]
     word: str
     related: list[RelatedEntry]

-    # @field_validator("synonyms")
-    # def synonyms_must_not_be_empty(cls, v):
-    #     if not v:
-    #         raise ValueError("Synonym list cannot be empty.")
-    #     return v
-

 class SynonymInfo(TypedDict):
     synonym: str
@@ -50,17 +68,19 @@ class RequestQueryThesaurusInflatedBody(BaseModel):
     word: str


-class […]
     base_form: str
     inflected_form: str
     matches_context: bool


-class […]
     definition: str
     examples: list[str]
-    wordnet_pos: str
-    […]


 class ContextInfo(BaseModel):
@@ -76,25 +96,40 @@ class SingleWordSynonymResponse(BaseModel):
     original_word: str
     original_indices: dict[str, int]
     context_info: ContextInfo
-    synonym_groups: list[…]
     message: Optional[str] = None
     debug_info: Optional[dict[str, Any]] = None


-class […]
     original_word: str
     original_indices: dict[str, int]
     context_info: ContextInfo
-    […]
     debug_info: Optional[dict[str, Any]] = None


-class […]
     success: bool
     original_phrase: str
     original_indices: dict[str, int]
-    results: list[…]
     message: Optional[str] = None


 class HealthCheckResponse(BaseModel):
@@ -105,7 +140,7 @@ class HealthCheckResponse(BaseModel):

 class InputTextRow(TypedDict):
     """
-    TypedDict for input text row.
     """
     idxRow: int
     text: str
+from decimal import Decimal
+from enum import Enum
+from typing import Any, Optional, TypedDict, Union
+
+from pydantic import BaseModel, field_validator, Field, field_serializer
+
+decimal_places = 4
+
+
+class TermRelationships(str, Enum):
+    """
+    An enumeration representing the possible relationships between terms.
+    """
+    ALSO_SEE = "also_see"
+    ANTONYM = "antonym"
+    CAUSE = "cause"
+    DERIVATIONALLY_RELATED_FORM = "derivationally_related_form"
+    ENTAILMENT = "entailment"
+    HOLONYM = "holonym"
+    HYPERNYM = "hypernym"
+    HYPONYM = "hyponym"
+    MERONYM = "meronym"
+    PERTAINYM = "pertainym"
+    SIMILAR_TO = "similar_to"
+    SYNONYM = "synonym"


 class RelatedEntry(BaseModel):
+    """Represents a related entry for a custom-related word."""
+    type: TermRelationships
     words: list[str]
+    definition: Optional[str] = None


+class CustomRelatedWordRequest(BaseModel):
     word: str
     related: list[RelatedEntry]


 class SynonymInfo(TypedDict):
     synonym: str
...
     word: str


+class RelatedWordOption(BaseModel):
     base_form: str
     inflected_form: str
     matches_context: bool
+    is_custom: Optional[bool] = None


+class RelatedWordGroup(BaseModel):
+    relation_type: TermRelationships
     definition: str
     examples: list[str]
+    wordnet_pos: Optional[str]
+    related_words: list[RelatedWordOption]


 class ContextInfo(BaseModel):
...
     original_word: str
     original_indices: dict[str, int]
     context_info: ContextInfo
+    synonym_groups: list[RelatedWordGroup]
     message: Optional[str] = None
     debug_info: Optional[dict[str, Any]] = None


+class RelatedWordWordResult(BaseModel):
     original_word: str
     original_indices: dict[str, int]
     context_info: ContextInfo
+    related_word_groups: list[RelatedWordGroup]
+    message: Optional[str] = None
     debug_info: Optional[dict[str, Any]] = None


+class MultiRelatedWordResponse(BaseModel):
     success: bool
     original_phrase: str
     original_indices: dict[str, int]
+    results: list[RelatedWordWordResult]
     message: Optional[str] = None
+    duration: Optional[Decimal] = Field(gt=0, decimal_places=decimal_places)
+
+    @field_validator('duration', mode="before")
+    def validate_duration(cls, v: Union[float, Decimal, str, None]) -> Optional[Decimal]:
+        if v is None:
+            return v
+        return Decimal(f"{v:.{decimal_places}f}")
+
+    @field_serializer('duration')
+    def serialize_duration(self, value: Optional[Decimal]) -> Optional[float]:
+        """Serialize Decimal as float for JSON output"""
+        if value is None:
+            return None
+        return float(value)


 class HealthCheckResponse(BaseModel):
...

 class InputTextRow(TypedDict):
     """
+    TypedDict for an input text row.
     """
     idxRow: int
     text: str
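A standalone sketch of the duration round-trip defined above, assuming the pydantic v2 semantics shown in the diff: a float comes in, is quantized to 4 decimal places as a `Decimal`, and serializes back to a float:

```python
# Sketch: duration validation/serialization on MultiRelatedWordResponse.
from my_ghost_writer.type_hints import MultiRelatedWordResponse

resp = MultiRelatedWordResponse(
    success=True, original_phrase="looking",
    original_indices={"start": 42, "end": 49},
    results=[], message=None, duration=0.123456,
)
print(resp.duration)                   # expected Decimal('0.1235')
print(resp.model_dump()["duration"])   # expected 0.1235 (plain float)
```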
poetry.lock
CHANGED
@@ -601,6 +601,22 @@ files = [
     {file = "joblib-1.5.1.tar.gz", hash = "sha256:f4f86e351f39fe3d0d32a9f2c3d8af1ee4cec285aafcb27003dda5205576b444"},
 ]

+[[package]]
+name = "jsonpath-ng"
+version = "1.7.0"
+description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming."
+optional = false
+python-versions = "*"
+groups = ["test"]
+files = [
+    {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"},
+    {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"},
+    {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"},
+]
+
+[package.dependencies]
+ply = "*"
+
 [[package]]
 name = "langcodes"
 version = "3.5.0"
@@ -1004,6 +1020,18 @@ files = [
 dev = ["pre-commit", "tox"]
 testing = ["coverage", "pytest", "pytest-benchmark"]

+[[package]]
+name = "ply"
+version = "3.11"
+description = "Python Lex & Yacc"
+optional = false
+python-versions = "*"
+groups = ["test"]
+files = [
+    {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"},
+    {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"},
+]
+
 [[package]]
 name = "preshed"
 version = "3.0.10"
@@ -2136,4 +2164,4 @@ files = [
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<3.14.0"
-content-hash = "[…]"
+content-hash = "ae8867d9a28d2dee6521df2f2ba249d664340d8e3b0ff65260e6ff70e45d1839"
pyproject.toml
CHANGED
@@ -28,6 +28,7 @@ optional = true
 pytest = "^8.3.5"
 pytest-cov = "^6.1.1"
 httpx = "^0.28.1"
+jsonpath-ng = "^1.7.0"

 [tool.poetry.group.webserver]
 optional = true
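The test group now pulls in jsonpath-ng (with its `ply` dependency). A minimal sketch of the kind of path query the new fixtures encode, using only the public jsonpath_ng API:

```python
# Sketch: evaluating a "$.related_words[*].base_form" path like the fixtures use.
from jsonpath_ng import parse

group = {
    "relation_type": "synonym",
    "related_words": [{"base_form": "pianoforte"}, {"base_form": "forte-piano"}],
}
expr = parse("$.related_words[*].base_form")
print([match.value for match in expr.find(group)])  # ['pianoforte', 'forte-piano']
```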
requirements-test.txt
CHANGED
@@ -1,3 +1,3 @@
 httpx==0.28.1
 pytest-cov==6.2.1
-pytest==8.4.1
+pytest==8.4.1
requirements-webserver.txt
CHANGED
@@ -1,4 +1,5 @@
 asgi-correlation-id==4.3.4
 fastapi==0.115.14
+pymongo==4.13.2
 requests==2.32.4
 uvicorn==0.34.3
requirements.txt
CHANGED
@@ -2,5 +2,7 @@ nltk==3.9.1
 pyinflect==0.5.1
 pymongo==4.13.2
 python-dotenv==1.1.1
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
 spacy==3.8.7
-structlog==25.4.0
+structlog==25.4.0
tests/events/get_wordnet_synonyms_custom_entry_happy.json
ADDED
@@ -0,0 +1 @@
+[{"$.definition": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.related_words": {"types": ["list"], "primary_type": "list", "is_array": false}, "$.related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "array_length": 1}, "$.related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.relation_type": {"types": ["TermRelationships"], "primary_type": "TermRelationships", "is_array": false}, "$.source": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.wordnet_pos": {"types": ["NoneType"], "primary_type": "NoneType", "is_array": false}}, {"$.definition": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.related_words": {"types": ["list"], "primary_type": "list", "is_array": false}, "$.related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "array_length": 9}, "$.related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.relation_type": {"types": ["TermRelationships"], "primary_type": "TermRelationships", "is_array": false}, "$.source": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.wordnet_pos": {"types": ["str"], "primary_type": "str", "is_array": false}}, {"$.definition": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.related_words": {"types": ["list"], "primary_type": "list", "is_array": false}, "$.related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "array_length": 8}, "$.related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.relation_type": {"types": ["TermRelationships"], "primary_type": "TermRelationships", "is_array": false}, "$.source": {"types": ["str"], "primary_type": "str", "is_array": false}, "$.wordnet_pos": {"types": ["str"], "primary_type": "str", "is_array": false}}]
tests/events/get_wordnet_synonyms_piano_ok1.json
ADDED
@@ -0,0 +1 @@
+{"$.definition": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds"], "sample_count": 1}, "$.examples": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$.examples[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 0}, "$.related_words": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$.related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 2}, "$.related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["pianoforte", "forte-piano"], "sample_count": 2}, "$.relation_type": {"types": ["TermRelationships"], "primary_type": "TermRelationships", "is_array": false, "samples": ["TermRelationships.SYNONYM"], "sample_count": 1}, "$.source": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["wordnet"], "sample_count": 1}, "$.wordnet_pos": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["n"], "sample_count": 1}}
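The two fixtures above store a map from JSONPath to observed types and samples rather than raw payloads, which makes the tests robust to value churn. A hypothetical sketch of how such a map could be derived (illustrative only; the commit's actual extractor logic is not shown in this excerpt):

```python
# Hypothetical sketch: walk a result dict and record, per JSONPath, the types seen.
def extract_structure(obj, path="$", out=None):
    out = {} if out is None else out
    out.setdefault(path, {"types": set()})["types"].add(type(obj).__name__)
    if isinstance(obj, dict):
        for key, value in obj.items():
            extract_structure(value, f"{path}.{key}", out)
    elif isinstance(obj, list):
        star = out.setdefault(f"{path}[*]", {"types": {"array"}, "is_array": True})
        star["array_length"] = len(obj)
        for item in obj:
            if isinstance(item, dict):
                for key, value in item.items():
                    extract_structure(value, f"{path}[*].{key}", out)
    return out

structure = extract_structure({"definition": "…", "related_words": [{"base_form": "pianoforte"}]})
print(structure)
```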
tests/events/request_thesaurus_custom4.json
ADDED
@@ -0,0 +1,20 @@
+{
+  "word": "happy",
+  "related": [
+    {
+      "definition": "def happy 1 - custom.",
+      "type": "synonym",
+      "words": ["joyful", "cheerful"]
+    },
+    {
+      "definition": "def happy 2 - custom.",
+      "type": "synonym",
+      "words": ["joy", "cheer", "elated"]
+    },
+    {
+      "definition": "def sad - custom.",
+      "type": "antonym",
+      "words": ["sad", "unhappy"]
+    }
+  ]
+}
tests/events/response_thesaurus_phrase_inflated.json
CHANGED
@@ -31,15 +31,182 @@
   ],
   "dependency": "amod"
 },
-"[…]
 {
   "definition": "give a certain impression or have a certain outward aspect",
   "examples": [
     "She seems to be sleeping",
     "This appears to be a very difficult problem"
   ],
   "wordnet_pos": "v",
-  "[…]
   {
     "base_form": "appear",
     "inflected_form": "appearing",
@@ -53,235 +220,1474 @@
   ]
 },
[…]
-  {
-    "base_form": "attend",
-    "inflected_form": "attending",
-    "matches_context": true
-  },
[…]
   ],
   "debug_info": {
     "spacy_token_indices": {
       "start": 42,
       "end": 49
     },
-    "lemma": "look"
-  }
-},
-{
-  "original_word": "woman",
-  "original_indices": {
-    "start": 22,
-    "end": 27
-  },
-  "context_info": {
-    "pos": "NOUN",
-    "sentence": "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.",
-    "grammatical_form": "NN",
-    "context_words": ["a", "rather", "severe", "-", "looking", "woman", "who", "was", "wearing", "square", "glasses"],
-    "dependency": "pobj"
-  },
-  "[…]
[…]
 {
   "definition": "an adult female person (as opposed to a man)",
   "examples": [
     "the woman kept house while the man hunted"
   ],
   "wordnet_pos": "n",
-  "[…]
   {
-    "base_form": "adult female",
-    "inflected_form": "adult female",
     "matches_context": false
   }
   ]
 },
 {
   "definition": "a human female employed to do housework",
   "examples": [
     "the char will clean the carpet",
     "I have a woman who comes in four hours a day while I write"
   ],
   "wordnet_pos": "n",
-  "[…]
   {
     "base_form": "char",
     "inflected_form": "char",
     "matches_context": false
   },
[…]
   {
     "base_form": "cleaning lady",
     "inflected_form": "cleaning lady",
     "matches_context": false
[…]
   ]
 },
 {
   "definition": "women as a class",
   "examples": [
     "it's an insult to American womanhood",
     "woman is the glory of creation"
   ],
   "wordnet_pos": "n",
-  "[…]
   {
     "base_form": "fair sex",
     "inflected_form": "fair sex",
     "matches_context": false
   },
[…]
   ]
@@ -296,5 +1702,6 @@
   }
 }
 ],
-"message": "Got 2 synonym groups."
 }

   ],
   "dependency": "amod"
 },
+"related_word_groups": [
 {
+  "relation_type": "hyponym",
+  "definition": "perceive with attention; direct one's gaze towards",
+  "examples": ["She looked over the expanse of land", "Look at your child!"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "goggle", "inflected_form": "goggling", "matches_context": true},
+    {"base_form": "gape", "inflected_form": "gaping", "matches_context": true},
+    {"base_form": "gawp", "inflected_form": "gawping", "matches_context": true},
+    {"base_form": "gawk", "inflected_form": "gawking", "matches_context": true},
+    {"base_form": "gloat", "inflected_form": "gloating", "matches_context": true},
+    {"base_form": "look around", "inflected_form": "looking around", "matches_context": true},
+    {"base_form": "ogle", "inflected_form": "ogling", "matches_context": true},
+    {"base_form": "give the glad eye", "inflected_form": "giving the glad eye", "matches_context": true},
+    {"base_form": "peep", "inflected_form": "peeping", "matches_context": true},
+    {"base_form": "look back", "inflected_form": "looking back", "matches_context": true},
+    {"base_form": "look backward", "inflected_form": "looking backward", "matches_context": true},
+    {"base_form": "regard", "inflected_form": "regarding", "matches_context": true},
+    {"base_form": "consider", "inflected_form": "considering", "matches_context": true},
+    {"base_form": "glance", "inflected_form": "glancing", "matches_context": true},
+    {"base_form": "peek", "inflected_form": "peeking", "matches_context": true},
+    {"base_form": "glint", "inflected_form": "glinting", "matches_context": true},
+    {"base_form": "eye", "inflected_form": "eyeing", "matches_context": true},
+    {"base_form": "eyeball", "inflected_form": "eyeballing", "matches_context": true},
+    {"base_form": "peer", "inflected_form": "peering", "matches_context": true},
+    {"base_form": "admire", "inflected_form": "admiring", "matches_context": true},
+    {"base_form": "gaze", "inflected_form": "gazing", "matches_context": true},
+    {"base_form": "stare", "inflected_form": "staring", "matches_context": true},
+    {"base_form": "look away", "inflected_form": "looking away", "matches_context": true},
+    {"base_form": "leer", "inflected_form": "leering", "matches_context": true},
+    {"base_form": "give the eye", "inflected_form": "giving the eye", "matches_context": true},
+    {"base_form": "give the once over", "inflected_form": "giving the once over", "matches_context": true},
+    {"base_form": "squint", "inflected_form": "squinting", "matches_context": true},
+    {"base_form": "take a look", "inflected_form": "taking a look", "matches_context": true},
+    {"base_form": "have a look", "inflected_form": "having a look", "matches_context": true},
+    {"base_form": "get a load", "inflected_form": "getting a load", "matches_context": true},
+    {"base_form": "stare", "inflected_form": "staring", "matches_context": true}
+  ]
+},
 {
+  "relation_type": "synonym",
   "definition": "give a certain impression or have a certain outward aspect",
   "examples": ["She seems to be sleeping", "This appears to be a very difficult problem"],
   "wordnet_pos": "v",
+  "related_words": [
     {"base_form": "appear", "inflected_form": "appearing", "matches_context": true},
[…]
   ]
 },
 {
+  "relation_type": "hypernym",
+  "definition": "give a certain impression or have a certain outward aspect",
+  "examples": ["She seems to be sleeping", "This appears to be a very difficult problem"],
   "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "be", "inflected_form": "being", "matches_context": true}
   ]
 },
 {
+  "relation_type": "hyponym",
+  "definition": "give a certain impression or have a certain outward aspect",
+  "examples": ["She seems to be sleeping", "This appears to be a very difficult problem"],
   "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "glow", "inflected_form": "glowing", "matches_context": true},
+    {"base_form": "beam", "inflected_form": "beaming", "matches_context": true},
+    {"base_form": "radiate", "inflected_form": "radiating", "matches_context": true},
+    {"base_form": "shine", "inflected_form": "shining", "matches_context": true},
+    {"base_form": "rise", "inflected_form": "rising", "matches_context": true},
+    {"base_form": "lift", "inflected_form": "lifting", "matches_context": true},
+    {"base_form": "rear", "inflected_form": "rearing", "matches_context": true},
+    {"base_form": "glitter", "inflected_form": "glittering", "matches_context": true},
+    {"base_form": "glisten", "inflected_form": "glistening", "matches_context": true},
+    {"base_form": "glint", "inflected_form": "glinting", "matches_context": true},
+    {"base_form": "gleam", "inflected_form": "gleaming", "matches_context": true},
+    {"base_form": "shine", "inflected_form": "shining", "matches_context": true},
+    {"base_form": "leap out", "inflected_form": "leaping out", "matches_context": true},
+    {"base_form": "jump out", "inflected_form": "jumping out", "matches_context": true},
+    {"base_form": "jump", "inflected_form": "jumping", "matches_context": true},
+    {"base_form": "stand out", "inflected_form": "standing out", "matches_context": true},
+    {"base_form": "stick out", "inflected_form": "sticking out", "matches_context": true},
+    {"base_form": "make", "inflected_form": "making", "matches_context": true},
+    {"base_form": "loom", "inflected_form": "looming", "matches_context": true},
+    {"base_form": "sound", "inflected_form": "sounding", "matches_context": true},
+    {"base_form": "cut", "inflected_form": "cutting", "matches_context": true},
+    {"base_form": "pass off", "inflected_form": "passing off", "matches_context": true},
+    {"base_form": "come across", "inflected_form": "coming across", "matches_context": true},
+    {"base_form": "feel", "inflected_form": "feeling", "matches_context": true},
+    {"base_form": "feel", "inflected_form": "feeling", "matches_context": true}
   ]
 },
 {
+  "relation_type": "hypernym",
+  "definition": "have a certain outward or facial expression",
+  "examples": ["How does she look?", "The child looks unhappy"],
   "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "be", "inflected_form": "being", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "hyponym",
+  "definition": "have a certain outward or facial expression",
+  "examples": ["How does she look?", "The child looks unhappy"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "squint", "inflected_form": "squinting", "matches_context": true}
   ]
 },
 {
+  "relation_type": "synonym",
+  "definition": "search or seek",
+  "examples": ["We looked all day and finally found the child in the forest", "Look elsewhere for the perfect gift!"],
   "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "search", "inflected_form": "searching", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "hypernym",
+  "definition": "search or seek",
+  "examples": ["We looked all day and finally found the child in the forest", "Look elsewhere for the perfect gift!"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "examine", "inflected_form": "examining", "matches_context": true},
+    {"base_form": "see", "inflected_form": "seeing", "matches_context": true}
   ]
 },
 {
+  "relation_type": "hyponym",
+  "definition": "search or seek",
+  "examples": ["We looked all day and finally found the child in the forest", "Look elsewhere for the perfect gift!"],
   "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "hunt", "inflected_form": "hunting", "matches_context": true},
+    {"base_form": "cruise", "inflected_form": "cruising", "matches_context": true},
+    {"base_form": "prospect", "inflected_form": "prospecting", "matches_context": true},
+    {"base_form": "intrude", "inflected_form": "intruding", "matches_context": true},
+    {"base_form": "horn in", "inflected_form": "horning in", "matches_context": true},
+    {"base_form": "pry", "inflected_form": "prying", "matches_context": true},
+    {"base_form": "nose", "inflected_form": "nosing", "matches_context": true},
+    {"base_form": "poke", "inflected_form": "poking", "matches_context": true}
   ]
+},
+{
+  "relation_type": "synonym",
+  "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
+  "examples": ["The house looks north", "My backyard look onto the pond"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "front", "inflected_form": "fronting", "matches_context": true},
+    {"base_form": "face", "inflected_form": "facing", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "antonym",
+  "definition": "",
+  "examples": [],
+  "related_words": [
+    {"base_form": "back", "inflected_form": "backing", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "hypernym",
+  "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
+  "examples": ["The house looks north", "My backyard look onto the pond"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "lie", "inflected_form": "lying", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "hyponym",
+  "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
+  "examples": ["The house looks north", "My backyard look onto the pond"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "confront", "inflected_form": "confronting", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "synonym",
+  "definition": "take charge of or deal with",
+  "examples": ["Could you see about lunch?", "I must attend to this matter"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "attend", "inflected_form": "attending", "matches_context": true},
+    {"base_form": "take care", "inflected_form": "taking care", "matches_context": true},
+    {"base_form": "see", "inflected_form": "seeing", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "hypernym",
+  "definition": "take charge of or deal with",
+  "examples": ["Could you see about lunch?", "I must attend to this matter"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "care", "inflected_form": "caring", "matches_context": true},
+    {"base_form": "give care", "inflected_form": "giving care", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "hyponym",
+  "definition": "take charge of or deal with",
+  "examples": ["Could you see about lunch?", "I must attend to this matter"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "tend", "inflected_form": "tending", "matches_context": true},
+    {"base_form": "minister", "inflected_form": "ministering", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "hypernym",
+  "definition": "convey by one's expression",
+  "examples": ["She looked her devotion to me"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "convey", "inflected_form": "conveying", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "synonym",
+  "definition": "look forward to the probable occurrence of",
+  "examples": ["We were expecting a visit from our relatives", "She is looking to a promotion"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "expect", "inflected_form": "expecting", "matches_context": true},
+    {"base_form": "await", "inflected_form": "awaiting", "matches_context": true},
+    {"base_form": "wait", "inflected_form": "waiting", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "hyponym",
+  "definition": "look forward to the probable occurrence of",
+  "examples": ["We were expecting a visit from our relatives", "She is looking to a promotion"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "look forward", "inflected_form": "looking forward", "matches_context": true},
+    {"base_form": "anticipate", "inflected_form": "anticipating", "matches_context": true},
+    {"base_form": "look for", "inflected_form": "looking for", "matches_context": true},
+    {"base_form": "look to", "inflected_form": "looking to", "matches_context": true},
+    {"base_form": "hang on", "inflected_form": "hanging on", "matches_context": true},
+    {"base_form": "hold the line", "inflected_form": "holding the line", "matches_context": true},
+    {"base_form": "hold on", "inflected_form": "holding on", "matches_context": true},
+    {"base_form": "expect", "inflected_form": "expecting", "matches_context": true},
+    {"base_form": "look to", "inflected_form": "looking to", "matches_context": true}
+  ]
+},
+{
+  "relation_type": "hypernym",
+  "definition": "accord in appearance with",
+  "examples": ["You don't look your age!"],
+  "wordnet_pos": "v",
+  "related_words": [
+    {"base_form": "match", "inflected_form": "matching", "matches_context": true},
+    {"base_form": "fit", "inflected_form": "fitting", "matches_context": true},
+    {"base_form": "correspond", "inflected_form": "corresponding", "matches_context": true},
+    {"base_form": "check", "inflected_form": "checking", "matches_context": true},
},
|
752 |
+
{
|
753 |
+
"base_form": "jibe",
|
754 |
+
"inflected_form": "jibing",
|
755 |
+
"matches_context": true
|
756 |
+
},
|
757 |
+
{
|
758 |
+
"base_form": "gibe",
|
759 |
+
"inflected_form": "gibing",
|
760 |
+
"matches_context": true
|
761 |
+
},
|
762 |
+
{
|
763 |
+
"base_form": "tally",
|
764 |
+
"inflected_form": "tallying",
|
765 |
+
"matches_context": true
|
766 |
+
},
|
767 |
+
{
|
768 |
+
"base_form": "agree",
|
769 |
+
"inflected_form": "agreeing",
|
770 |
+
"matches_context": true
|
771 |
+
}
|
772 |
+
]
|
773 |
+
},
|
774 |
+
{
|
775 |
+
"relation_type": "synonym",
|
776 |
+
"definition": "have faith or confidence in",
|
777 |
+
"examples": [
|
778 |
+
"you can count on me to help you any time",
|
779 |
+
"Look to your friends for support"
|
780 |
+
],
|
781 |
+
"wordnet_pos": "v",
|
782 |
+
"related_words": [
|
783 |
+
{
|
784 |
+
"base_form": "count",
|
785 |
+
"inflected_form": "counting",
|
786 |
+
"matches_context": true
|
787 |
+
},
|
788 |
+
{
|
789 |
+
"base_form": "bet",
|
790 |
+
"inflected_form": "betting",
|
791 |
+
"matches_context": true
|
792 |
+
},
|
793 |
+
{
|
794 |
+
"base_form": "depend",
|
795 |
+
"inflected_form": "depending",
|
796 |
+
"matches_context": true
|
797 |
+
},
|
798 |
+
{
|
799 |
+
"base_form": "swear",
|
800 |
+
"inflected_form": "swearing",
|
801 |
+
"matches_context": true
|
802 |
+
},
|
803 |
+
{
|
804 |
+
"base_form": "rely",
|
805 |
+
"inflected_form": "relying",
|
806 |
+
"matches_context": true
|
807 |
+
},
|
808 |
+
{
|
809 |
+
"base_form": "bank",
|
810 |
+
"inflected_form": "banking",
|
811 |
+
"matches_context": true
|
812 |
+
},
|
813 |
+
{
|
814 |
+
"base_form": "calculate",
|
815 |
+
"inflected_form": "calculating",
|
816 |
+
"matches_context": true
|
817 |
+
},
|
818 |
+
{
|
819 |
+
"base_form": "reckon",
|
820 |
+
"inflected_form": "reckoning",
|
821 |
+
"matches_context": true
|
822 |
+
}
|
823 |
+
]
|
824 |
+
},
|
825 |
+
{
|
826 |
+
"relation_type": "hypernym",
|
827 |
+
"definition": "have faith or confidence in",
|
828 |
+
"examples": [
|
829 |
+
"you can count on me to help you any time",
|
830 |
+
"Look to your friends for support"
|
831 |
+
],
|
832 |
+
"wordnet_pos": "v",
|
833 |
+
"related_words": [
|
834 |
+
{
|
835 |
+
"base_form": "trust",
|
836 |
+
"inflected_form": "trusting",
|
837 |
+
"matches_context": true
|
838 |
+
}
|
839 |
+
]
|
840 |
+
}
|
841 |
],
|
842 |
"debug_info": {
|
843 |
"spacy_token_indices": {
|
844 |
"start": 42,
|
845 |
"end": 49
|
846 |
},
|
847 |
+
"lemma": "look"
|
848 |
+
}
|
849 |
+
},
|
850 |
+
{
|
851 |
+
"original_word": "woman",
|
852 |
+
"original_indices": {
|
853 |
+
"start": 22,
|
854 |
+
"end": 27
|
855 |
+
},
|
856 |
+
"context_info": {
|
857 |
+
"pos": "NOUN",
|
858 |
+
"sentence": "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.",
|
859 |
+
"grammatical_form": "NN",
|
860 |
+
"context_words": [
|
861 |
+
"a",
|
862 |
+
"rather",
|
863 |
+
"severe",
|
864 |
+
"-",
|
865 |
+
"looking",
|
866 |
+
"woman",
|
867 |
+
"who",
|
868 |
+
"was",
|
869 |
+
"wearing",
|
870 |
+
"square",
|
871 |
+
"glasses"
|
872 |
+
],
|
873 |
+
"dependency": "pobj"
|
874 |
+
},
|
875 |
+
"related_word_groups": [
|
876 |
+
{
|
877 |
+
"relation_type": "synonym",
|
878 |
+
"definition": "an adult female person (as opposed to a man)",
|
879 |
+
"examples": [
|
880 |
+
"the woman kept house while the man hunted"
|
881 |
+
],
|
882 |
+
"wordnet_pos": "n",
|
883 |
+
"related_words": [
|
884 |
+
{
|
885 |
+
"base_form": "adult female",
|
886 |
+
"inflected_form": "adult female",
|
887 |
+
"matches_context": false
|
888 |
+
}
|
889 |
+
]
|
890 |
+
},
|
891 |
+
{
|
892 |
+
"relation_type": "antonym",
|
893 |
+
"definition": "",
|
894 |
+
"examples": [],
|
895 |
+
"related_words": [
|
896 |
+
{
|
897 |
+
"base_form": "man",
|
898 |
+
"inflected_form": "man",
|
899 |
+
"matches_context": false
|
900 |
+
}
|
901 |
+
]
|
902 |
+
},
|
903 |
+
{
|
904 |
+
"relation_type": "hypernym",
|
905 |
+
"definition": "an adult female person (as opposed to a man)",
|
906 |
+
"examples": [
|
907 |
+
"the woman kept house while the man hunted"
|
908 |
+
],
|
909 |
+
"wordnet_pos": "n",
|
910 |
+
"related_words": [
|
911 |
+
{
|
912 |
+
"base_form": "female",
|
913 |
+
"inflected_form": "female",
|
914 |
+
"matches_context": false
|
915 |
+
},
|
916 |
+
{
|
917 |
+
"base_form": "female person",
|
918 |
+
"inflected_form": "female person",
|
919 |
+
"matches_context": false
|
920 |
+
},
|
921 |
+
{
|
922 |
+
"base_form": "adult",
|
923 |
+
"inflected_form": "adult",
|
924 |
+
"matches_context": false
|
925 |
+
},
|
926 |
+
{
|
927 |
+
"base_form": "grownup",
|
928 |
+
"inflected_form": "grownup",
|
929 |
+
"matches_context": false
|
930 |
+
}
|
931 |
+
]
|
932 |
+
},
|
933 |
+
{
|
934 |
+
"relation_type": "hyponym",
|
935 |
+
"definition": "an adult female person (as opposed to a man)",
|
936 |
+
"examples": [
|
937 |
+
"the woman kept house while the man hunted"
|
938 |
+
],
|
939 |
+
"wordnet_pos": "n",
|
940 |
+
"related_words": [
|
941 |
+
{
|
942 |
+
"base_form": "jezebel",
|
943 |
+
"inflected_form": "jezebel",
|
944 |
+
"matches_context": false
|
945 |
+
},
|
946 |
+
{
|
947 |
+
"base_form": "mother figure",
|
948 |
+
"inflected_form": "mother figure",
|
949 |
+
"matches_context": false
|
950 |
+
},
|
951 |
+
{
|
952 |
+
"base_form": "smasher",
|
953 |
+
"inflected_form": "smasher",
|
954 |
+
"matches_context": false
|
955 |
+
},
|
956 |
+
{
|
957 |
+
"base_form": "stunner",
|
958 |
+
"inflected_form": "stunner",
|
959 |
+
"matches_context": false
|
960 |
+
},
|
961 |
+
{
|
962 |
+
"base_form": "knockout",
|
963 |
+
"inflected_form": "knockout",
|
964 |
+
"matches_context": false
|
965 |
+
},
|
966 |
+
{
|
967 |
+
"base_form": "beauty",
|
968 |
+
"inflected_form": "beauty",
|
969 |
+
"matches_context": false
|
970 |
+
},
|
971 |
+
{
|
972 |
+
"base_form": "ravisher",
|
973 |
+
"inflected_form": "ravisher",
|
974 |
+
"matches_context": false
|
975 |
+
},
|
976 |
+
{
|
977 |
+
"base_form": "sweetheart",
|
978 |
+
"inflected_form": "sweetheart",
|
979 |
+
"matches_context": false
|
980 |
+
},
|
981 |
+
{
|
982 |
+
"base_form": "peach",
|
983 |
+
"inflected_form": "peach",
|
984 |
+
"matches_context": false
|
985 |
+
},
|
986 |
+
{
|
987 |
+
"base_form": "lulu",
|
988 |
+
"inflected_form": "lulu",
|
989 |
+
"matches_context": false
|
990 |
+
},
|
991 |
+
{
|
992 |
+
"base_form": "looker",
|
993 |
+
"inflected_form": "looker",
|
994 |
+
"matches_context": false
|
995 |
+
},
|
996 |
+
{
|
997 |
+
"base_form": "mantrap",
|
998 |
+
"inflected_form": "mantrap",
|
999 |
+
"matches_context": false
|
1000 |
+
},
|
1001 |
+
{
|
1002 |
+
"base_form": "dish",
|
1003 |
+
"inflected_form": "dish",
|
1004 |
+
"matches_context": false
|
1005 |
+
},
|
1006 |
+
{
|
1007 |
+
"base_form": "B-girl",
|
1008 |
+
"inflected_form": "b-girl",
|
1009 |
+
"matches_context": false
|
1010 |
+
},
|
1011 |
+
{
|
1012 |
+
"base_form": "bar girl",
|
1013 |
+
"inflected_form": "bar girl",
|
1014 |
+
"matches_context": false
|
1015 |
+
},
|
1016 |
+
{
|
1017 |
+
"base_form": "heroine",
|
1018 |
+
"inflected_form": "heroine",
|
1019 |
+
"matches_context": false
|
1020 |
+
},
|
1021 |
+
{
|
1022 |
+
"base_form": "prostitute",
|
1023 |
+
"inflected_form": "prostitute",
|
1024 |
+
"matches_context": false
|
1025 |
+
},
|
1026 |
+
{
|
1027 |
+
"base_form": "cocotte",
|
1028 |
+
"inflected_form": "cocotte",
|
1029 |
+
"matches_context": false
|
1030 |
+
},
|
1031 |
+
{
|
1032 |
+
"base_form": "whore",
|
1033 |
+
"inflected_form": "whore",
|
1034 |
+
"matches_context": false
|
1035 |
+
},
|
1036 |
+
{
|
1037 |
+
"base_form": "harlot",
|
1038 |
+
"inflected_form": "harlot",
|
1039 |
+
"matches_context": false
|
1040 |
+
},
|
1041 |
+
{
|
1042 |
+
"base_form": "bawd",
|
1043 |
+
"inflected_form": "bawd",
|
1044 |
+
"matches_context": false
|
1045 |
+
},
|
1046 |
+
{
|
1047 |
+
"base_form": "tart",
|
1048 |
+
"inflected_form": "tart",
|
1049 |
+
"matches_context": false
|
1050 |
+
},
|
1051 |
+
{
|
1052 |
+
"base_form": "cyprian",
|
1053 |
+
"inflected_form": "cyprian",
|
1054 |
+
"matches_context": false
|
1055 |
+
},
|
1056 |
+
{
|
1057 |
+
"base_form": "fancy woman",
|
1058 |
+
"inflected_form": "fancy woman",
|
1059 |
+
"matches_context": false
|
1060 |
+
},
|
1061 |
+
{
|
1062 |
+
"base_form": "working girl",
|
1063 |
+
"inflected_form": "working girl",
|
1064 |
+
"matches_context": false
|
1065 |
+
},
|
1066 |
+
{
|
1067 |
+
"base_form": "sporting lady",
|
1068 |
+
"inflected_form": "sporting lady",
|
1069 |
+
"matches_context": false
|
1070 |
+
},
|
1071 |
+
{
|
1072 |
+
"base_form": "lady of pleasure",
|
1073 |
+
"inflected_form": "lady of pleasure",
|
1074 |
+
"matches_context": false
|
1075 |
+
},
|
1076 |
+
{
|
1077 |
+
"base_form": "woman of the street",
|
1078 |
+
"inflected_form": "woman of the street",
|
1079 |
+
"matches_context": false
|
1080 |
+
},
|
1081 |
+
{
|
1082 |
+
"base_form": "inamorata",
|
1083 |
+
"inflected_form": "inamorata",
|
1084 |
+
"matches_context": false
|
1085 |
+
},
|
1086 |
+
{
|
1087 |
+
"base_form": "cat",
|
1088 |
+
"inflected_form": "cat",
|
1089 |
+
"matches_context": false
|
1090 |
+
},
|
1091 |
+
{
|
1092 |
+
"base_form": "girl",
|
1093 |
+
"inflected_form": "girl",
|
1094 |
+
"matches_context": false
|
1095 |
+
},
|
1096 |
+
{
|
1097 |
+
"base_form": "miss",
|
1098 |
+
"inflected_form": "miss",
|
1099 |
+
"matches_context": false
|
1100 |
+
},
|
1101 |
+
{
|
1102 |
+
"base_form": "missy",
|
1103 |
+
"inflected_form": "missy",
|
1104 |
+
"matches_context": false
|
1105 |
+
},
|
1106 |
+
{
|
1107 |
+
"base_form": "young lady",
|
1108 |
+
"inflected_form": "young lady",
|
1109 |
+
"matches_context": false
|
1110 |
+
},
|
1111 |
+
{
|
1112 |
+
"base_form": "young woman",
|
1113 |
+
"inflected_form": "young woman",
|
1114 |
+
"matches_context": false
|
1115 |
+
},
|
1116 |
+
{
|
1117 |
+
"base_form": "fille",
|
1118 |
+
"inflected_form": "fille",
|
1119 |
+
"matches_context": false
|
1120 |
+
},
|
1121 |
+
{
|
1122 |
+
"base_form": "matriarch",
|
1123 |
+
"inflected_form": "matriarch",
|
1124 |
+
"matches_context": false
|
1125 |
+
},
|
1126 |
+
{
|
1127 |
+
"base_form": "wife",
|
1128 |
+
"inflected_form": "wife",
|
1129 |
+
"matches_context": false
|
1130 |
+
},
|
1131 |
+
{
|
1132 |
+
"base_form": "married woman",
|
1133 |
+
"inflected_form": "married woman",
|
1134 |
+
"matches_context": false
|
1135 |
+
},
|
1136 |
+
{
|
1137 |
+
"base_form": "girlfriend",
|
1138 |
+
"inflected_form": "girlfriend",
|
1139 |
+
"matches_context": false
|
1140 |
+
},
|
1141 |
+
{
|
1142 |
+
"base_form": "Cinderella",
|
1143 |
+
"inflected_form": "cinderella",
|
1144 |
+
"matches_context": false
|
1145 |
+
},
|
1146 |
+
{
|
1147 |
+
"base_form": "bachelor girl",
|
1148 |
+
"inflected_form": "bachelor girl",
|
1149 |
+
"matches_context": false
|
1150 |
+
},
|
1151 |
+
{
|
1152 |
+
"base_form": "bachelorette",
|
1153 |
+
"inflected_form": "bachelorette",
|
1154 |
+
"matches_context": false
|
1155 |
+
},
|
1156 |
+
{
|
1157 |
+
"base_form": "vestal",
|
1158 |
+
"inflected_form": "vestal",
|
1159 |
+
"matches_context": false
|
1160 |
+
},
|
1161 |
+
{
|
1162 |
+
"base_form": "widow",
|
1163 |
+
"inflected_form": "widow",
|
1164 |
+
"matches_context": false
|
1165 |
+
},
|
1166 |
+
{
|
1167 |
+
"base_form": "widow woman",
|
1168 |
+
"inflected_form": "widow woman",
|
1169 |
+
"matches_context": false
|
1170 |
+
},
|
1171 |
+
{
|
1172 |
+
"base_form": "Wave",
|
1173 |
+
"inflected_form": "wave",
|
1174 |
+
"matches_context": false
|
1175 |
+
},
|
1176 |
+
{
|
1177 |
+
"base_form": "nymphet",
|
1178 |
+
"inflected_form": "nymphet",
|
1179 |
+
"matches_context": false
|
1180 |
+
},
|
1181 |
+
{
|
1182 |
+
"base_form": "bluestocking",
|
1183 |
+
"inflected_form": "bluestocking",
|
1184 |
+
"matches_context": false
|
1185 |
+
},
|
1186 |
+
{
|
1187 |
+
"base_form": "bas bleu",
|
1188 |
+
"inflected_form": "bas bleu",
|
1189 |
+
"matches_context": false
|
1190 |
+
},
|
1191 |
+
{
|
1192 |
+
"base_form": "coquette",
|
1193 |
+
"inflected_form": "coquette",
|
1194 |
+
"matches_context": false
|
1195 |
+
},
|
1196 |
+
{
|
1197 |
+
"base_form": "flirt",
|
1198 |
+
"inflected_form": "flirt",
|
1199 |
+
"matches_context": false
|
1200 |
+
},
|
1201 |
+
{
|
1202 |
+
"base_form": "vamp",
|
1203 |
+
"inflected_form": "vamp",
|
1204 |
+
"matches_context": false
|
1205 |
+
},
|
1206 |
+
{
|
1207 |
+
"base_form": "vamper",
|
1208 |
+
"inflected_form": "vamper",
|
1209 |
+
"matches_context": false
|
1210 |
+
},
|
1211 |
+
{
|
1212 |
+
"base_form": "minx",
|
1213 |
+
"inflected_form": "minx",
|
1214 |
+
"matches_context": false
|
1215 |
+
},
|
1216 |
+
{
|
1217 |
+
"base_form": "tease",
|
1218 |
+
"inflected_form": "tease",
|
1219 |
+
"matches_context": false
|
1220 |
+
},
|
1221 |
+
{
|
1222 |
+
"base_form": "prickteaser",
|
1223 |
+
"inflected_form": "prickteaser",
|
1224 |
+
"matches_context": false
|
1225 |
+
},
|
1226 |
+
{
|
1227 |
+
"base_form": "bridesmaid",
|
1228 |
+
"inflected_form": "bridesmaid",
|
1229 |
+
"matches_context": false
|
1230 |
+
},
|
1231 |
+
{
|
1232 |
+
"base_form": "maid of honor",
|
1233 |
+
"inflected_form": "maid of honor",
|
1234 |
+
"matches_context": false
|
1235 |
+
},
|
1236 |
+
{
|
1237 |
+
"base_form": "white woman",
|
1238 |
+
"inflected_form": "white woman",
|
1239 |
+
"matches_context": false
|
1240 |
+
},
|
1241 |
+
{
|
1242 |
+
"base_form": "wonder woman",
|
1243 |
+
"inflected_form": "wonder woman",
|
1244 |
+
"matches_context": false
|
1245 |
+
},
|
1246 |
+
{
|
1247 |
+
"base_form": "Wac",
|
1248 |
+
"inflected_form": "wac",
|
1249 |
+
"matches_context": false
|
1250 |
+
},
|
1251 |
+
{
|
1252 |
+
"base_form": "mestiza",
|
1253 |
+
"inflected_form": "mestiza",
|
1254 |
+
"matches_context": false
|
1255 |
+
},
|
1256 |
+
{
|
1257 |
+
"base_form": "nanny",
|
1258 |
+
"inflected_form": "nanny",
|
1259 |
+
"matches_context": false
|
1260 |
+
},
|
1261 |
+
{
|
1262 |
+
"base_form": "nursemaid",
|
1263 |
+
"inflected_form": "nursemaid",
|
1264 |
+
"matches_context": false
|
1265 |
+
},
|
1266 |
+
{
|
1267 |
+
"base_form": "nurse",
|
1268 |
+
"inflected_form": "nurse",
|
1269 |
+
"matches_context": false
|
1270 |
+
},
|
1271 |
+
{
|
1272 |
+
"base_form": "debutante",
|
1273 |
+
"inflected_form": "debutante",
|
1274 |
+
"matches_context": false
|
1275 |
+
},
|
1276 |
+
{
|
1277 |
+
"base_form": "deb",
|
1278 |
+
"inflected_form": "deb",
|
1279 |
+
"matches_context": false
|
1280 |
+
},
|
1281 |
+
{
|
1282 |
+
"base_form": "dominatrix",
|
1283 |
+
"inflected_form": "dominatrix",
|
1284 |
+
"matches_context": false
|
1285 |
+
},
|
1286 |
+
{
|
1287 |
+
"base_form": "girlfriend",
|
1288 |
+
"inflected_form": "girlfriend",
|
1289 |
+
"matches_context": false
|
1290 |
+
},
|
1291 |
+
{
|
1292 |
+
"base_form": "girl",
|
1293 |
+
"inflected_form": "girl",
|
1294 |
+
"matches_context": false
|
1295 |
+
},
|
1296 |
+
{
|
1297 |
+
"base_form": "lady friend",
|
1298 |
+
"inflected_form": "lady friend",
|
1299 |
+
"matches_context": false
|
1300 |
+
},
|
1301 |
+
{
|
1302 |
+
"base_form": "unmarried woman",
|
1303 |
+
"inflected_form": "unmarried woman",
|
1304 |
+
"matches_context": false
|
1305 |
+
},
|
1306 |
+
{
|
1307 |
+
"base_form": "donna",
|
1308 |
+
"inflected_form": "donna",
|
1309 |
+
"matches_context": false
|
1310 |
+
},
|
1311 |
+
{
|
1312 |
+
"base_form": "eyeful",
|
1313 |
+
"inflected_form": "eyeful",
|
1314 |
+
"matches_context": false
|
1315 |
+
},
|
1316 |
+
{
|
1317 |
+
"base_form": "divorcee",
|
1318 |
+
"inflected_form": "divorcee",
|
1319 |
+
"matches_context": false
|
1320 |
+
},
|
1321 |
+
{
|
1322 |
+
"base_form": "grass widow",
|
1323 |
+
"inflected_form": "grass widow",
|
1324 |
+
"matches_context": false
|
1325 |
+
},
|
1326 |
+
{
|
1327 |
+
"base_form": "amazon",
|
1328 |
+
"inflected_form": "amazon",
|
1329 |
+
"matches_context": false
|
1330 |
+
},
|
1331 |
+
{
|
1332 |
+
"base_form": "virago",
|
1333 |
+
"inflected_form": "virago",
|
1334 |
+
"matches_context": false
|
1335 |
+
},
|
1336 |
+
{
|
1337 |
+
"base_form": "lady",
|
1338 |
+
"inflected_form": "lady",
|
1339 |
+
"matches_context": false
|
1340 |
+
},
|
1341 |
+
{
|
1342 |
+
"base_form": "maenad",
|
1343 |
+
"inflected_form": "maenad",
|
1344 |
+
"matches_context": false
|
1345 |
+
},
|
1346 |
+
{
|
1347 |
+
"base_form": "mistress",
|
1348 |
+
"inflected_form": "mistress",
|
1349 |
+
"matches_context": false
|
1350 |
+
},
|
1351 |
+
{
|
1352 |
+
"base_form": "kept woman",
|
1353 |
+
"inflected_form": "kept woman",
|
1354 |
+
"matches_context": false
|
1355 |
+
},
|
1356 |
+
{
|
1357 |
+
"base_form": "fancy woman",
|
1358 |
+
"inflected_form": "fancy woman",
|
1359 |
+
"matches_context": false
|
1360 |
+
},
|
1361 |
+
{
|
1362 |
+
"base_form": "nymph",
|
1363 |
+
"inflected_form": "nymph",
|
1364 |
+
"matches_context": false
|
1365 |
+
},
|
1366 |
+
{
|
1367 |
+
"base_form": "houri",
|
1368 |
+
"inflected_form": "houri",
|
1369 |
+
"matches_context": false
|
1370 |
+
},
|
1371 |
+
{
|
1372 |
+
"base_form": "geisha",
|
1373 |
+
"inflected_form": "geisha",
|
1374 |
+
"matches_context": false
|
1375 |
+
},
|
1376 |
+
{
|
1377 |
+
"base_form": "geisha girl",
|
1378 |
+
"inflected_form": "geisha girl",
|
1379 |
+
"matches_context": false
|
1380 |
+
},
|
1381 |
+
{
|
1382 |
+
"base_form": "matriarch",
|
1383 |
+
"inflected_form": "matriarch",
|
1384 |
+
"matches_context": false
|
1385 |
+
},
|
1386 |
+
{
|
1387 |
+
"base_form": "materfamilias",
|
1388 |
+
"inflected_form": "materfamilias",
|
1389 |
+
"matches_context": false
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"base_form": "matron",
|
1393 |
+
"inflected_form": "matron",
|
1394 |
+
"matches_context": false
|
1395 |
+
},
|
1396 |
+
{
|
1397 |
+
"base_form": "baggage",
|
1398 |
+
"inflected_form": "baggage",
|
1399 |
+
"matches_context": false
|
1400 |
+
},
|
1401 |
+
{
|
1402 |
+
"base_form": "broad",
|
1403 |
+
"inflected_form": "broad",
|
1404 |
+
"matches_context": false
|
1405 |
+
},
|
1406 |
+
{
|
1407 |
+
"base_form": "girl",
|
1408 |
+
"inflected_form": "girl",
|
1409 |
+
"matches_context": false
|
1410 |
+
},
|
1411 |
+
{
|
1412 |
+
"base_form": "enchantress",
|
1413 |
+
"inflected_form": "enchantress",
|
1414 |
+
"matches_context": false
|
1415 |
+
},
|
1416 |
+
{
|
1417 |
+
"base_form": "temptress",
|
1418 |
+
"inflected_form": "temptress",
|
1419 |
+
"matches_context": false
|
1420 |
+
},
|
1421 |
+
{
|
1422 |
+
"base_form": "siren",
|
1423 |
+
"inflected_form": "siren",
|
1424 |
+
"matches_context": false
|
1425 |
+
},
|
1426 |
+
{
|
1427 |
+
"base_form": "Delilah",
|
1428 |
+
"inflected_form": "delilah",
|
1429 |
+
"matches_context": false
|
1430 |
+
},
|
1431 |
+
{
|
1432 |
+
"base_form": "femme fatale",
|
1433 |
+
"inflected_form": "femme fatale",
|
1434 |
+
"matches_context": false
|
1435 |
+
},
|
1436 |
+
{
|
1437 |
+
"base_form": "gravida",
|
1438 |
+
"inflected_form": "gravida",
|
1439 |
+
"matches_context": false
|
1440 |
+
},
|
1441 |
+
{
|
1442 |
+
"base_form": "jilt",
|
1443 |
+
"inflected_form": "jilt",
|
1444 |
+
"matches_context": false
|
1445 |
+
},
|
1446 |
+
{
|
1447 |
+
"base_form": "maenad",
|
1448 |
+
"inflected_form": "maenad",
|
1449 |
+
"matches_context": false
|
1450 |
+
},
|
1451 |
+
{
|
1452 |
+
"base_form": "nullipara",
|
1453 |
+
"inflected_form": "nullipara",
|
1454 |
+
"matches_context": false
|
1455 |
+
},
|
1456 |
+
{
|
1457 |
+
"base_form": "shiksa",
|
1458 |
+
"inflected_form": "shiksa",
|
1459 |
+
"matches_context": false
|
1460 |
+
},
|
1461 |
+
{
|
1462 |
+
"base_form": "shikse",
|
1463 |
+
"inflected_form": "shikse",
|
1464 |
+
"matches_context": false
|
1465 |
+
},
|
1466 |
+
{
|
1467 |
+
"base_form": "ex-wife",
|
1468 |
+
"inflected_form": "ex-wife",
|
1469 |
+
"matches_context": false
|
1470 |
+
},
|
1471 |
+
{
|
1472 |
+
"base_form": "ex",
|
1473 |
+
"inflected_form": "ex",
|
1474 |
+
"matches_context": false
|
1475 |
+
},
|
1476 |
+
{
|
1477 |
+
"base_form": "gold digger",
|
1478 |
+
"inflected_form": "gold digger",
|
1479 |
+
"matches_context": false
|
1480 |
+
},
|
1481 |
+
{
|
1482 |
+
"base_form": "old woman",
|
1483 |
+
"inflected_form": "old woman",
|
1484 |
+
"matches_context": false
|
1485 |
+
},
|
1486 |
+
{
|
1487 |
+
"base_form": "dame",
|
1488 |
+
"inflected_form": "dame",
|
1489 |
+
"matches_context": false
|
1490 |
+
},
|
1491 |
+
{
|
1492 |
+
"base_form": "madam",
|
1493 |
+
"inflected_form": "madam",
|
1494 |
+
"matches_context": false
|
1495 |
+
},
|
1496 |
+
{
|
1497 |
+
"base_form": "ma'am",
|
1498 |
+
"inflected_form": "ma'am",
|
1499 |
+
"matches_context": false
|
1500 |
+
},
|
1501 |
+
{
|
1502 |
+
"base_form": "lady",
|
1503 |
+
"inflected_form": "lady",
|
1504 |
+
"matches_context": false
|
1505 |
+
},
|
1506 |
+
{
|
1507 |
+
"base_form": "gentlewoman",
|
1508 |
+
"inflected_form": "gentlewoman",
|
1509 |
+
"matches_context": false
|
1510 |
+
},
|
1511 |
+
{
|
1512 |
+
"base_form": "sylph",
|
1513 |
+
"inflected_form": "sylph",
|
1514 |
+
"matches_context": false
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"base_form": "ball-buster",
|
1518 |
+
"inflected_form": "ball-buster",
|
1519 |
+
"matches_context": false
|
1520 |
+
},
|
1521 |
+
{
|
1522 |
+
"base_form": "ball-breaker",
|
1523 |
+
"inflected_form": "ball-breaker",
|
1524 |
+
"matches_context": false
|
1525 |
+
}
|
1526 |
+
]
|
1527 |
+
},
|
1528 |
{
|
1529 |
+
"relation_type": "meronym",
|
1530 |
"definition": "an adult female person (as opposed to a man)",
|
1531 |
"examples": [
|
1532 |
"the woman kept house while the man hunted"
|
1533 |
],
|
1534 |
"wordnet_pos": "n",
|
1535 |
+
"related_words": [
|
1536 |
{
|
1537 |
+
"base_form": "adult female body",
|
1538 |
+
"inflected_form": "adult female body",
|
1539 |
+
"matches_context": false
|
1540 |
+
},
|
1541 |
+
{
|
1542 |
+
"base_form": "woman's body",
|
1543 |
+
"inflected_form": "woman's body",
|
1544 |
+
"matches_context": false
|
1545 |
+
}
|
1546 |
+
]
|
1547 |
+
},
|
1548 |
+
{
|
1549 |
+
"relation_type": "antonym",
|
1550 |
+
"definition": "",
|
1551 |
+
"examples": [],
|
1552 |
+
"related_words": [
|
1553 |
+
{
|
1554 |
+
"base_form": "man",
|
1555 |
+
"inflected_form": "man",
|
1556 |
+
"matches_context": false
|
1557 |
+
}
|
1558 |
+
]
|
1559 |
+
},
|
1560 |
+
{
|
1561 |
+
"relation_type": "hypernym",
|
1562 |
+
"definition": "a female person who plays a significant role (wife or mistress or girlfriend) in the life of a particular man",
|
1563 |
+
"examples": [
|
1564 |
+
"he was faithful to his woman"
|
1565 |
+
],
|
1566 |
+
"wordnet_pos": "n",
|
1567 |
+
"related_words": [
|
1568 |
+
{
|
1569 |
+
"base_form": "female",
|
1570 |
+
"inflected_form": "female",
|
1571 |
+
"matches_context": false
|
1572 |
+
},
|
1573 |
+
{
|
1574 |
+
"base_form": "female person",
|
1575 |
+
"inflected_form": "female person",
|
1576 |
"matches_context": false
|
1577 |
}
|
1578 |
]
|
1579 |
},
|
1580 |
{
|
1581 |
+
"relation_type": "synonym",
|
1582 |
"definition": "a human female employed to do housework",
|
1583 |
"examples": [
|
1584 |
"the char will clean the carpet",
|
1585 |
"I have a woman who comes in four hours a day while I write"
|
1586 |
],
|
1587 |
"wordnet_pos": "n",
|
1588 |
+
"related_words": [
|
1589 |
+
{
|
1590 |
+
"base_form": "charwoman",
|
1591 |
+
"inflected_form": "charwoman",
|
1592 |
+
"matches_context": false
|
1593 |
+
},
|
1594 |
{
|
1595 |
"base_form": "char",
|
1596 |
"inflected_form": "char",
|
1597 |
"matches_context": false
|
1598 |
},
|
1599 |
{
|
1600 |
+
"base_form": "cleaning woman",
|
1601 |
+
"inflected_form": "cleaning woman",
|
1602 |
"matches_context": false
|
1603 |
},
|
1604 |
{
|
1605 |
"base_form": "cleaning lady",
|
1606 |
"inflected_form": "cleaning lady",
|
1607 |
"matches_context": false
|
1608 |
+
}
|
1609 |
+
]
|
1610 |
+
},
|
1611 |
+
{
|
1612 |
+
"relation_type": "hypernym",
|
1613 |
+
"definition": "a human female employed to do housework",
|
1614 |
+
"examples": [
|
1615 |
+
"the char will clean the carpet",
|
1616 |
+
"I have a woman who comes in four hours a day while I write"
|
1617 |
+
],
|
1618 |
+
"wordnet_pos": "n",
|
1619 |
+
"related_words": [
|
1620 |
{
|
1621 |
+
"base_form": "cleaner",
|
1622 |
+
"inflected_form": "cleaner",
|
1623 |
"matches_context": false
|
1624 |
}
|
1625 |
]
|
1626 |
},
|
1627 |
{
|
1628 |
+
"relation_type": "synonym",
|
1629 |
"definition": "women as a class",
|
1630 |
"examples": [
|
1631 |
"it's an insult to American womanhood",
|
1632 |
"woman is the glory of creation"
|
1633 |
],
|
1634 |
"wordnet_pos": "n",
|
1635 |
+
"related_words": [
|
1636 |
+
{
|
1637 |
+
"base_form": "womanhood",
|
1638 |
+
"inflected_form": "womanhood",
|
1639 |
+
"matches_context": false
|
1640 |
+
},
|
1641 |
{
|
1642 |
"base_form": "fair sex",
|
1643 |
"inflected_form": "fair sex",
|
1644 |
"matches_context": false
|
1645 |
+
}
|
1646 |
+
]
|
1647 |
+
},
|
1648 |
+
{
|
1649 |
+
"relation_type": "hypernym",
|
1650 |
+
"definition": "women as a class",
|
1651 |
+
"examples": [
|
1652 |
+
"it's an insult to American womanhood",
|
1653 |
+
"woman is the glory of creation"
|
1654 |
+
],
|
1655 |
+
"wordnet_pos": "n",
|
1656 |
+
"related_words": [
|
1657 |
+
{
|
1658 |
+
"base_form": "class",
|
1659 |
+
"inflected_form": "class",
|
1660 |
+
"matches_context": false
|
1661 |
},
|
1662 |
{
|
1663 |
+
"base_form": "stratum",
|
1664 |
+
"inflected_form": "stratum",
|
1665 |
+
"matches_context": false
|
1666 |
+
},
|
1667 |
+
{
|
1668 |
+
"base_form": "social class",
|
1669 |
+
"inflected_form": "social class",
|
1670 |
+
"matches_context": false
|
1671 |
+
},
|
1672 |
+
{
|
1673 |
+
"base_form": "socio-economic class",
|
1674 |
+
"inflected_form": "socio-economic class",
|
1675 |
+
"matches_context": false
|
1676 |
+
}
|
1677 |
+
]
|
1678 |
+
},
|
1679 |
+
{
|
1680 |
+
"relation_type": "holonym",
|
1681 |
+
"definition": "women as a class",
|
1682 |
+
"examples": [
|
1683 |
+
"it's an insult to American womanhood",
|
1684 |
+
"woman is the glory of creation"
|
1685 |
+
],
|
1686 |
+
"wordnet_pos": "n",
|
1687 |
+
"related_words": [
|
1688 |
+
{
|
1689 |
+
"base_form": "womankind",
|
1690 |
+
"inflected_form": "womankind",
|
1691 |
"matches_context": false
|
1692 |
}
|
1693 |
]
|
|
|
1702 |
}
|
1703 |
}
|
1704 |
],
|
1705 |
+
"message": "Got 2 synonym groups.",
|
1706 |
+
"duration": 0.0003
|
1707 |
}
|
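
The fixture above (and the one added below) exercise the response schema of /thesaurus-inflated-phrase. As a quick illustration, a minimal sketch (not part of this commit) for walking that schema and printing the context-matching inflected forms could look like the following; the field names are taken from the fixture itself, while the filtering policy is an assumption:

import json
from pathlib import Path

# Hypothetical consumer of the fixture above: collect, per original word and
# relation type, the inflected forms that were flagged as matching the context.
fixture = json.loads(
    Path("tests/events/response_thesaurus_phrase_inflated.json").read_text(encoding="utf-8")
)
for result in fixture["results"]:
    word = result["original_word"]
    for group in result["related_word_groups"]:
        candidates = [
            rw["inflected_form"]
            for rw in group["related_words"]
            if rw["matches_context"]
        ]
        if candidates:
            print(f"{word} [{group['relation_type']}]: {', '.join(candidates)}")
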
tests/events/response_thesaurus_phrase_inflated2.json
ADDED
@@ -0,0 +1,1707 @@
+{
+  "success": true,
+  "original_phrase": "rather severe-looking woman",
+  "original_indices": {"start": 28, "end": 55},
+  "results": [
+    {
+      "original_word": "looking",
+      "original_indices": {"start": 14, "end": 21},
+      "context_info": {
+        "pos": "VERB",
+        "sentence": "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.",
+        "grammatical_form": "VBG",
+        "context_words": ["at", "a", "rather", "severe", "-", "looking", "woman", "who", "was", "wearing", "square"],
+        "dependency": "amod"
+      },
+      "related_word_groups": [
+        {
+          "relation_type": "hyponym",
+          "definition": "perceive with attention; direct one's gaze towards",
+          "examples": ["She looked over the expanse of land", "Look at your child!"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "goggle", "inflected_form": "goggling", "matches_context": true},
+            {"base_form": "gape", "inflected_form": "gaping", "matches_context": true},
+            {"base_form": "gawp", "inflected_form": "gawping", "matches_context": true},
+            {"base_form": "gawk", "inflected_form": "gawking", "matches_context": true},
+            {"base_form": "gloat", "inflected_form": "gloating", "matches_context": true},
+            {"base_form": "look around", "inflected_form": "looking around", "matches_context": true},
+            {"base_form": "ogle", "inflected_form": "ogling", "matches_context": true},
+            {"base_form": "give the glad eye", "inflected_form": "giving the glad eye", "matches_context": true},
+            {"base_form": "peep", "inflected_form": "peeping", "matches_context": true},
+            {"base_form": "look back", "inflected_form": "looking back", "matches_context": true},
+            {"base_form": "look backward", "inflected_form": "looking backward", "matches_context": true},
+            {"base_form": "regard", "inflected_form": "regarding", "matches_context": true},
+            {"base_form": "consider", "inflected_form": "considering", "matches_context": true},
+            {"base_form": "glance", "inflected_form": "glancing", "matches_context": true},
+            {"base_form": "peek", "inflected_form": "peeking", "matches_context": true},
+            {"base_form": "glint", "inflected_form": "glinting", "matches_context": true},
+            {"base_form": "eye", "inflected_form": "eyeing", "matches_context": true},
+            {"base_form": "eyeball", "inflected_form": "eyeballing", "matches_context": true},
+            {"base_form": "peer", "inflected_form": "peering", "matches_context": true},
+            {"base_form": "admire", "inflected_form": "admiring", "matches_context": true},
+            {"base_form": "gaze", "inflected_form": "gazing", "matches_context": true},
+            {"base_form": "stare", "inflected_form": "staring", "matches_context": true},
+            {"base_form": "look away", "inflected_form": "looking away", "matches_context": true},
+            {"base_form": "leer", "inflected_form": "leering", "matches_context": true},
+            {"base_form": "give the eye", "inflected_form": "giving the eye", "matches_context": true},
+            {"base_form": "give the once over", "inflected_form": "giving the once over", "matches_context": true},
+            {"base_form": "squint", "inflected_form": "squinting", "matches_context": true},
+            {"base_form": "take a look", "inflected_form": "taking a look", "matches_context": true},
+            {"base_form": "have a look", "inflected_form": "having a look", "matches_context": true},
+            {"base_form": "get a load", "inflected_form": "getting a load", "matches_context": true},
+            {"base_form": "stare", "inflected_form": "staring", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "synonym",
+          "definition": "give a certain impression or have a certain outward aspect",
+          "examples": ["She seems to be sleeping", "This appears to be a very difficult problem"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "appear", "inflected_form": "appearing", "matches_context": true},
+            {"base_form": "seem", "inflected_form": "seeming", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hypernym",
+          "definition": "give a certain impression or have a certain outward aspect",
+          "examples": ["She seems to be sleeping", "This appears to be a very difficult problem"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "be", "inflected_form": "being", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hyponym",
+          "definition": "give a certain impression or have a certain outward aspect",
+          "examples": ["She seems to be sleeping", "This appears to be a very difficult problem"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "glow", "inflected_form": "glowing", "matches_context": true},
+            {"base_form": "beam", "inflected_form": "beaming", "matches_context": true},
+            {"base_form": "radiate", "inflected_form": "radiating", "matches_context": true},
+            {"base_form": "shine", "inflected_form": "shining", "matches_context": true},
+            {"base_form": "rise", "inflected_form": "rising", "matches_context": true},
+            {"base_form": "lift", "inflected_form": "lifting", "matches_context": true},
+            {"base_form": "rear", "inflected_form": "rearing", "matches_context": true},
+            {"base_form": "glitter", "inflected_form": "glittering", "matches_context": true},
+            {"base_form": "glisten", "inflected_form": "glistening", "matches_context": true},
+            {"base_form": "glint", "inflected_form": "glinting", "matches_context": true},
+            {"base_form": "gleam", "inflected_form": "gleaming", "matches_context": true},
+            {"base_form": "shine", "inflected_form": "shining", "matches_context": true},
+            {"base_form": "leap out", "inflected_form": "leaping out", "matches_context": true},
+            {"base_form": "jump out", "inflected_form": "jumping out", "matches_context": true},
+            {"base_form": "jump", "inflected_form": "jumping", "matches_context": true},
+            {"base_form": "stand out", "inflected_form": "standing out", "matches_context": true},
+            {"base_form": "stick out", "inflected_form": "sticking out", "matches_context": true},
+            {"base_form": "make", "inflected_form": "making", "matches_context": true},
+            {"base_form": "loom", "inflected_form": "looming", "matches_context": true},
+            {"base_form": "sound", "inflected_form": "sounding", "matches_context": true},
+            {"base_form": "cut", "inflected_form": "cutting", "matches_context": true},
+            {"base_form": "pass off", "inflected_form": "passing off", "matches_context": true},
+            {"base_form": "come across", "inflected_form": "coming across", "matches_context": true},
+            {"base_form": "feel", "inflected_form": "feeling", "matches_context": true},
+            {"base_form": "feel", "inflected_form": "feeling", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hypernym",
+          "definition": "have a certain outward or facial expression",
+          "examples": ["How does she look?", "The child looks unhappy"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "be", "inflected_form": "being", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hyponym",
+          "definition": "have a certain outward or facial expression",
+          "examples": ["How does she look?", "The child looks unhappy"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "squint", "inflected_form": "squinting", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "synonym",
+          "definition": "search or seek",
+          "examples": ["We looked all day and finally found the child in the forest", "Look elsewhere for the perfect gift!"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "search", "inflected_form": "searching", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hypernym",
+          "definition": "search or seek",
+          "examples": ["We looked all day and finally found the child in the forest", "Look elsewhere for the perfect gift!"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "examine", "inflected_form": "examining", "matches_context": true},
+            {"base_form": "see", "inflected_form": "seeing", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hyponym",
+          "definition": "search or seek",
+          "examples": ["We looked all day and finally found the child in the forest", "Look elsewhere for the perfect gift!"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "hunt", "inflected_form": "hunting", "matches_context": true},
+            {"base_form": "cruise", "inflected_form": "cruising", "matches_context": true},
+            {"base_form": "prospect", "inflected_form": "prospecting", "matches_context": true},
+            {"base_form": "intrude", "inflected_form": "intruding", "matches_context": true},
+            {"base_form": "horn in", "inflected_form": "horning in", "matches_context": true},
+            {"base_form": "pry", "inflected_form": "prying", "matches_context": true},
+            {"base_form": "nose", "inflected_form": "nosing", "matches_context": true},
+            {"base_form": "poke", "inflected_form": "poking", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "synonym",
+          "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
+          "examples": ["The house looks north", "My backyard look onto the pond"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "front", "inflected_form": "fronting", "matches_context": true},
+            {"base_form": "face", "inflected_form": "facing", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "antonym",
+          "definition": "",
+          "examples": [],
+          "related_words": [
+            {"base_form": "back", "inflected_form": "backing", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hypernym",
+          "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
+          "examples": ["The house looks north", "My backyard look onto the pond"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "lie", "inflected_form": "lying", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hyponym",
+          "definition": "be oriented in a certain direction, often with respect to another reference point; be opposite to",
+          "examples": ["The house looks north", "My backyard look onto the pond"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "confront", "inflected_form": "confronting", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "synonym",
+          "definition": "take charge of or deal with",
+          "examples": ["Could you see about lunch?", "I must attend to this matter"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "attend", "inflected_form": "attending", "matches_context": true},
+            {"base_form": "take care", "inflected_form": "taking care", "matches_context": true},
+            {"base_form": "see", "inflected_form": "seeing", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hypernym",
+          "definition": "take charge of or deal with",
+          "examples": ["Could you see about lunch?", "I must attend to this matter"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "care", "inflected_form": "caring", "matches_context": true},
+            {"base_form": "give care", "inflected_form": "giving care", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hyponym",
+          "definition": "take charge of or deal with",
+          "examples": ["Could you see about lunch?", "I must attend to this matter"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "tend", "inflected_form": "tending", "matches_context": true},
+            {"base_form": "minister", "inflected_form": "ministering", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hypernym",
+          "definition": "convey by one's expression",
+          "examples": ["She looked her devotion to me"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "convey", "inflected_form": "conveying", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "synonym",
+          "definition": "look forward to the probable occurrence of",
+          "examples": ["We were expecting a visit from our relatives", "She is looking to a promotion"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "expect", "inflected_form": "expecting", "matches_context": true},
+            {"base_form": "await", "inflected_form": "awaiting", "matches_context": true},
+            {"base_form": "wait", "inflected_form": "waiting", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hyponym",
+          "definition": "look forward to the probable occurrence of",
+          "examples": ["We were expecting a visit from our relatives", "She is looking to a promotion"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "look forward", "inflected_form": "looking forward", "matches_context": true},
+            {"base_form": "anticipate", "inflected_form": "anticipating", "matches_context": true},
+            {"base_form": "look for", "inflected_form": "looking for", "matches_context": true},
+            {"base_form": "look to", "inflected_form": "looking to", "matches_context": true},
+            {"base_form": "hang on", "inflected_form": "hanging on", "matches_context": true},
+            {"base_form": "hold the line", "inflected_form": "holding the line", "matches_context": true},
+            {"base_form": "hold on", "inflected_form": "holding on", "matches_context": true},
+            {"base_form": "expect", "inflected_form": "expecting", "matches_context": true},
+            {"base_form": "look to", "inflected_form": "looking to", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hypernym",
+          "definition": "accord in appearance with",
+          "examples": ["You don't look your age!"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "match", "inflected_form": "matching", "matches_context": true},
+            {"base_form": "fit", "inflected_form": "fitting", "matches_context": true},
+            {"base_form": "correspond", "inflected_form": "corresponding", "matches_context": true},
+            {"base_form": "check", "inflected_form": "checking", "matches_context": true},
+            {"base_form": "jibe", "inflected_form": "jibing", "matches_context": true},
+            {"base_form": "gibe", "inflected_form": "gibing", "matches_context": true},
+            {"base_form": "tally", "inflected_form": "tallying", "matches_context": true},
+            {"base_form": "agree", "inflected_form": "agreeing", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "synonym",
+          "definition": "have faith or confidence in",
+          "examples": ["you can count on me to help you any time", "Look to your friends for support"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "count", "inflected_form": "counting", "matches_context": true},
+            {"base_form": "bet", "inflected_form": "betting", "matches_context": true},
+            {"base_form": "depend", "inflected_form": "depending", "matches_context": true},
+            {"base_form": "swear", "inflected_form": "swearing", "matches_context": true},
+            {"base_form": "rely", "inflected_form": "relying", "matches_context": true},
+            {"base_form": "bank", "inflected_form": "banking", "matches_context": true},
+            {"base_form": "calculate", "inflected_form": "calculating", "matches_context": true},
+            {"base_form": "reckon", "inflected_form": "reckoning", "matches_context": true}
+          ]
+        },
+        {
+          "relation_type": "hypernym",
+          "definition": "have faith or confidence in",
+          "examples": ["you can count on me to help you any time", "Look to your friends for support"],
+          "wordnet_pos": "v",
+          "related_words": [
+            {"base_form": "trust", "inflected_form": "trusting", "matches_context": true}
+          ]
+        }
+      ],
+      "debug_info": {
+        "spacy_token_indices": {
+          "start": 42,
+          "end": 49
+        },
+        "lemma": "look"
+      }
+    },
+    {
+      "original_word": "woman",
+      "original_indices": {"start": 22, "end": 27},
+      "context_info": {
+        "pos": "NOUN",
+        "sentence": "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.",
+        "grammatical_form": "NN",
"context_words": [
|
861 |
+
"a",
|
862 |
+
"rather",
|
863 |
+
"severe",
|
864 |
+
"-",
|
865 |
+
"looking",
|
866 |
+
"woman",
|
867 |
+
"who",
|
868 |
+
"was",
|
869 |
+
"wearing",
|
870 |
+
"square",
|
871 |
+
"glasses"
|
872 |
+
],
|
873 |
+
"dependency": "pobj"
|
874 |
+
},
|
875 |
+
"related_word_groups": [
|
876 |
+
{
|
877 |
+
"relation_type": "synonym",
|
878 |
+
"definition": "an adult female person (as opposed to a man)",
|
879 |
+
"examples": [
|
880 |
+
"the woman kept house while the man hunted"
|
881 |
+
],
|
882 |
+
"wordnet_pos": "n",
|
883 |
+
"related_words": [
|
884 |
+
{
|
885 |
+
"base_form": "adult female",
|
886 |
+
"inflected_form": "adult female",
|
887 |
+
"matches_context": false
|
888 |
+
}
|
889 |
+
]
|
890 |
+
},
|
891 |
+
{
|
892 |
+
"relation_type": "antonym",
|
893 |
+
"definition": "",
|
894 |
+
"examples": [],
|
895 |
+
"related_words": [
|
896 |
+
{
|
897 |
+
"base_form": "man",
|
898 |
+
"inflected_form": "man",
|
899 |
+
"matches_context": false
|
900 |
+
}
|
901 |
+
]
|
902 |
+
},
|
903 |
+
{
|
904 |
+
"relation_type": "hypernym",
|
905 |
+
"definition": "an adult female person (as opposed to a man)",
|
906 |
+
"examples": [
|
907 |
+
"the woman kept house while the man hunted"
|
908 |
+
],
|
909 |
+
"wordnet_pos": "n",
|
910 |
+
"related_words": [
|
911 |
+
{
|
912 |
+
"base_form": "female",
|
913 |
+
"inflected_form": "female",
|
914 |
+
"matches_context": false
|
915 |
+
},
|
916 |
+
{
|
917 |
+
"base_form": "female person",
|
918 |
+
"inflected_form": "female person",
|
919 |
+
"matches_context": false
|
920 |
+
},
|
921 |
+
{
|
922 |
+
"base_form": "adult",
|
923 |
+
"inflected_form": "adult",
|
924 |
+
"matches_context": false
|
925 |
+
},
|
926 |
+
{
|
927 |
+
"base_form": "grownup",
|
928 |
+
"inflected_form": "grownup",
|
929 |
+
"matches_context": false
|
930 |
+
}
|
931 |
+
]
|
932 |
+
},
|
933 |
+
{
|
934 |
+
"relation_type": "hyponym",
|
935 |
+
"definition": "an adult female person (as opposed to a man)",
|
936 |
+
"examples": [
|
937 |
+
"the woman kept house while the man hunted"
|
938 |
+
],
|
939 |
+
"wordnet_pos": "n",
|
940 |
+
"related_words": [
|
941 |
+
{
|
942 |
+
"base_form": "jezebel",
|
943 |
+
"inflected_form": "jezebel",
|
944 |
+
"matches_context": false
|
945 |
+
},
|
946 |
+
{
|
947 |
+
"base_form": "mother figure",
|
948 |
+
"inflected_form": "mother figure",
|
949 |
+
"matches_context": false
|
950 |
+
},
|
951 |
+
{
|
952 |
+
"base_form": "smasher",
|
953 |
+
"inflected_form": "smasher",
|
954 |
+
"matches_context": false
|
955 |
+
},
|
956 |
+
{
|
957 |
+
"base_form": "stunner",
|
958 |
+
"inflected_form": "stunner",
|
959 |
+
"matches_context": false
|
960 |
+
},
|
961 |
+
{
|
962 |
+
"base_form": "knockout",
|
963 |
+
"inflected_form": "knockout",
|
964 |
+
"matches_context": false
|
965 |
+
},
|
966 |
+
{
|
967 |
+
"base_form": "beauty",
|
968 |
+
"inflected_form": "beauty",
|
969 |
+
"matches_context": false
|
970 |
+
},
|
971 |
+
{
|
972 |
+
"base_form": "ravisher",
|
973 |
+
"inflected_form": "ravisher",
|
974 |
+
"matches_context": false
|
975 |
+
},
|
976 |
+
{
|
977 |
+
"base_form": "sweetheart",
|
978 |
+
"inflected_form": "sweetheart",
|
979 |
+
"matches_context": false
|
980 |
+
},
|
981 |
+
{
|
982 |
+
"base_form": "peach",
|
983 |
+
"inflected_form": "peach",
|
984 |
+
"matches_context": false
|
985 |
+
},
|
986 |
+
{
|
987 |
+
"base_form": "lulu",
|
988 |
+
"inflected_form": "lulu",
|
989 |
+
"matches_context": false
|
990 |
+
},
|
991 |
+
{
|
992 |
+
"base_form": "looker",
|
993 |
+
"inflected_form": "looker",
|
994 |
+
"matches_context": false
|
995 |
+
},
|
996 |
+
{
|
997 |
+
"base_form": "mantrap",
|
998 |
+
"inflected_form": "mantrap",
|
999 |
+
"matches_context": false
|
1000 |
+
},
|
1001 |
+
{
|
1002 |
+
"base_form": "dish",
|
1003 |
+
"inflected_form": "dish",
|
1004 |
+
"matches_context": false
|
1005 |
+
},
|
1006 |
+
{
|
1007 |
+
"base_form": "B-girl",
|
1008 |
+
"inflected_form": "b-girl",
|
1009 |
+
"matches_context": false
|
1010 |
+
},
|
1011 |
+
{
|
1012 |
+
"base_form": "bar girl",
|
1013 |
+
"inflected_form": "bar girl",
|
1014 |
+
"matches_context": false
|
1015 |
+
},
|
1016 |
+
{
|
1017 |
+
"base_form": "heroine",
|
1018 |
+
"inflected_form": "heroine",
|
1019 |
+
"matches_context": false
|
1020 |
+
},
|
1021 |
+
{
|
1022 |
+
"base_form": "prostitute",
|
1023 |
+
"inflected_form": "prostitute",
|
1024 |
+
"matches_context": false
|
1025 |
+
},
|
1026 |
+
{
|
1027 |
+
"base_form": "cocotte",
|
1028 |
+
"inflected_form": "cocotte",
|
1029 |
+
"matches_context": false
|
1030 |
+
},
|
1031 |
+
{
|
1032 |
+
"base_form": "whore",
|
1033 |
+
"inflected_form": "whore",
|
1034 |
+
"matches_context": false
|
1035 |
+
},
|
1036 |
+
{
|
1037 |
+
"base_form": "harlot",
|
1038 |
+
"inflected_form": "harlot",
|
1039 |
+
"matches_context": false
|
1040 |
+
},
|
1041 |
+
{
|
1042 |
+
"base_form": "bawd",
|
1043 |
+
"inflected_form": "bawd",
|
1044 |
+
"matches_context": false
|
1045 |
+
},
|
1046 |
+
{
|
1047 |
+
"base_form": "tart",
|
1048 |
+
"inflected_form": "tart",
|
1049 |
+
"matches_context": false
|
1050 |
+
},
|
1051 |
+
{
|
1052 |
+
"base_form": "cyprian",
|
1053 |
+
"inflected_form": "cyprian",
|
1054 |
+
"matches_context": false
|
1055 |
+
},
|
1056 |
+
{
|
1057 |
+
"base_form": "fancy woman",
|
1058 |
+
"inflected_form": "fancy woman",
|
1059 |
+
"matches_context": false
|
1060 |
+
},
|
1061 |
+
{
|
1062 |
+
"base_form": "working girl",
|
1063 |
+
"inflected_form": "working girl",
|
1064 |
+
"matches_context": false
|
1065 |
+
},
|
1066 |
+
{
|
1067 |
+
"base_form": "sporting lady",
|
1068 |
+
"inflected_form": "sporting lady",
|
1069 |
+
"matches_context": false
|
1070 |
+
},
|
1071 |
+
{
|
1072 |
+
"base_form": "lady of pleasure",
|
1073 |
+
"inflected_form": "lady of pleasure",
|
1074 |
+
"matches_context": false
|
1075 |
+
},
|
1076 |
+
{
|
1077 |
+
"base_form": "woman of the street",
|
1078 |
+
"inflected_form": "woman of the street",
|
1079 |
+
"matches_context": false
|
1080 |
+
},
|
1081 |
+
{
|
1082 |
+
"base_form": "inamorata",
|
1083 |
+
"inflected_form": "inamorata",
|
1084 |
+
"matches_context": false
|
1085 |
+
},
|
1086 |
+
{
|
1087 |
+
"base_form": "cat",
|
1088 |
+
"inflected_form": "cat",
|
1089 |
+
"matches_context": false
|
1090 |
+
},
|
1091 |
+
{
|
1092 |
+
"base_form": "girl",
|
1093 |
+
"inflected_form": "girl",
|
1094 |
+
"matches_context": false
|
1095 |
+
},
|
1096 |
+
{
|
1097 |
+
"base_form": "miss",
|
1098 |
+
"inflected_form": "miss",
|
1099 |
+
"matches_context": false
|
1100 |
+
},
|
1101 |
+
{
|
1102 |
+
"base_form": "missy",
|
1103 |
+
"inflected_form": "missy",
|
1104 |
+
"matches_context": false
|
1105 |
+
},
|
1106 |
+
{
|
1107 |
+
"base_form": "young lady",
|
1108 |
+
"inflected_form": "young lady",
|
1109 |
+
"matches_context": false
|
1110 |
+
},
|
1111 |
+
{
|
1112 |
+
"base_form": "young woman",
|
1113 |
+
"inflected_form": "young woman",
|
1114 |
+
"matches_context": false
|
1115 |
+
},
|
1116 |
+
{
|
1117 |
+
"base_form": "fille",
|
1118 |
+
"inflected_form": "fille",
|
1119 |
+
"matches_context": false
|
1120 |
+
},
|
1121 |
+
{
|
1122 |
+
"base_form": "matriarch",
|
1123 |
+
"inflected_form": "matriarch",
|
1124 |
+
"matches_context": false
|
1125 |
+
},
|
1126 |
+
{
|
1127 |
+
"base_form": "wife",
|
1128 |
+
"inflected_form": "wife",
|
1129 |
+
"matches_context": false
|
1130 |
+
},
|
1131 |
+
{
|
1132 |
+
"base_form": "married woman",
|
1133 |
+
"inflected_form": "married woman",
|
1134 |
+
"matches_context": false
|
1135 |
+
},
|
1136 |
+
{
|
1137 |
+
"base_form": "girlfriend",
|
1138 |
+
"inflected_form": "girlfriend",
|
1139 |
+
"matches_context": false
|
1140 |
+
},
|
1141 |
+
{
|
1142 |
+
"base_form": "Cinderella",
|
1143 |
+
"inflected_form": "cinderella",
|
1144 |
+
"matches_context": false
|
1145 |
+
},
|
1146 |
+
{
|
1147 |
+
"base_form": "bachelor girl",
|
1148 |
+
"inflected_form": "bachelor girl",
|
1149 |
+
"matches_context": false
|
1150 |
+
},
|
1151 |
+
{
|
1152 |
+
"base_form": "bachelorette",
|
1153 |
+
"inflected_form": "bachelorette",
|
1154 |
+
"matches_context": false
|
1155 |
+
},
|
1156 |
+
{
|
1157 |
+
"base_form": "vestal",
|
1158 |
+
"inflected_form": "vestal",
|
1159 |
+
"matches_context": false
|
1160 |
+
},
|
1161 |
+
{
|
1162 |
+
"base_form": "widow",
|
1163 |
+
"inflected_form": "widow",
|
1164 |
+
"matches_context": false
|
1165 |
+
},
|
1166 |
+
{
|
1167 |
+
"base_form": "widow woman",
|
1168 |
+
"inflected_form": "widow woman",
|
1169 |
+
"matches_context": false
|
1170 |
+
},
|
1171 |
+
{
|
1172 |
+
"base_form": "Wave",
|
1173 |
+
"inflected_form": "wave",
|
1174 |
+
"matches_context": false
|
1175 |
+
},
|
1176 |
+
{
|
1177 |
+
"base_form": "nymphet",
|
1178 |
+
"inflected_form": "nymphet",
|
1179 |
+
"matches_context": false
|
1180 |
+
},
|
1181 |
+
{
|
1182 |
+
"base_form": "bluestocking",
|
1183 |
+
"inflected_form": "bluestocking",
|
1184 |
+
"matches_context": false
|
1185 |
+
},
|
1186 |
+
{
|
1187 |
+
"base_form": "bas bleu",
|
1188 |
+
"inflected_form": "bas bleu",
|
1189 |
+
"matches_context": false
|
1190 |
+
},
|
1191 |
+
{
|
1192 |
+
"base_form": "coquette",
|
1193 |
+
"inflected_form": "coquette",
|
1194 |
+
"matches_context": false
|
1195 |
+
},
|
1196 |
+
{
|
1197 |
+
"base_form": "flirt",
|
1198 |
+
"inflected_form": "flirt",
|
1199 |
+
"matches_context": false
|
1200 |
+
},
|
1201 |
+
{
|
1202 |
+
"base_form": "vamp",
|
1203 |
+
"inflected_form": "vamp",
|
1204 |
+
"matches_context": false
|
1205 |
+
},
|
1206 |
+
{
|
1207 |
+
"base_form": "vamper",
|
1208 |
+
"inflected_form": "vamper",
|
1209 |
+
"matches_context": false
|
1210 |
+
},
|
1211 |
+
{
|
1212 |
+
"base_form": "minx",
|
1213 |
+
"inflected_form": "minx",
|
1214 |
+
"matches_context": false
|
1215 |
+
},
|
1216 |
+
{
|
1217 |
+
"base_form": "tease",
|
1218 |
+
"inflected_form": "tease",
|
1219 |
+
"matches_context": false
|
1220 |
+
},
|
1221 |
+
{
|
1222 |
+
"base_form": "prickteaser",
|
1223 |
+
"inflected_form": "prickteaser",
|
1224 |
+
"matches_context": false
|
1225 |
+
},
|
1226 |
+
{
|
1227 |
+
"base_form": "bridesmaid",
|
1228 |
+
"inflected_form": "bridesmaid",
|
1229 |
+
"matches_context": false
|
1230 |
+
},
|
1231 |
+
{
|
1232 |
+
"base_form": "maid of honor",
|
1233 |
+
"inflected_form": "maid of honor",
|
1234 |
+
"matches_context": false
|
1235 |
+
},
|
1236 |
+
{
|
1237 |
+
"base_form": "white woman",
|
1238 |
+
"inflected_form": "white woman",
|
1239 |
+
"matches_context": false
|
1240 |
+
},
|
1241 |
+
{
|
1242 |
+
"base_form": "wonder woman",
|
1243 |
+
"inflected_form": "wonder woman",
|
1244 |
+
"matches_context": false
|
1245 |
+
},
|
1246 |
+
{
|
1247 |
+
"base_form": "Wac",
|
1248 |
+
"inflected_form": "wac",
|
1249 |
+
"matches_context": false
|
1250 |
+
},
|
1251 |
+
{
|
1252 |
+
"base_form": "mestiza",
|
1253 |
+
"inflected_form": "mestiza",
|
1254 |
+
"matches_context": false
|
1255 |
+
},
|
1256 |
+
{
|
1257 |
+
"base_form": "nanny",
|
1258 |
+
"inflected_form": "nanny",
|
1259 |
+
"matches_context": false
|
1260 |
+
},
|
1261 |
+
{
|
1262 |
+
"base_form": "nursemaid",
|
1263 |
+
"inflected_form": "nursemaid",
|
1264 |
+
"matches_context": false
|
1265 |
+
},
|
1266 |
+
{
|
1267 |
+
"base_form": "nurse",
|
1268 |
+
"inflected_form": "nurse",
|
1269 |
+
"matches_context": false
|
1270 |
+
},
|
1271 |
+
{
|
1272 |
+
"base_form": "debutante",
|
1273 |
+
"inflected_form": "debutante",
|
1274 |
+
"matches_context": false
|
1275 |
+
},
|
1276 |
+
{
|
1277 |
+
"base_form": "deb",
|
1278 |
+
"inflected_form": "deb",
|
1279 |
+
"matches_context": false
|
1280 |
+
},
|
1281 |
+
{
|
1282 |
+
"base_form": "dominatrix",
|
1283 |
+
"inflected_form": "dominatrix",
|
1284 |
+
"matches_context": false
|
1285 |
+
},
|
1286 |
+
{
|
1287 |
+
"base_form": "girlfriend",
|
1288 |
+
"inflected_form": "girlfriend",
|
1289 |
+
"matches_context": false
|
1290 |
+
},
|
1291 |
+
{
|
1292 |
+
"base_form": "girl",
|
1293 |
+
"inflected_form": "girl",
|
1294 |
+
"matches_context": false
|
1295 |
+
},
|
1296 |
+
{
|
1297 |
+
"base_form": "lady friend",
|
1298 |
+
"inflected_form": "lady friend",
|
1299 |
+
"matches_context": false
|
1300 |
+
},
|
1301 |
+
{
|
1302 |
+
"base_form": "unmarried woman",
|
1303 |
+
"inflected_form": "unmarried woman",
|
1304 |
+
"matches_context": false
|
1305 |
+
},
|
1306 |
+
{
|
1307 |
+
"base_form": "donna",
|
1308 |
+
"inflected_form": "donna",
|
1309 |
+
"matches_context": false
|
1310 |
+
},
|
1311 |
+
{
|
1312 |
+
"base_form": "eyeful",
|
1313 |
+
"inflected_form": "eyeful",
|
1314 |
+
"matches_context": false
|
1315 |
+
},
|
1316 |
+
{
|
1317 |
+
"base_form": "divorcee",
|
1318 |
+
"inflected_form": "divorcee",
|
1319 |
+
"matches_context": false
|
1320 |
+
},
|
1321 |
+
{
|
1322 |
+
"base_form": "grass widow",
|
1323 |
+
"inflected_form": "grass widow",
|
1324 |
+
"matches_context": false
|
1325 |
+
},
|
1326 |
+
{
|
1327 |
+
"base_form": "amazon",
|
1328 |
+
"inflected_form": "amazon",
|
1329 |
+
"matches_context": false
|
1330 |
+
},
|
1331 |
+
{
|
1332 |
+
"base_form": "virago",
|
1333 |
+
"inflected_form": "virago",
|
1334 |
+
"matches_context": false
|
1335 |
+
},
|
1336 |
+
{
|
1337 |
+
"base_form": "lady",
|
1338 |
+
"inflected_form": "lady",
|
1339 |
+
"matches_context": false
|
1340 |
+
},
|
1341 |
+
{
|
1342 |
+
"base_form": "maenad",
|
1343 |
+
"inflected_form": "maenad",
|
1344 |
+
"matches_context": false
|
1345 |
+
},
|
1346 |
+
{
|
1347 |
+
"base_form": "mistress",
|
1348 |
+
"inflected_form": "mistress",
|
1349 |
+
"matches_context": false
|
1350 |
+
},
|
1351 |
+
{
|
1352 |
+
"base_form": "kept woman",
|
1353 |
+
"inflected_form": "kept woman",
|
1354 |
+
"matches_context": false
|
1355 |
+
},
|
1356 |
+
{
|
1357 |
+
"base_form": "fancy woman",
|
1358 |
+
"inflected_form": "fancy woman",
|
1359 |
+
"matches_context": false
|
1360 |
+
},
|
1361 |
+
{
|
1362 |
+
"base_form": "nymph",
|
1363 |
+
"inflected_form": "nymph",
|
1364 |
+
"matches_context": false
|
1365 |
+
},
|
1366 |
+
{
|
1367 |
+
"base_form": "houri",
|
1368 |
+
"inflected_form": "houri",
|
1369 |
+
"matches_context": false
|
1370 |
+
},
|
1371 |
+
{
|
1372 |
+
"base_form": "geisha",
|
1373 |
+
"inflected_form": "geisha",
|
1374 |
+
"matches_context": false
|
1375 |
+
},
|
1376 |
+
{
|
1377 |
+
"base_form": "geisha girl",
|
1378 |
+
"inflected_form": "geisha girl",
|
1379 |
+
"matches_context": false
|
1380 |
+
},
|
1381 |
+
{
|
1382 |
+
"base_form": "matriarch",
|
1383 |
+
"inflected_form": "matriarch",
|
1384 |
+
"matches_context": false
|
1385 |
+
},
|
1386 |
+
{
|
1387 |
+
"base_form": "materfamilias",
|
1388 |
+
"inflected_form": "materfamilias",
|
1389 |
+
"matches_context": false
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"base_form": "matron",
|
1393 |
+
"inflected_form": "matron",
|
1394 |
+
"matches_context": false
|
1395 |
+
},
|
1396 |
+
{
|
1397 |
+
"base_form": "baggage",
|
1398 |
+
"inflected_form": "baggage",
|
1399 |
+
"matches_context": false
|
1400 |
+
},
|
1401 |
+
{
|
1402 |
+
"base_form": "broad",
|
1403 |
+
"inflected_form": "broad",
|
1404 |
+
"matches_context": false
|
1405 |
+
},
|
1406 |
+
{
|
1407 |
+
"base_form": "girl",
|
1408 |
+
"inflected_form": "girl",
|
1409 |
+
"matches_context": false
|
1410 |
+
},
|
1411 |
+
{
|
1412 |
+
"base_form": "enchantress",
|
1413 |
+
"inflected_form": "enchantress",
|
1414 |
+
"matches_context": false
|
1415 |
+
},
|
1416 |
+
{
|
1417 |
+
"base_form": "temptress",
|
1418 |
+
"inflected_form": "temptress",
|
1419 |
+
"matches_context": false
|
1420 |
+
},
|
1421 |
+
{
|
1422 |
+
"base_form": "siren",
|
1423 |
+
"inflected_form": "siren",
|
1424 |
+
"matches_context": false
|
1425 |
+
},
|
1426 |
+
{
|
1427 |
+
"base_form": "Delilah",
|
1428 |
+
"inflected_form": "delilah",
|
1429 |
+
"matches_context": false
|
1430 |
+
},
|
1431 |
+
{
|
1432 |
+
"base_form": "femme fatale",
|
1433 |
+
"inflected_form": "femme fatale",
|
1434 |
+
"matches_context": false
|
1435 |
+
},
|
1436 |
+
{
|
1437 |
+
"base_form": "gravida",
|
1438 |
+
"inflected_form": "gravida",
|
1439 |
+
"matches_context": false
|
1440 |
+
},
|
1441 |
+
{
|
1442 |
+
"base_form": "jilt",
|
1443 |
+
"inflected_form": "jilt",
|
1444 |
+
"matches_context": false
|
1445 |
+
},
|
1446 |
+
{
|
1447 |
+
"base_form": "maenad",
|
1448 |
+
"inflected_form": "maenad",
|
1449 |
+
"matches_context": false
|
1450 |
+
},
|
1451 |
+
{
|
1452 |
+
"base_form": "nullipara",
|
1453 |
+
"inflected_form": "nullipara",
|
1454 |
+
"matches_context": false
|
1455 |
+
},
|
1456 |
+
{
|
1457 |
+
"base_form": "shiksa",
|
1458 |
+
"inflected_form": "shiksa",
|
1459 |
+
"matches_context": false
|
1460 |
+
},
|
1461 |
+
{
|
1462 |
+
"base_form": "shikse",
|
1463 |
+
"inflected_form": "shikse",
|
1464 |
+
"matches_context": false
|
1465 |
+
},
|
1466 |
+
{
|
1467 |
+
"base_form": "ex-wife",
|
1468 |
+
"inflected_form": "ex-wife",
|
1469 |
+
"matches_context": false
|
1470 |
+
},
|
1471 |
+
{
|
1472 |
+
"base_form": "ex",
|
1473 |
+
"inflected_form": "ex",
|
1474 |
+
"matches_context": false
|
1475 |
+
},
|
1476 |
+
{
|
1477 |
+
"base_form": "gold digger",
|
1478 |
+
"inflected_form": "gold digger",
|
1479 |
+
"matches_context": false
|
1480 |
+
},
|
1481 |
+
{
|
1482 |
+
"base_form": "old woman",
|
1483 |
+
"inflected_form": "old woman",
|
1484 |
+
"matches_context": false
|
1485 |
+
},
|
1486 |
+
{
|
1487 |
+
"base_form": "dame",
|
1488 |
+
"inflected_form": "dame",
|
1489 |
+
"matches_context": false
|
1490 |
+
},
|
1491 |
+
{
|
1492 |
+
"base_form": "madam",
|
1493 |
+
"inflected_form": "madam",
|
1494 |
+
"matches_context": false
|
1495 |
+
},
|
1496 |
+
{
|
1497 |
+
"base_form": "ma'am",
|
1498 |
+
"inflected_form": "ma'am",
|
1499 |
+
"matches_context": false
|
1500 |
+
},
|
1501 |
+
{
|
1502 |
+
"base_form": "lady",
|
1503 |
+
"inflected_form": "lady",
|
1504 |
+
"matches_context": false
|
1505 |
+
},
|
1506 |
+
{
|
1507 |
+
"base_form": "gentlewoman",
|
1508 |
+
"inflected_form": "gentlewoman",
|
1509 |
+
"matches_context": false
|
1510 |
+
},
|
1511 |
+
{
|
1512 |
+
"base_form": "sylph",
|
1513 |
+
"inflected_form": "sylph",
|
1514 |
+
"matches_context": false
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"base_form": "ball-buster",
|
1518 |
+
"inflected_form": "ball-buster",
|
1519 |
+
"matches_context": false
|
1520 |
+
},
|
1521 |
+
{
|
1522 |
+
"base_form": "ball-breaker",
|
1523 |
+
"inflected_form": "ball-breaker",
|
1524 |
+
"matches_context": false
|
1525 |
+
}
|
1526 |
+
]
|
1527 |
+
},
|
1528 |
+
{
|
1529 |
+
"relation_type": "meronym",
|
1530 |
+
"definition": "an adult female person (as opposed to a man)",
|
1531 |
+
"examples": [
|
1532 |
+
"the woman kept house while the man hunted"
|
1533 |
+
],
|
1534 |
+
"wordnet_pos": "n",
|
1535 |
+
"related_words": [
|
1536 |
+
{
|
1537 |
+
"base_form": "adult female body",
|
1538 |
+
"inflected_form": "adult female body",
|
1539 |
+
"matches_context": false
|
1540 |
+
},
|
1541 |
+
{
|
1542 |
+
"base_form": "woman's body",
|
1543 |
+
"inflected_form": "woman's body",
|
1544 |
+
"matches_context": false
|
1545 |
+
}
|
1546 |
+
]
|
1547 |
+
},
|
1548 |
+
{
|
1549 |
+
"relation_type": "antonym",
|
1550 |
+
"definition": "",
|
1551 |
+
"examples": [],
|
1552 |
+
"related_words": [
|
1553 |
+
{
|
1554 |
+
"base_form": "man",
|
1555 |
+
"inflected_form": "man",
|
1556 |
+
"matches_context": false
|
1557 |
+
}
|
1558 |
+
]
|
1559 |
+
},
|
1560 |
+
{
|
1561 |
+
"relation_type": "hypernym",
|
1562 |
+
"definition": "a female person who plays a significant role (wife or mistress or girlfriend) in the life of a particular man",
|
1563 |
+
"examples": [
|
1564 |
+
"he was faithful to his woman"
|
1565 |
+
],
|
1566 |
+
"wordnet_pos": "n",
|
1567 |
+
"related_words": [
|
1568 |
+
{
|
1569 |
+
"base_form": "female",
|
1570 |
+
"inflected_form": "female",
|
1571 |
+
"matches_context": false
|
1572 |
+
},
|
1573 |
+
{
|
1574 |
+
"base_form": "female person",
|
1575 |
+
"inflected_form": "female person",
|
1576 |
+
"matches_context": false
|
1577 |
+
}
|
1578 |
+
]
|
1579 |
+
},
|
1580 |
+
{
|
1581 |
+
"relation_type": "synonym",
|
1582 |
+
"definition": "a human female employed to do housework",
|
1583 |
+
"examples": [
|
1584 |
+
"the char will clean the carpet",
|
1585 |
+
"I have a woman who comes in four hours a day while I write"
|
1586 |
+
],
|
1587 |
+
"wordnet_pos": "n",
|
1588 |
+
"related_words": [
|
1589 |
+
{
|
1590 |
+
"base_form": "charwoman",
|
1591 |
+
"inflected_form": "charwoman",
|
1592 |
+
"matches_context": false
|
1593 |
+
},
|
1594 |
+
{
|
1595 |
+
"base_form": "char",
|
1596 |
+
"inflected_form": "char",
|
1597 |
+
"matches_context": false
|
1598 |
+
},
|
1599 |
+
{
|
1600 |
+
"base_form": "cleaning woman",
|
1601 |
+
"inflected_form": "cleaning woman",
|
1602 |
+
"matches_context": false
|
1603 |
+
},
|
1604 |
+
{
|
1605 |
+
"base_form": "cleaning lady",
|
1606 |
+
"inflected_form": "cleaning lady",
|
1607 |
+
"matches_context": false
|
1608 |
+
}
|
1609 |
+
]
|
1610 |
+
},
|
1611 |
+
{
|
1612 |
+
"relation_type": "hypernym",
|
1613 |
+
"definition": "a human female employed to do housework",
|
1614 |
+
"examples": [
|
1615 |
+
"the char will clean the carpet",
|
1616 |
+
"I have a woman who comes in four hours a day while I write"
|
1617 |
+
],
|
1618 |
+
"wordnet_pos": "n",
|
1619 |
+
"related_words": [
|
1620 |
+
{
|
1621 |
+
"base_form": "cleaner",
|
1622 |
+
"inflected_form": "cleaner",
|
1623 |
+
"matches_context": false
|
1624 |
+
}
|
1625 |
+
]
|
1626 |
+
},
|
1627 |
+
{
|
1628 |
+
"relation_type": "synonym",
|
1629 |
+
"definition": "women as a class",
|
1630 |
+
"examples": [
|
1631 |
+
"it's an insult to American womanhood",
|
1632 |
+
"woman is the glory of creation"
|
1633 |
+
],
|
1634 |
+
"wordnet_pos": "n",
|
1635 |
+
"related_words": [
|
1636 |
+
{
|
1637 |
+
"base_form": "womanhood",
|
1638 |
+
"inflected_form": "womanhood",
|
1639 |
+
"matches_context": false
|
1640 |
+
},
|
1641 |
+
{
|
1642 |
+
"base_form": "fair sex",
|
1643 |
+
"inflected_form": "fair sex",
|
1644 |
+
"matches_context": false
|
1645 |
+
}
|
1646 |
+
]
|
1647 |
+
},
|
1648 |
+
{
|
1649 |
+
"relation_type": "hypernym",
|
1650 |
+
"definition": "women as a class",
|
1651 |
+
"examples": [
|
1652 |
+
"it's an insult to American womanhood",
|
1653 |
+
"woman is the glory of creation"
|
1654 |
+
],
|
1655 |
+
"wordnet_pos": "n",
|
1656 |
+
"related_words": [
|
1657 |
+
{
|
1658 |
+
"base_form": "class",
|
1659 |
+
"inflected_form": "class",
|
1660 |
+
"matches_context": false
|
1661 |
+
},
|
1662 |
+
{
|
1663 |
+
"base_form": "stratum",
|
1664 |
+
"inflected_form": "stratum",
|
1665 |
+
"matches_context": false
|
1666 |
+
},
|
1667 |
+
{
|
1668 |
+
"base_form": "social class",
|
1669 |
+
"inflected_form": "social class",
|
1670 |
+
"matches_context": false
|
1671 |
+
},
|
1672 |
+
{
|
1673 |
+
"base_form": "socio-economic class",
|
1674 |
+
"inflected_form": "socio-economic class",
|
1675 |
+
"matches_context": false
|
1676 |
+
}
|
1677 |
+
]
|
1678 |
+
},
|
1679 |
+
{
|
1680 |
+
"relation_type": "holonym",
|
1681 |
+
"definition": "women as a class",
|
1682 |
+
"examples": [
|
1683 |
+
"it's an insult to American womanhood",
|
1684 |
+
"woman is the glory of creation"
|
1685 |
+
],
|
1686 |
+
"wordnet_pos": "n",
|
1687 |
+
"related_words": [
|
1688 |
+
{
|
1689 |
+
"base_form": "womankind",
|
1690 |
+
"inflected_form": "womankind",
|
1691 |
+
"matches_context": false
|
1692 |
+
}
|
1693 |
+
]
|
1694 |
+
}
|
1695 |
+
],
|
1696 |
+
"debug_info": {
|
1697 |
+
"spacy_token_indices": {
|
1698 |
+
"start": 50,
|
1699 |
+
"end": 55
|
1700 |
+
},
|
1701 |
+
"lemma": "woman"
|
1702 |
+
}
|
1703 |
+
}
|
1704 |
+
],
|
1705 |
+
"message": "Got 2 synonym groups.",
|
1706 |
+
"duration": 0.0003
|
1707 |
+
}
|
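The fixture above is verbose but regular: every entry in `results` pairs an `original_word` with its `related_word_groups`, and each group lists `related_words` carrying a `base_form`, an `inflected_form`, and a `matches_context` flag. A minimal client-side sketch for flattening it into context-matching replacement candidates (the helper name and the hard-coded path are illustrative, not part of the library):

```python
import json

def context_matching_inflections(results: list[dict]) -> dict[str, list[str]]:
    """Map each original word to the inflected forms whose context matched."""
    candidates: dict[str, list[str]] = {}
    for entry in results:
        candidates[entry["original_word"]] = [
            related["inflected_form"]
            for group in entry["related_word_groups"]
            for related in group["related_words"]
            if related["matches_context"]
        ]
    return candidates

with open("tests/events/response_thesaurus_phrase_inflated.json") as src:
    response = json.load(src)
print(context_matching_inflections(response["results"])["looking"][:3])
```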
tests/events/response_thesaurus_phrase_inflated_structure.json
ADDED
@@ -0,0 +1 @@
+{"$[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 2}, "$[*].context_info": {"types": ["dict"], "primary_type": "dict", "is_array": false, "samples": [], "sample_count": 0}, "$[*].context_info.context_words": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$[*].context_info.context_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 11}, "$[*].context_info.dependency": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["amod", "pobj"], "sample_count": 2}, "$[*].context_info.grammatical_form": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["VBG", "NN"], "sample_count": 2}, "$[*].context_info.pos": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["VERB", "NOUN"], "sample_count": 2}, "$[*].context_info.sentence": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.", "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes."], "sample_count": 2}, "$[*].debug_info": {"types": ["dict"], "primary_type": "dict", "is_array": false, "samples": [], "sample_count": 0}, "$[*].debug_info.lemma": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["look", "woman"], "sample_count": 2}, "$[*].debug_info.spacy_token_indices": {"types": ["dict"], "primary_type": "dict", "is_array": false, "samples": [], "sample_count": 0}, "$[*].debug_info.spacy_token_indices.end": {"types": ["int"], "primary_type": "int", "is_array": false, "samples": ["49", "55"], "sample_count": 2}, "$[*].debug_info.spacy_token_indices.start": {"types": ["int"], "primary_type": "int", "is_array": false, "samples": ["42", "50"], "sample_count": 2}, "$[*].original_indices": {"types": ["dict"], "primary_type": "dict", "is_array": false, "samples": [], "sample_count": 0}, "$[*].original_indices.end": {"types": ["int"], "primary_type": "int", "is_array": false, "samples": ["21", "27"], "sample_count": 2}, "$[*].original_indices.start": {"types": ["int"], "primary_type": "int", "is_array": false, "samples": ["14", "22"], "sample_count": 2}, "$[*].original_word": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["looking", "woman"], "sample_count": 2}, "$[*].related_word_groups": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$[*].related_word_groups[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 12}, "$[*].related_word_groups[*].definition": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["perceive with attention; direct one's gaze towards", "give a certain impression or have a certain outward aspect", "give a certain impression or have a certain outward aspect"], "sample_count": 3}, "$[*].related_word_groups[*].examples": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$[*].related_word_groups[*].examples[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 2}, "$[*].related_word_groups[*].related_words": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$[*].related_word_groups[*].related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 1}, "$[*].related_word_groups[*].related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["look away", "admire", "gaze"], "sample_count": 3}, "$[*].related_word_groups[*].related_words[*].inflected_form": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["looking away", "admiring", "gazing"], "sample_count": 3}, "$[*].related_word_groups[*].related_words[*].matches_context": {"types": ["bool"], "primary_type": "bool", "is_array": false, "samples": ["True", "True", "True"], "sample_count": 3}, "$[*].related_word_groups[*].relation_type": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["hyponym", "synonym", "hypernym"], "sample_count": 3}, "$[*].related_word_groups[*].wordnet_pos": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["v", "v", "v"], "sample_count": 3}}
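Each key of this one-line fixture is a JSONPath expression such as `$[*].original_word` or `$[*].related_word_groups[*].relation_type`, and each value records the `types`, `primary_type`, `is_array` flag, retained `samples`, and, for arrays, an `array_length` observed at that path. A small sketch of reading the report back:

```python
import json

with open("tests/events/response_thesaurus_phrase_inflated_structure.json") as src:
    report = json.load(src)

# Print every path that was observed as an array, with its recorded length
for path, info in sorted(report.items()):
    if info["is_array"]:
        print(f"{path}: array, length {info.get('array_length')}")
```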
tests/my_ghost_writer/helpers_tests.py
ADDED
@@ -0,0 +1,10 @@
+
+
+def analyze_detailed_report_lists(cls, detailed_report: dict, expected_detailed_report: dict):
+    for (row_k, row_v), (expected_row_k, expected_row_v) in zip(detailed_report.items(), expected_detailed_report.items()):
+        cls.assertEqual(row_k, expected_row_k)
+        del row_v["samples"]
+        del expected_row_v["samples"]
+        del row_v["sample_count"]
+        del expected_row_v["sample_count"]
+        cls.assertDictEqual(row_v, expected_row_v)
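The helper compares two detailed type reports row by row but first deletes the `samples` and `sample_count` entries (mutating both dicts in place), since those hold whatever concrete values happened to be sampled and so differ between structurally identical payloads. A usage sketch with two payloads that share a structure but not values:

```python
import unittest

from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer
from tests.my_ghost_writer.helpers_tests import analyze_detailed_report_lists


class TestReportsMatch(unittest.TestCase):
    def test_same_structure_different_values(self):
        left, right = JSONPathStructureAnalyzer(), JSONPathStructureAnalyzer()
        left.extract_all_paths({"word": "look", "forms": ["looking"]})
        right.extract_all_paths({"word": "gaze", "forms": ["gazing"]})
        # Paths and types agree, samples differ: the comparison still passes
        analyze_detailed_report_lists(self, left.get_detailed_type_report(), right.get_detailed_type_report())
```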
tests/my_ghost_writer/test_app.py
CHANGED
@@ -12,6 +12,9 @@ from my_ghost_writer import __version__ as version_module
 from my_ghost_writer.app import app, mongo_health_check_background_task, lifespan
 from my_ghost_writer.constants import app_logger
 from tests import EVENTS_FOLDER
+from my_ghost_writer.jsonpath_comparator import JSONPathComparator
+from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer
+from tests.my_ghost_writer.helpers_tests import analyze_detailed_report_lists
 
 
 class TestVersion(unittest.TestCase):
@@ -210,11 +213,12 @@
     # --- /thesaurus-inflated-phrase Endpoint ---
     def test_get_synonyms_for_phrase_success(self):
         """Tests the success case for /thesaurus-inflated-phrase."""
-        # Load the expected response from JSON file
         import json
-        with open(EVENTS_FOLDER / "response_thesaurus_phrase_inflated.json", "r") as
-        expected_response = json.load(
-
+        with open(EVENTS_FOLDER / "response_thesaurus_phrase_inflated.json", "r") as src:
+            expected_response = json.load(src)
+        with open(EVENTS_FOLDER / "response_thesaurus_phrase_inflated_structure.json", "r") as src:
+            expected_detailed_report = json.load(src)
+        self.maxDiff = None
         body = {
             "word": "rather severe-looking woman",
             "text": "Instead he was smiling at a rather severe-looking woman who was wearing square glasses exactly the shape of the markings the cat had had around its eyes.",
@@ -223,13 +227,32 @@
         }
         response = self.client.post("/thesaurus-inflated-phrase", json=body)
         self.assertEqual(response.status_code, 200)
-
-
-        self.
-        self.
-
+        response_json = response.json()
+        duration = response_json["duration"]
+        self.assertIsInstance(duration, float)
+        self.assertGreater(duration, 0)
+        del response_json["duration"]
+        del expected_response["duration"]
+        app_logger.info(f"response_json.keys():{response_json.keys()}.")
+        self.assertEqual(response_json["success"], expected_response["success"])
+        self.assertEqual(response_json["original_phrase"], expected_response["original_phrase"])
+        self.assertEqual(response_json["original_indices"], expected_response["original_indices"])
+        self.assertEqual(response_json["message"], expected_response["message"])
         # check only the first result
-
+        comparator = JSONPathComparator()
+        comparison = comparator.compare_structures(response_json["results"], expected_response["results"])
+        assert comparison is not None
+        added = comparison.get('added_paths')
+        removed = comparison.get("removed_paths")
+        self.assertEqual(added, set())
+        self.assertEqual(removed, set())
+        analyzer = JSONPathStructureAnalyzer()
+        analyzer.extract_all_paths(response_json["results"])
+        detailed_report = analyzer.get_detailed_type_report()
+        analyze_detailed_report_lists(self, detailed_report, expected_detailed_report)
+
+        # with open(EVENTS_FOLDER / "response_thesaurus_phrase_inflated_structure.json", "w") as src:
+        #     json.dump(detailed_report, src)
 
     def test_get_synonyms_for_phrase_no_synonyms(self):
         """Tests the case where no synonyms are found for the phrase."""
@@ -240,8 +263,13 @@
             "end": 18
         }
         response = self.client.post("/thesaurus-inflated-phrase", json=body)
+        response_json = response.json()
         self.assertEqual(response.status_code, 200)
-
+        duration = response_json["duration"]
+        self.assertIsInstance(duration, float)
+        self.assertGreater(duration, 0)
+        del response_json["duration"]
+        self.assertEqual(response_json, {
             "success": True,
             "original_phrase": "some phrase",
             "original_indices": {
@@ -262,9 +290,17 @@
         }
         response = self.client.post("/thesaurus-inflated-phrase", json=body)
         self.assertEqual(response.status_code, 200)
+        response_json = response.json()
+        duration = response_json["duration"]
+        self.assertIsInstance(duration, float)
+        self.assertGreater(duration, 0)
+        del response_json["duration"]
         self.assertDictEqual(
-
-            {
+            response_json,
+            {
+                'success': True, 'original_phrase': 'some phrase', 'original_indices': {'start': 20, 'end': 18},
+                'results': [], 'message': 'No words with synonyms found in the selected phrase.'
+            }
         )
 
     def test_get_synonyms_for_phrase_error_validation(self):
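The updated test relies only on `compare_structures` returning a mapping whose `added_paths` and `removed_paths` entries are sets of JSONPath strings. A minimal stand-in with that contract, built on the analyzer used elsewhere in this commit, could look like the sketch below; the real implementation lives in the new `my_ghost_writer/jsonpath_comparator.py` and is not reproduced here.

```python
from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer


def compare_structures_sketch(actual, expected) -> dict:
    """Report which JSONPath keys appear in only one of the two payloads."""
    left, right = JSONPathStructureAnalyzer(), JSONPathStructureAnalyzer()
    left.extract_all_paths(actual)
    right.extract_all_paths(expected)
    actual_paths = set(left.get_paths_with_types())
    expected_paths = set(right.get_paths_with_types())
    return {
        "added_paths": actual_paths - expected_paths,
        "removed_paths": expected_paths - actual_paths,
    }
```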
tests/my_ghost_writer/test_custom_synonym_handler.py
CHANGED
@@ -1,16 +1,20 @@
 import unittest
 
 from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
+from my_ghost_writer.type_hints import RelatedEntry, TermRelationships
 
 
 class TestCustomSynonymHandler(unittest.TestCase):
     def test_custom_synonym_handler_add_entry_ok1(self):
         word_input = "happy"
-        related_input = [
-
-
-
-
+        related_input = []
+        for rel in [
+            {'definition': 'definition of happy', 'type': 'synonym', 'words': ['joy', 'cheer']},
+            {'definition': 'definition of sad', 'type': 'antonym', 'words': ['sad', 'sadness']},
+            {'definition': 'another definition of happy', 'type': 'synonym', 'words': ['content', 'cheerful', 'joyful']}
+        ]:
+            tmp = RelatedEntry(**rel)
+            related_input.append(tmp)
         test_custom_synonym_handler = CustomSynonymHandler()
         self.assertEqual(test_custom_synonym_handler.inverted_index, {})
         self.assertEqual(test_custom_synonym_handler.lexicon, {})
@@ -48,16 +52,16 @@
         self.assertEqual(test_custom_synonym_handler.lexicon, expected_lexicon)
         self.assertEqual(test_custom_synonym_handler.inverted_index, expected_inverted_index)
 
-        synonyms_related = test_custom_synonym_handler.get_related("happy",
+        synonyms_related = test_custom_synonym_handler.get_related("happy", TermRelationships.SYNONYM)
         self.assertListEqual(synonyms_related, [
             {'definition': 'definition of happy', 'words': ['joy', 'cheer']},
             {'definition': 'another definition of happy', 'words': ['content', 'cheerful', 'joyful']}
         ])
-        antonyms_related = test_custom_synonym_handler.get_related("happy",
+        antonyms_related = test_custom_synonym_handler.get_related("happy", TermRelationships.ANTONYM)
         self.assertListEqual(antonyms_related, [{'definition': 'definition of sad', 'words': ['sad', 'sadness']}])
 
         test_custom_synonym_handler.add_entry("text", [
-            {'definition': 'definition of text', 'type': 'synonym', 'words': ['word', 'sentence']}
+            RelatedEntry(**{'definition': 'definition of text', 'type': 'synonym', 'words': ['word', 'sentence']})
         ])
         self.assertEqual(test_custom_synonym_handler.lexicon, {
             **{"text": {'synonym': [{'definition': 'definition of text', 'words': ['word', 'sentence']}]}},
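Judging from how the test now builds entries, `RelatedEntry` accepts `definition`, `type`, and `words` keyword arguments and coerces `type` from a plain string, while `TermRelationships` exposes at least `SYNONYM` and `ANTONYM` members. A plausible sketch of those types follows; the actual definitions live in `my_ghost_writer/type_hints.py`, which this commit also extends, and the enum presumably has further members for the other relation types seen in the fixtures:

```python
from enum import Enum

from pydantic import BaseModel


class TermRelationships(str, Enum):
    SYNONYM = "synonym"
    ANTONYM = "antonym"


class RelatedEntry(BaseModel):
    definition: str
    type: TermRelationships
    words: list[str]
```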
tests/my_ghost_writer/test_extract_jsonpaths.py
ADDED
@@ -0,0 +1,1440 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import unittest
|
2 |
+
import json
|
3 |
+
|
4 |
+
from my_ghost_writer.constants import app_logger
|
5 |
+
from my_ghost_writer.jsonpath_comparator import (compare_json_with_jsonpath, compare_json_with_jsonpath_and_types,
|
6 |
+
JSONPathComparator, extract_structure_paths, extract_structure_paths_with_types)
|
7 |
+
from my_ghost_writer.jsonpath_extractor import (JSONPathStructureAnalyzer, analyze_with_jsonpath_detailed,
|
8 |
+
analyze_with_jsonpath_types, analyze_with_jsonpath, compare_json_with_jsonpath_structures, analyze_dict_list_simple)
|
9 |
+
|
10 |
+
|
11 |
+
old_json = {
|
12 |
+
"key1": "row 1",
|
13 |
+
"key2": 22,
|
14 |
+
"key_nested1": {
|
15 |
+
"key_nested2": "row 3",
|
16 |
+
"key_nested3": "row 4",
|
17 |
+
"array_nested_4": [
|
18 |
+
"row 5",
|
19 |
+
"row 6",
|
20 |
+
"row 7 nested",
|
21 |
+
{
|
22 |
+
"key_nested4": "row 8",
|
23 |
+
"array_nested_5": ["row 9", "row 10"]
|
24 |
+
}
|
25 |
+
]
|
26 |
+
}
|
27 |
+
}
|
28 |
+
|
29 |
+
new_json = {
|
30 |
+
"key1": "row 1",
|
31 |
+
"key2": 22,
|
32 |
+
"key_nested1": {
|
33 |
+
"key_nested2": "row 3",
|
34 |
+
"key_nested3": "row 4",
|
35 |
+
"array_changed_4": [
|
36 |
+
"row 5",
|
37 |
+
"row changed 6",
|
38 |
+
"row 7 nested",
|
39 |
+
{
|
40 |
+
"last_change": "row 8",
|
41 |
+
"array_nested_5": ["row 9", "row 10"]
|
42 |
+
}
|
43 |
+
]
|
44 |
+
}
|
45 |
+
}
|
46 |
+
|
47 |
+
|
48 |
+
class TestJSONPathStructureAnalyzer(unittest.TestCase):
|
49 |
+
"""
|
50 |
+
Test JSONPath structure analysis with the provided nested JSON data
|
51 |
+
"""
|
52 |
+
|
53 |
+
def test_get_paths_with_types_basic(self):
|
54 |
+
"""
|
55 |
+
Test get_paths_with_types with basic data types
|
56 |
+
"""
|
57 |
+
test_data = {
|
58 |
+
"string_field": "hello",
|
59 |
+
"int_field": 42,
|
60 |
+
"float_field": 3.14,
|
61 |
+
"bool_field": True,
|
62 |
+
"null_field": None
|
63 |
+
}
|
64 |
+
|
65 |
+
analyzer = JSONPathStructureAnalyzer()
|
66 |
+
analyzer.extract_all_paths(test_data)
|
67 |
+
paths_with_types = analyzer.get_paths_with_types()
|
68 |
+
|
69 |
+
# Verify basic types
|
70 |
+
self.assertEqual(paths_with_types["$.string_field"], "str")
|
71 |
+
self.assertEqual(paths_with_types["$.int_field"], "int")
|
72 |
+
self.assertEqual(paths_with_types["$.float_field"], "float")
|
73 |
+
self.assertEqual(paths_with_types["$.bool_field"], "bool")
|
74 |
+
self.assertEqual(paths_with_types["$.null_field"], "NoneType")
|
75 |
+
|
76 |
+
def test_get_paths_with_types_arrays(self):
|
77 |
+
"""
|
78 |
+
Test get_paths_with_types with arrays
|
79 |
+
"""
|
80 |
+
test_data = {
|
81 |
+
"simple_array": [1, 2, 3],
|
82 |
+
"empty_array": [],
|
83 |
+
"mixed_array": ["string", 42, True],
|
84 |
+
"nested_array": [[1, 2], [3, 4]]
|
85 |
+
}
|
86 |
+
|
87 |
+
analyzer = JSONPathStructureAnalyzer()
|
88 |
+
analyzer.extract_all_paths(test_data)
|
89 |
+
paths_with_types = analyzer.get_paths_with_types()
|
90 |
+
|
91 |
+
self.assertEqual(paths_with_types["$.simple_array[*]"], "array")
|
92 |
+
self.assertEqual(paths_with_types["$.empty_array[*]"], "array")
|
93 |
+
self.assertEqual(paths_with_types["$.mixed_array[*]"], "array")
|
94 |
+
self.assertEqual(paths_with_types["$.nested_array[*]"], "array")
|
95 |
+
|
96 |
+
def test_get_paths_with_types_with_old_json(self):
|
97 |
+
"""
|
98 |
+
Test get_paths_with_types with the old_json test data
|
99 |
+
"""
|
100 |
+
analyzer = JSONPathStructureAnalyzer()
|
101 |
+
analyzer.extract_all_paths(old_json)
|
102 |
+
paths_with_types = analyzer.get_paths_with_types()
|
103 |
+
|
104 |
+
# Test specific paths from old_json
|
105 |
+
self.assertEqual(paths_with_types["$.key1"], "str")
|
106 |
+
self.assertEqual(paths_with_types["$.key2"], "int")
|
107 |
+
self.assertEqual(paths_with_types["$.key_nested1"], "dict")
|
108 |
+
self.assertEqual(paths_with_types["$.key_nested1.array_nested_4[*]"], "array")
|
109 |
+
self.assertEqual(paths_with_types["$.key_nested1.key_nested2"], "str")
|
110 |
+
|
111 |
+
# Verify all expected paths are present
|
112 |
+
expected_paths = [
|
113 |
+
"$.key1", "$.key2", "$.key_nested1",
|
114 |
+
"$.key_nested1.key_nested2", "$.key_nested1.key_nested3",
|
115 |
+
"$.key_nested1.array_nested_4[*]"
|
116 |
+
]
|
117 |
+
|
118 |
+
for path in expected_paths:
|
119 |
+
self.assertIn(path, paths_with_types, f"Path {path} should be in paths_with_types")
|
120 |
+
|
121 |
+
def test_get_detailed_type_report_basic(self):
|
122 |
+
"""
|
123 |
+
Test get_detailed_type_report with basic data
|
124 |
+
"""
|
125 |
+
test_data = {
|
126 |
+
"test_field": "sample_value",
|
127 |
+
"array_field": [1, 2, 3]
|
128 |
+
}
|
129 |
+
|
130 |
+
analyzer = JSONPathStructureAnalyzer()
|
131 |
+
analyzer.extract_all_paths(test_data)
|
132 |
+
detailed_report = analyzer.get_detailed_type_report()
|
133 |
+
|
134 |
+
# Test structure of the detailed report
|
135 |
+
self.assertIn("$.test_field", detailed_report)
|
136 |
+
self.assertIn("$.array_field[*]", detailed_report)
|
137 |
+
|
138 |
+
# Test field details
|
139 |
+
field_info = detailed_report["$.test_field"]
|
140 |
+
self.assertIn("types", field_info)
|
141 |
+
self.assertIn("primary_type", field_info)
|
142 |
+
self.assertIn("is_array", field_info)
|
143 |
+
self.assertIn("samples", field_info)
|
144 |
+
self.assertIn("sample_count", field_info)
|
145 |
+
|
146 |
+
# Verify field values
|
147 |
+
self.assertEqual(field_info["primary_type"], "str")
|
148 |
+
self.assertFalse(field_info["is_array"])
|
149 |
+
self.assertIn("sample_value", field_info["samples"])
|
150 |
+
self.assertGreater(field_info["sample_count"], 0)
|
151 |
+
|
152 |
+
# Test array field details
|
153 |
+
array_info = detailed_report["$.array_field[*]"]
|
154 |
+
self.assertTrue(array_info["is_array"])
|
155 |
+
self.assertEqual(array_info["primary_type"], "array")
|
156 |
+
self.assertEqual(array_info['array_length'], 3)
|
157 |
+
|
158 |
+
def test_get_detailed_type_report_with_old_json(self):
|
159 |
+
"""
|
160 |
+
Test get_detailed_type_report with old_json data
|
161 |
+
"""
|
162 |
+
analyzer = JSONPathStructureAnalyzer()
|
163 |
+
analyzer.extract_all_paths(old_json)
|
164 |
+
detailed_report = analyzer.get_detailed_type_report()
|
165 |
+
|
166 |
+
# Test specific fields from old_json
|
167 |
+
key1_info = detailed_report["$.key1"]
|
168 |
+
self.assertEqual(key1_info["primary_type"], "str")
|
169 |
+
self.assertFalse(key1_info["is_array"])
|
170 |
+
self.assertIn("row 1", key1_info["samples"])
|
171 |
+
|
172 |
+
key2_info = detailed_report["$.key2"]
|
173 |
+
self.assertEqual(key2_info["primary_type"], "int")
|
174 |
+
self.assertFalse(key2_info["is_array"])
|
175 |
+
self.assertIn("22", key2_info["samples"])
|
176 |
+
|
177 |
+
# Test array field
|
178 |
+
array_info = detailed_report["$.key_nested1.array_nested_4[*]"]
|
179 |
+
self.assertTrue(array_info["is_array"])
|
180 |
+
self.assertEqual(array_info["primary_type"], "array")
|
181 |
+
self.assertEqual(array_info["array_length"], 4)
|
182 |
+
|
183 |
+
def test_get_detailed_type_report_mixed_types(self):
|
184 |
+
"""
|
185 |
+
Test get_detailed_type_report with mixed types (hypothetical case)
|
186 |
+
"""
|
187 |
+
# Create a scenario where a path might have mixed types
|
188 |
+
analyzer = JSONPathStructureAnalyzer()
|
189 |
+
|
190 |
+
# Manually add mixed type data to test the logic
|
191 |
+
analyzer.paths.add("$.mixed_field")
|
192 |
+
analyzer.types["$.mixed_field"].add("str")
|
193 |
+
analyzer.types["$.mixed_field"].add("int")
|
194 |
+
analyzer.samples["$.mixed_field"] = ["hello", "42"]
|
195 |
+
|
196 |
+
detailed_report = analyzer.get_detailed_type_report()
|
197 |
+
|
198 |
+
mixed_info = detailed_report["$.mixed_field"]
|
199 |
+
self.assertIn("mixed(", mixed_info["primary_type"])
|
200 |
+
self.assertFalse(mixed_info["is_array"])
|
201 |
+
self.assertEqual(len(mixed_info["types"]), 2)
|
202 |
+
|
203 |
+
def test_analyze_with_jsonpath_types_function(self):
|
204 |
+
"""
|
205 |
+
Test the convenience function analyze_with_jsonpath_types
|
206 |
+
"""
|
207 |
+
test_data = {
|
208 |
+
"name": "test",
|
209 |
+
"count": 5,
|
210 |
+
"items": ["a", "b", "c"]
|
211 |
+
}
|
212 |
+
|
213 |
+
paths_with_types = analyze_with_jsonpath_types(test_data)
|
214 |
+
|
215 |
+
# Verify function returns expected structure
|
216 |
+
self.assertIsInstance(paths_with_types, dict)
|
217 |
+
self.assertIn("$.name", paths_with_types)
|
218 |
+
self.assertIn("$.count", paths_with_types)
|
219 |
+
self.assertIn("$.items[*]", paths_with_types)
|
220 |
+
|
221 |
+
# Verify types
|
222 |
+
self.assertEqual(paths_with_types["$.name"], "str")
|
223 |
+
self.assertEqual(paths_with_types["$.count"], "int")
|
224 |
+
self.assertEqual(paths_with_types["$.items[*]"], "array")
|
225 |
+
|
226 |
+
def test_analyze_with_jsonpath_detailed_function(self):
|
227 |
+
"""
|
228 |
+
Test the convenience function analyze_with_jsonpath_detailed
|
229 |
+
"""
|
230 |
+
test_data = {
|
231 |
+
"description": "test description",
|
232 |
+
"tags": ["tag1", "tag2"]
|
233 |
+
}
|
234 |
+
|
235 |
+
detailed_info = analyze_with_jsonpath_detailed(test_data)
|
236 |
+
|
237 |
+
# Verify function returns expected structure
|
238 |
+
self.assertIsInstance(detailed_info, dict)
|
239 |
+
self.assertIn("$.description", detailed_info)
|
240 |
+
self.assertIn("$.tags[*]", detailed_info)
|
241 |
+
|
242 |
+
# Verify detailed structure
|
243 |
+
desc_info = detailed_info["$.description"]
|
244 |
+
self.assertIn("types", desc_info)
|
245 |
+
self.assertIn("primary_type", desc_info)
|
246 |
+
self.assertIn("samples", desc_info)
|
247 |
+
self.assertEqual(desc_info["primary_type"], "str")
|
248 |
+
|
249 |
+
tags_info = detailed_info["$.tags[*]"]
|
250 |
+
self.assertTrue(tags_info["is_array"])
|
251 |
+
self.assertEqual(tags_info["primary_type"], "array")
|
252 |
+
self.assertEqual(tags_info["array_length"], 2)
|
253 |
+
|
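The two convenience functions tested above complement analyze_with_jsonpath, which is exercised later in this file. A side-by-side sketch, assuming all three live in my_ghost_writer.jsonpath_extractor next to the analyzer:

from my_ghost_writer.jsonpath_extractor import (analyze_with_jsonpath, analyze_with_jsonpath_detailed,
                                                analyze_with_jsonpath_types)

data = {"name": "demo", "tags": ["a", "b"]}
print(analyze_with_jsonpath(data))           # human-readable "path -- sample" report (a str)
print(analyze_with_jsonpath_types(data))     # {"$.name": "str", "$.tags[*]": "array"}
print(analyze_with_jsonpath_detailed(data))  # per-path dicts with types/samples/array_length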
254 |
+
def test_get_paths_with_types_empty_data(self):
|
255 |
+
"""
|
256 |
+
Test get_paths_with_types with empty data
|
257 |
+
"""
|
258 |
+
analyzer = JSONPathStructureAnalyzer()
|
259 |
+
analyzer.extract_all_paths({})
|
260 |
+
paths_with_types = analyzer.get_paths_with_types()
|
261 |
+
|
262 |
+
# Should return empty dict for empty input
|
263 |
+
self.assertEqual(len(paths_with_types), 0)
|
264 |
+
|
265 |
+
def test_get_detailed_type_report_empty_data(self):
|
266 |
+
"""
|
267 |
+
Test get_detailed_type_report with empty data
|
268 |
+
"""
|
269 |
+
analyzer = JSONPathStructureAnalyzer()
|
270 |
+
analyzer.extract_all_paths({})
|
271 |
+
detailed_report = analyzer.get_detailed_type_report()
|
272 |
+
|
273 |
+
# Should return empty dict for empty input
|
274 |
+
self.assertEqual(len(detailed_report), 0)
|
275 |
+
|
276 |
+
def test_paths_with_types_comparison_old_vs_new(self):
|
277 |
+
"""
|
278 |
+
Test comparing paths with types between old and new JSON
|
279 |
+
"""
|
280 |
+
analyzer_old = JSONPathStructureAnalyzer()
|
281 |
+
analyzer_old.extract_all_paths(old_json)
|
282 |
+
old_paths_with_types = analyzer_old.get_paths_with_types()
|
283 |
+
|
284 |
+
analyzer_new = JSONPathStructureAnalyzer()
|
285 |
+
analyzer_new.extract_all_paths(new_json)
|
286 |
+
new_paths_with_types = analyzer_new.get_paths_with_types()
|
287 |
+
|
288 |
+
# Find differences
|
289 |
+
old_only = set(old_paths_with_types.keys()) - set(new_paths_with_types.keys())
|
290 |
+
new_only = set(new_paths_with_types.keys()) - set(old_paths_with_types.keys())
|
291 |
+
common = set(old_paths_with_types.keys()) & set(new_paths_with_types.keys())
|
292 |
+
|
293 |
+
# Verify expected differences
|
294 |
+
self.assertIn("$.key_nested1.array_nested_4[*]", old_only)
|
295 |
+
self.assertIn("$.key_nested1.array_changed_4[*]", new_only)
|
296 |
+
|
297 |
+
# Verify common paths have same types
|
298 |
+
for path in common:
|
299 |
+
self.assertEqual(old_paths_with_types[path], new_paths_with_types[path],
|
300 |
+
f"Type mismatch for common path {path}")
|
301 |
+
|
302 |
+
def test_detailed_report_comparison_old_vs_new(self):
|
303 |
+
"""
|
304 |
+
Test comparing detailed reports between old and new JSON
|
305 |
+
"""
|
306 |
+
old_detailed = analyze_with_jsonpath_detailed(old_json)
|
307 |
+
new_detailed = analyze_with_jsonpath_detailed(new_json)
|
308 |
+
|
309 |
+
# Check that common fields have consistent detailed info
|
310 |
+
common_paths = set(old_detailed.keys()) & set(new_detailed.keys())
|
311 |
+
|
312 |
+
for path in common_paths:
|
313 |
+
old_info = old_detailed[path]
|
314 |
+
new_info = new_detailed[path]
|
315 |
+
|
316 |
+
# Primary types should match for common paths
|
317 |
+
self.assertEqual(old_info["primary_type"], new_info["primary_type"],
|
318 |
+
f"Primary type mismatch for {path}")
|
319 |
+
|
320 |
+
# Array status should match
|
321 |
+
self.assertEqual(old_info["is_array"], new_info["is_array"],
|
322 |
+
f"Array status mismatch for {path}")
|
323 |
+
|
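The old-versus-new comparisons above reduce to plain set algebra on the path maps. A hypothetical helper (not part of the library) capturing the same pattern:

from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer

def path_diff(old_payload, new_payload):
    """Illustrative only: return (paths only in old, paths only in new)."""
    old_an, new_an = JSONPathStructureAnalyzer(), JSONPathStructureAnalyzer()
    old_an.extract_all_paths(old_payload)
    new_an.extract_all_paths(new_payload)
    old_paths = set(old_an.get_paths_with_types())
    new_paths = set(new_an.get_paths_with_types())
    return old_paths - new_paths, new_paths - old_paths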
324 |
+
def test_integration_all_new_methods(self):
|
325 |
+
"""
|
326 |
+
Integration test using all new methods together
|
327 |
+
"""
|
328 |
+
test_data = {
|
329 |
+
"user": {
|
330 |
+
"name": "John Doe",
|
331 |
+
"age": 30,
|
332 |
+
"hobbies": ["reading", "coding", "gaming"],
|
333 |
+
"profile": {
|
334 |
+
"active": True,
|
335 |
+
"settings": {
|
336 |
+
"theme": "dark",
|
337 |
+
"notifications": False
|
338 |
+
}
|
339 |
+
}
|
340 |
+
}
|
341 |
+
}
|
342 |
+
|
343 |
+
# Test all three approaches
|
344 |
+
structure_report = analyze_with_jsonpath(test_data)
|
345 |
+
paths_with_types = analyze_with_jsonpath_types(test_data)
|
346 |
+
detailed_info = analyze_with_jsonpath_detailed(test_data)
|
347 |
+
|
348 |
+
# Verify all methods found the same paths
|
349 |
+
report_paths = set()
|
350 |
+
for line in structure_report.split('\n'):
|
351 |
+
if ' -- ' in line:
|
352 |
+
path = line.split(' -- ')[0]
|
353 |
+
report_paths.add(path)
|
354 |
+
|
355 |
+
types_paths = set(paths_with_types.keys())
|
356 |
+
detailed_paths = set(detailed_info.keys())
|
357 |
+
|
358 |
+
# All methods should find the same paths
|
359 |
+
self.assertEqual(report_paths, types_paths)
|
360 |
+
self.assertEqual(types_paths, detailed_paths)
|
361 |
+
|
362 |
+
# Verify specific expected paths exist
|
363 |
+
expected_paths = [
|
364 |
+
"$.user",
|
365 |
+
"$.user.name",
|
366 |
+
"$.user.age",
|
367 |
+
"$.user.hobbies[*]",
|
368 |
+
"$.user.profile",
|
369 |
+
"$.user.profile.active",
|
370 |
+
"$.user.profile.settings",
|
371 |
+
"$.user.profile.settings.theme",
|
372 |
+
"$.user.profile.settings.notifications"
|
373 |
+
]
|
374 |
+
|
375 |
+
for path in expected_paths:
|
376 |
+
self.assertIn(path, types_paths, f"Path {path} should be found by all methods")
|
377 |
+
self.assertIn(path, detailed_paths, f"Path {path} should be in detailed info")
|
378 |
+
|
379 |
+
def test_type_consistency_across_methods(self):
|
380 |
+
"""
|
381 |
+
Test that type information is consistent across different methods
|
382 |
+
"""
|
383 |
+
analyzer = JSONPathStructureAnalyzer()
|
384 |
+
analyzer.extract_all_paths(old_json)
|
385 |
+
|
386 |
+
# Get data using different methods
|
387 |
+
# structure_report = analyzer.get_structure_report()
|
388 |
+
paths_with_types = analyzer.get_paths_with_types()
|
389 |
+
detailed_report = analyzer.get_detailed_type_report()
|
390 |
+
|
391 |
+
# For each path, verify consistency
|
392 |
+
for path in paths_with_types:
|
393 |
+
# Detailed report should have the same primary type
|
394 |
+
if path in detailed_report:
|
395 |
+
detailed_type = detailed_report[path]["primary_type"]
|
396 |
+
simple_type = paths_with_types[path]
|
397 |
+
|
398 |
+
# They should match (detailed might have more info for mixed types)
|
399 |
+
if not detailed_type.startswith("mixed("):
|
400 |
+
self.assertEqual(simple_type, detailed_type,
|
401 |
+
f"Type inconsistency for {path}: {simple_type} vs {detailed_type}")
|
402 |
+
|
403 |
+
def test_extract_all_paths_from_old_structure(self):
|
404 |
+
"""
|
405 |
+
Test that analyzer correctly extracts all paths from the old JSON structure
|
406 |
+
"""
|
407 |
+
analyzer = JSONPathStructureAnalyzer()
|
408 |
+
paths = analyzer.extract_all_paths(old_json)
|
409 |
+
|
410 |
+
# Verify the top-level paths
|
411 |
+
self.assertIn("$.key1", paths)
|
412 |
+
self.assertIn("$.key2", paths)
|
413 |
+
self.assertIn("$.key_nested1", paths)
|
414 |
+
|
415 |
+
# Verify the nested object paths
|
416 |
+
self.assertIn("$.key_nested1.key_nested2", paths)
|
417 |
+
self.assertIn("$.key_nested1.key_nested3", paths)
|
418 |
+
self.assertIn("$.key_nested1.array_nested_4[*]", paths)
|
419 |
+
|
420 |
+
# Verify the deeply nested paths (3-4 levels deep)
|
421 |
+
self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", paths)
|
422 |
+
self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*]", paths)
|
423 |
+
|
424 |
+
def test_extract_all_paths_from_new_structure(self):
|
425 |
+
"""
|
426 |
+
Test that analyzer correctly extracts paths from the new JSON structure
|
427 |
+
"""
|
428 |
+
analyzer = JSONPathStructureAnalyzer()
|
429 |
+
paths = analyzer.extract_all_paths(new_json)
|
430 |
+
|
431 |
+
# Verify the renamed array path
|
432 |
+
self.assertIn("$.key_nested1.array_changed_4[*]", paths)
|
433 |
+
|
434 |
+
# Verify the renamed nested key
|
435 |
+
self.assertIn("$.key_nested1.array_changed_4[*].last_change", paths)
|
436 |
+
|
437 |
+
# Verify the unchanged nested array
|
438 |
+
self.assertIn("$.key_nested1.array_changed_4[*].array_nested_5[*]", paths)
|
439 |
+
|
440 |
+
def test_structure_report_format_old_json(self):
|
441 |
+
"""
|
442 |
+
Test structure report format for old JSON
|
443 |
+
"""
|
444 |
+
analyzer = JSONPathStructureAnalyzer()
|
445 |
+
analyzer.extract_all_paths(old_json)
|
446 |
+
report = analyzer.get_structure_report()
|
447 |
+
|
448 |
+
# Check specific format elements
|
449 |
+
self.assertIn("$.key1 -- row 1", report)
|
450 |
+
self.assertIn("$.key2 -- 22", report)
|
451 |
+
self.assertIn("$.key_nested1.array_nested_4[*] -- array[4]", report)
|
452 |
+
self.assertIn("$.key_nested1.array_nested_4[*].key_nested4 -- row 8", report)
|
453 |
+
self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*] -- array[2]", report)
|
454 |
+
|
455 |
+
def test_structure_report_format_new_json(self):
|
456 |
+
"""
|
457 |
+
Test structure report format for new JSON
|
458 |
+
"""
|
459 |
+
analyzer = JSONPathStructureAnalyzer()
|
460 |
+
analyzer.extract_all_paths(new_json)
|
461 |
+
report = analyzer.get_structure_report()
|
462 |
+
|
463 |
+
# Check renamed elements appear correctly
|
464 |
+
self.assertIn("$.key_nested1.array_changed_4[*] -- array[4]", report)
|
465 |
+
self.assertIn("$.key_nested1.array_changed_4[*].last_change -- row 8", report)
|
466 |
+
|
467 |
+
# Check unchanged elements
|
468 |
+
self.assertIn("$.key1 -- row 1", report)
|
469 |
+
self.assertIn("$.key2 -- 22", report)
|
470 |
+
|
471 |
+
def test_analyze_with_jsonpath_function(self):
|
472 |
+
"""
|
473 |
+
Test the convenience function for structure analysis
|
474 |
+
"""
|
475 |
+
old_report = analyze_with_jsonpath(old_json)
|
476 |
+
new_report = analyze_with_jsonpath(new_json)
|
477 |
+
|
478 |
+
# Verify both reports are valid strings
|
479 |
+
self.assertIsInstance(old_report, str)
|
480 |
+
self.assertGreater(len(old_report), 0)
|
481 |
+
self.assertIsInstance(new_report, str)
|
482 |
+
self.assertGreater(len(new_report), 0)
|
483 |
+
|
484 |
+
# Verify key differences
|
485 |
+
self.assertIn("array_nested_4", old_report)
|
486 |
+
self.assertIn("array_changed_4", new_report)
|
487 |
+
self.assertIn("key_nested4", old_report)
|
488 |
+
self.assertIn("last_change", new_report)
|
489 |
+
|
490 |
+
def test_compare_json_structures_method(self):
|
491 |
+
"""
|
492 |
+
Test the compare_json_structures method directly
|
493 |
+
"""
|
494 |
+
analyzer = JSONPathStructureAnalyzer()
|
495 |
+
analyzer.extract_all_paths(old_json)
|
496 |
+
|
497 |
+
comparison = analyzer.compare_json_structures(new_json)
|
498 |
+
|
499 |
+
# Test all expected keys within the comparison result
|
500 |
+
expected_keys = [
|
501 |
+
"added_paths", "removed_paths", "common_paths",
|
502 |
+
"type_changes", "value_differences", "array_size_changes",
|
503 |
+
"array_lengths_old", "array_lengths_new", "summary"
|
504 |
+
]
|
505 |
+
|
506 |
+
for key in expected_keys:
|
507 |
+
self.assertIn(key, comparison, f"Key {key} should be in comparison result")
|
508 |
+
|
509 |
+
# Test summary statistics
|
510 |
+
summary = comparison["summary"]
|
511 |
+
self.assertGreater(summary["total_paths_old"], 0)
|
512 |
+
self.assertGreater(summary["total_paths_new"], 0)
|
513 |
+
self.assertGreater(summary["paths_removed"], 0)
|
514 |
+
self.assertGreater(summary["paths_added"], 0)
|
515 |
+
|
516 |
+
# Test array length tracking
|
517 |
+
self.assertIn("$.key_nested1.array_nested_4[*]", comparison["array_lengths_old"])
|
518 |
+
self.assertIn("$.key_nested1.array_changed_4[*]", comparison["array_lengths_new"])
|
519 |
+
self.assertEqual(comparison["array_lengths_old"]["$.key_nested1.array_nested_4[*]"], 4)
|
520 |
+
self.assertEqual(comparison["array_lengths_new"]["$.key_nested1.array_changed_4[*]"], 4)
|
521 |
+
|
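The keys checked above define the full contract of compare_json_structures. A compact sketch (payloads illustrative):

from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer

analyzer = JSONPathStructureAnalyzer()
analyzer.extract_all_paths({"items": [1, 2, 3]})
comparison = analyzer.compare_json_structures({"items": [1, 2]})
for key in ("added_paths", "removed_paths", "common_paths", "type_changes",
            "value_differences", "array_size_changes",
            "array_lengths_old", "array_lengths_new", "summary"):
    print(key, comparison[key])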
522 |
+
def test_get_array_lengths_method(self):
|
523 |
+
"""
|
524 |
+
Test the get_array_lengths method
|
525 |
+
"""
|
526 |
+
test_data = {
|
527 |
+
"empty_array": [],
|
528 |
+
"small_array": [1, 2],
|
529 |
+
"large_array": list(range(100)),
|
530 |
+
"nested": {
|
531 |
+
"inner_array": ["a", "b", "c", "d", "e"]
|
532 |
+
}
|
533 |
+
}
|
534 |
+
|
535 |
+
analyzer = JSONPathStructureAnalyzer()
|
536 |
+
analyzer.extract_all_paths(test_data)
|
537 |
+
array_lengths = analyzer.get_array_lengths()
|
538 |
+
|
539 |
+
# Test all array lengths are captured
|
540 |
+
self.assertEqual(array_lengths["$.empty_array[*]"], 0)
|
541 |
+
self.assertEqual(array_lengths["$.small_array[*]"], 2)
|
542 |
+
self.assertEqual(array_lengths["$.large_array[*]"], 100)
|
543 |
+
self.assertEqual(array_lengths["$.nested.inner_array[*]"], 5)
|
544 |
+
|
545 |
+
# Test that non-array paths are not in array_lengths
|
546 |
+
for path in array_lengths.keys():
|
547 |
+
self.assertTrue(path.endswith("[*]"), f"Array length path {path} should end with [*]")
|
548 |
+
|
549 |
+
def test_value_differences_detection(self):
|
550 |
+
"""
|
551 |
+
Test detection of value changes in compare_json_structures
|
552 |
+
"""
|
553 |
+
old_data = {
|
554 |
+
"name": "John",
|
555 |
+
"age": 25,
|
556 |
+
"city": "New York"
|
557 |
+
}
|
558 |
+
|
559 |
+
new_data = {
|
560 |
+
"name": "John", # unchanged
|
561 |
+
"age": 26, # changed
|
562 |
+
"city": "Boston" # changed
|
563 |
+
}
|
564 |
+
|
565 |
+
analyzer = JSONPathStructureAnalyzer()
|
566 |
+
analyzer.extract_all_paths(old_data)
|
567 |
+
comparison = analyzer.compare_json_structures(new_data)
|
568 |
+
|
569 |
+
# Should detect value changes
|
570 |
+
self.assertIn("$.age", comparison["value_differences"])
|
571 |
+
self.assertIn("$.city", comparison["value_differences"])
|
572 |
+
self.assertNotIn("$.name", comparison["value_differences"])
|
573 |
+
|
574 |
+
# Test change details
|
575 |
+
age_change = comparison["value_differences"]["$.age"]
|
576 |
+
self.assertEqual(age_change["old_value"], "25")
|
577 |
+
self.assertEqual(age_change["new_value"], "26")
|
578 |
+
|
579 |
+
city_change = comparison["value_differences"]["$.city"]
|
580 |
+
self.assertEqual(city_change["old_value"], "New York")
|
581 |
+
self.assertEqual(city_change["new_value"], "Boston")
|
582 |
+
|
583 |
+
def test_array_size_changes_detection(self):
|
584 |
+
"""
|
585 |
+
Test detection of array size changes
|
586 |
+
"""
|
587 |
+
old_data = {
|
588 |
+
"items": [1, 2, 3],
|
589 |
+
"tags": ["a", "b"]
|
590 |
+
}
|
591 |
+
|
592 |
+
new_data = {
|
593 |
+
"items": [1, 2, 3, 4, 5], # size increased
|
594 |
+
"tags": ["a"] # size decreased
|
595 |
+
}
|
596 |
+
|
597 |
+
analyzer = JSONPathStructureAnalyzer()
|
598 |
+
analyzer.extract_all_paths(old_data)
|
599 |
+
comparison = analyzer.compare_json_structures(new_data)
|
600 |
+
|
601 |
+
# Should detect array size changes
|
602 |
+
self.assertIn("$.items[*]", comparison["array_size_changes"])
|
603 |
+
self.assertIn("$.tags[*]", comparison["array_size_changes"])
|
604 |
+
|
605 |
+
# Test size change details
|
606 |
+
items_change = comparison["array_size_changes"]["$.items[*]"]
|
607 |
+
self.assertEqual(items_change["old_size"], 3)
|
608 |
+
self.assertEqual(items_change["new_size"], 5)
|
609 |
+
self.assertEqual(items_change["size_change"], 2)
|
610 |
+
|
611 |
+
tags_change = comparison["array_size_changes"]["$.tags[*]"]
|
612 |
+
self.assertEqual(tags_change["old_size"], 2)
|
613 |
+
self.assertEqual(tags_change["new_size"], 1)
|
614 |
+
self.assertEqual(tags_change["size_change"], -1)
|
615 |
+
|
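Combining the two tests above: value diffs and array-size diffs come back from the same comparison call. A minimal sketch mirroring the asserted shapes:

from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer

analyzer = JSONPathStructureAnalyzer()
analyzer.extract_all_paths({"age": 25, "tags": ["a", "b"]})
comparison = analyzer.compare_json_structures({"age": 26, "tags": ["a"]})
print(comparison["value_differences"]["$.age"])       # old_value "25", new_value "26" (stringified)
print(comparison["array_size_changes"]["$.tags[*]"])  # old_size 2, new_size 1, size_change -1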
616 |
+
def test_compare_json_with_jsonpath_structures_function(self):
|
617 |
+
"""
|
618 |
+
Test the compare_json_with_jsonpath_structures convenience function
|
619 |
+
"""
|
620 |
+
# Test with print_report=False
|
621 |
+
comparison = compare_json_with_jsonpath_structures(old_json, new_json, print_report=False)
|
622 |
+
|
623 |
+
# Should return the same structure as the method
|
624 |
+
self.assertIn("summary", comparison)
|
625 |
+
self.assertIn("added_paths", comparison)
|
626 |
+
self.assertIn("removed_paths", comparison)
|
627 |
+
|
628 |
+
# Test that it works without printing (no exception thrown)
|
629 |
+
self.assertIsInstance(comparison, dict)
|
630 |
+
|
631 |
+
# Test specific changes
|
632 |
+
self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"])
|
633 |
+
self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"])
|
634 |
+
|
635 |
+
def test_nested_arrays_length_tracking(self):
|
636 |
+
"""
|
637 |
+
Test array length tracking for deeply nested arrays
|
638 |
+
"""
|
639 |
+
test_data = {
|
640 |
+
"level1": [
|
641 |
+
{"level2": [1, 2, 3]},
|
642 |
+
{"level2": [4, 5]},
|
643 |
+
{"level2": [6, 7, 8, 9]}
|
644 |
+
]
|
645 |
+
}
|
646 |
+
|
647 |
+
analyzer = JSONPathStructureAnalyzer()
|
648 |
+
analyzer.extract_all_paths(test_data)
|
649 |
+
array_lengths = analyzer.get_array_lengths()
|
650 |
+
|
651 |
+
# Should track both levels of arrays
|
652 |
+
self.assertIn("$.level1[*]", array_lengths)
|
653 |
+
self.assertIn("$.level1[*].level2[*]", array_lengths)
|
654 |
+
|
655 |
+
# Check lengths
|
656 |
+
self.assertEqual(array_lengths["$.level1[*]"], 3)
|
657 |
+
# Note: The nested array length will be from the last item processed (current implementation)
|
658 |
+
self.assertEqual(array_lengths["$.level1[*].level2[*]"], 4)
|
659 |
+
|
660 |
+
def test_type_changes_detection_in_comparison(self):
|
661 |
+
"""
|
662 |
+
Test detection of type changes in compare_json_structures
|
663 |
+
"""
|
664 |
+
old_data = {
|
665 |
+
"field1": "string_value",
|
666 |
+
"field2": 42,
|
667 |
+
"field3": [1, 2, 3]
|
668 |
+
}
|
669 |
+
|
670 |
+
new_data = {
|
671 |
+
"field1": 123,
|
672 |
+
"field2": 42,
|
673 |
+
"field3": "not_array"
|
674 |
+
}
|
675 |
+
|
676 |
+
analyzer = JSONPathStructureAnalyzer()
|
677 |
+
analyzer.extract_all_paths(old_data)
|
678 |
+
comparison = analyzer.compare_json_structures(new_data)
|
679 |
+
|
680 |
+
# Should detect type changes
|
681 |
+
self.assertIn("$.field1", comparison["type_changes"])
|
682 |
+
self.assertIn("$.field3", comparison["type_changes"])
|
683 |
+
self.assertNotIn("$.field2", comparison["type_changes"])
|
684 |
+
|
685 |
+
# Test change details
|
686 |
+
field1_change = comparison["type_changes"]["$.field1"]
|
687 |
+
self.assertEqual(field1_change["old_type"], "str")
|
688 |
+
self.assertEqual(field1_change["new_type"], "int")
|
689 |
+
|
690 |
+
field3_change = comparison["type_changes"]["$.field3"]
|
691 |
+
self.assertEqual(field3_change["new_type"], "str")
|
692 |
+
# The old value was a Python list, so old_type is reported as "list"
|
693 |
+
self.assertEqual(field3_change["old_type"], "list")
|
694 |
+
|
695 |
+
def test_analyze_dict_list_simple(self):
|
696 |
+
"""
|
697 |
+
Test analyze_dict_list_simple function with a list of dictionaries
|
698 |
+
"""
|
699 |
+
dict_list = [
|
700 |
+
{
|
701 |
+
"user": "john",
|
702 |
+
"age": 25,
|
703 |
+
"tags": ["admin", "user"]
|
704 |
+
},
|
705 |
+
{
|
706 |
+
"user": "jane",
|
707 |
+
"age": 30,
|
708 |
+
"tags": ["user"],
|
709 |
+
"active": True
|
710 |
+
},
|
711 |
+
{
|
712 |
+
"user": "bob",
|
713 |
+
"score": 95.5,
|
714 |
+
"tags": ["guest", "temp", "new"]
|
715 |
+
}
|
716 |
+
]
|
717 |
+
|
718 |
+
# Test the function
|
719 |
+
results = analyze_dict_list_simple(dict_list)
|
720 |
+
|
721 |
+
# Basic structure tests
|
722 |
+
self.assertEqual(len(results), 3)
|
723 |
+
self.assertIsInstance(results, list)
|
724 |
+
|
725 |
+
# Test each result has expected keys
|
726 |
+
for i, result in enumerate(results):
|
727 |
+
self.assertEqual(result["index"], i)
|
728 |
+
self.assertIn("paths_with_types", result)
|
729 |
+
self.assertIn("detailed_report", result)
|
730 |
+
self.assertIn("array_lengths", result)
|
731 |
+
self.assertIn("structure_report", result)
|
732 |
+
|
733 |
+
# Test first dict analysis
|
734 |
+
first_result = results[0]
|
735 |
+
self.assertIn("$.user", first_result["paths_with_types"])
|
736 |
+
self.assertIn("$.age", first_result["paths_with_types"])
|
737 |
+
self.assertIn("$.tags[*]", first_result["paths_with_types"])
|
738 |
+
self.assertEqual(first_result["paths_with_types"]["$.user"], "str")
|
739 |
+
self.assertEqual(first_result["paths_with_types"]["$.age"], "int")
|
740 |
+
self.assertEqual(first_result["paths_with_types"]["$.tags[*]"], "array")
|
741 |
+
self.assertEqual(first_result["array_lengths"]["$.tags[*]"], 2)
|
742 |
+
|
743 |
+
# Test second dict has additional field
|
744 |
+
second_result = results[1]
|
745 |
+
self.assertIn("$.active", second_result["paths_with_types"])
|
746 |
+
self.assertEqual(second_result["paths_with_types"]["$.active"], "bool")
|
747 |
+
self.assertEqual(second_result["array_lengths"]["$.tags[*]"], 1)
|
748 |
+
|
749 |
+
# Test third dict differences
|
750 |
+
third_result = results[2]
|
751 |
+
self.assertIn("$.score", third_result["paths_with_types"])
|
752 |
+
self.assertNotIn("$.age", third_result["paths_with_types"]) # age missing in third dict
|
753 |
+
self.assertEqual(third_result["paths_with_types"]["$.score"], "float")
|
754 |
+
self.assertEqual(third_result["array_lengths"]["$.tags[*]"], 3)
|
755 |
+
|
756 |
+
# Test structure reports are strings
|
757 |
+
for result in results:
|
758 |
+
self.assertIsInstance(result["structure_report"], str)
|
759 |
+
self.assertGreater(len(result["structure_report"]), 0)
|
760 |
+
|
761 |
+
# Test detailed reports have proper structure
|
762 |
+
for result in results:
|
763 |
+
detailed = result["detailed_report"]
|
764 |
+
for path, info in detailed.items():
|
765 |
+
self.assertIn("types", info)
|
766 |
+
self.assertIn("primary_type", info)
|
767 |
+
self.assertIn("is_array", info)
|
768 |
+
self.assertIn("samples", info)
|
769 |
+
self.assertIn("sample_count", info)
|
770 |
+
|
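A usage sketch for analyze_dict_list_simple, which the test above validates record by record (records here are illustrative):

from my_ghost_writer.jsonpath_extractor import analyze_dict_list_simple

records = [{"user": "john", "tags": ["admin"]}, {"user": "jane", "score": 9.5}]
for result in analyze_dict_list_simple(records):
    # Each result carries: index, paths_with_types, detailed_report, array_lengths, structure_report
    print(result["index"], result["paths_with_types"])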
771 |
+
def test_filter_paths_excluding_keys(self):
|
772 |
+
"""
|
773 |
+
Test filtering paths to exclude specific keys
|
774 |
+
"""
|
775 |
+
test_data = {
|
776 |
+
'definition': 'enjoying or showing or marked by joy or pleasure',
|
777 |
+
'examples': ['a happy smile', 'spent many happy days on the beach'],
|
778 |
+
'related_words': [{'base_form': 'euphoric'}, {'base_form': 'elated'}],
|
779 |
+
'relation_type': 'also_see',
|
780 |
+
'source': 'wordnet',
|
781 |
+
'wordnet_pos': 'a'
|
782 |
+
}
|
783 |
+
|
784 |
+
analyzer = JSONPathStructureAnalyzer()
|
785 |
+
analyzer.extract_all_paths(test_data)
|
786 |
+
|
787 |
+
# Test without exclusion
|
788 |
+
all_paths = analyzer.paths
|
789 |
+
self.assertIn("$.examples[*]", all_paths)
|
790 |
+
self.assertIn("$.definition", all_paths)
|
791 |
+
|
792 |
+
# Test with exclusion
|
793 |
+
filtered_paths = analyzer.filter_paths_excluding_keys({'examples'})
|
794 |
+
self.assertNotIn("$.examples[*]", filtered_paths)
|
795 |
+
self.assertIn("$.definition", filtered_paths)
|
796 |
+
self.assertIn("$.related_words[*]", filtered_paths)
|
797 |
+
self.assertIn("$.related_words[*].base_form", filtered_paths)
|
798 |
+
|
799 |
+
# Test excluding multiple keys
|
800 |
+
filtered_paths_multi = analyzer.filter_paths_excluding_keys({'examples', 'source'})
|
801 |
+
self.assertNotIn("$.examples[*]", filtered_paths_multi)
|
802 |
+
self.assertNotIn("$.source", filtered_paths_multi)
|
803 |
+
self.assertIn("$.definition", filtered_paths_multi)
|
804 |
+
|
805 |
+
|
806 |
+
def test_get_filtered_structure_report(self):
|
807 |
+
"""
|
808 |
+
Test filtered structure report generation
|
809 |
+
"""
|
810 |
+
test_data = {
|
811 |
+
'definition': 'test definition',
|
812 |
+
'examples': ['example1', 'example2'],
|
813 |
+
'metadata': {'source': 'test', 'version': 1},
|
814 |
+
'tags': ['tag1', 'tag2', 'tag3']
|
815 |
+
}
|
816 |
+
|
817 |
+
analyzer = JSONPathStructureAnalyzer()
|
818 |
+
analyzer.extract_all_paths(test_data)
|
819 |
+
|
820 |
+
# Test filtered report
|
821 |
+
filtered_report = analyzer.get_filtered_structure_report({'examples'})
|
822 |
+
|
823 |
+
# Should not contain examples
|
824 |
+
self.assertNotIn("examples", filtered_report)
|
825 |
+
|
826 |
+
# Should contain other fields
|
827 |
+
self.assertIn("$.definition", filtered_report)
|
828 |
+
self.assertIn("$.metadata", filtered_report)
|
829 |
+
self.assertIn("$.tags[*]", filtered_report)
|
830 |
+
|
831 |
+
# Test structure
|
832 |
+
lines = filtered_report.split('\n')
|
833 |
+
self.assertGreater(len(lines), 0)
|
834 |
+
|
835 |
+
# Verify specific content
|
836 |
+
self.assertIn("$.definition -- test definition", filtered_report)
|
837 |
+
self.assertIn("$.tags[*] -- array[3]", filtered_report)
|
838 |
+
|
839 |
+
def test_get_filtered_paths_with_types(self):
|
840 |
+
"""
|
841 |
+
Test filtered paths with types
|
842 |
+
"""
|
843 |
+
test_data = {
|
844 |
+
'name': 'test',
|
845 |
+
'count': 42,
|
846 |
+
'items': [1, 2, 3],
|
847 |
+
'exclude_me': {'nested': 'value'}
|
848 |
+
}
|
849 |
+
|
850 |
+
analyzer = JSONPathStructureAnalyzer()
|
851 |
+
analyzer.extract_all_paths(test_data)
|
852 |
+
|
853 |
+
# Test filtered paths with types
|
854 |
+
filtered_paths_types = analyzer.get_filtered_paths_with_types({'exclude_me'})
|
855 |
+
|
856 |
+
# Should not contain excluded paths
|
857 |
+
self.assertNotIn("$.exclude_me", filtered_paths_types)
|
858 |
+
self.assertNotIn("$.exclude_me.nested", filtered_paths_types)
|
859 |
+
|
860 |
+
# Should contain other paths
|
861 |
+
self.assertIn("$.name", filtered_paths_types)
|
862 |
+
self.assertIn("$.count", filtered_paths_types)
|
863 |
+
self.assertIn("$.items[*]", filtered_paths_types)
|
864 |
+
|
865 |
+
# Test types
|
866 |
+
self.assertEqual(filtered_paths_types["$.name"], "str")
|
867 |
+
self.assertEqual(filtered_paths_types["$.count"], "int")
|
868 |
+
self.assertEqual(filtered_paths_types["$.items[*]"], "array")
|
869 |
+
|
870 |
+
def test_get_filtered_detailed_type_report(self):
|
871 |
+
"""
|
872 |
+
Test filtered detailed type report
|
873 |
+
"""
|
874 |
+
test_data = {
|
875 |
+
'title': 'Sample Title',
|
876 |
+
'description': 'Sample Description',
|
877 |
+
'private_data': {'secret': 'hidden'},
|
878 |
+
'public_list': ['item1', 'item2']
|
879 |
+
}
|
880 |
+
|
881 |
+
analyzer = JSONPathStructureAnalyzer()
|
882 |
+
analyzer.extract_all_paths(test_data)
|
883 |
+
|
884 |
+
# Test filtered detailed report
|
885 |
+
filtered_detailed = analyzer.get_filtered_detailed_type_report({'private_data'})
|
886 |
+
|
887 |
+
# Should not contain excluded paths
|
888 |
+
self.assertNotIn("$.private_data", filtered_detailed)
|
889 |
+
self.assertNotIn("$.private_data.secret", filtered_detailed)
|
890 |
+
|
891 |
+
# Should contain other paths
|
892 |
+
self.assertIn("$.title", filtered_detailed)
|
893 |
+
self.assertIn("$.public_list[*]", filtered_detailed)
|
894 |
+
|
895 |
+
# Test structure of remaining items
|
896 |
+
title_info = filtered_detailed["$.title"]
|
897 |
+
self.assertEqual(title_info["primary_type"], "str")
|
898 |
+
self.assertFalse(title_info["is_array"])
|
899 |
+
self.assertIn("Sample Title", title_info["samples"])
|
900 |
+
|
901 |
+
list_info = filtered_detailed["$.public_list[*]"]
|
902 |
+
self.assertEqual(list_info["primary_type"], "array")
|
903 |
+
self.assertTrue(list_info["is_array"])
|
904 |
+
self.assertEqual(list_info["array_length"], 2)
|
905 |
+
|
906 |
+
def test_analyze_dict_list_simple_with_exclusion(self):
|
907 |
+
"""
|
908 |
+
Test analyze_dict_list_simple with key exclusion
|
909 |
+
"""
|
910 |
+
dict_list = [
|
911 |
+
{
|
912 |
+
"name": "John",
|
913 |
+
"age": 25,
|
914 |
+
"private_info": {"ssn": "123-45-6789"},
|
915 |
+
"tags": ["user", "admin"]
|
916 |
+
},
|
917 |
+
{
|
918 |
+
"name": "Jane",
|
919 |
+
"age": 30,
|
920 |
+
"private_info": {"ssn": "987-65-4321"},
|
921 |
+
"tags": ["user"],
|
922 |
+
"active": True
|
923 |
+
}
|
924 |
+
]
|
925 |
+
|
926 |
+
# Test with exclusion
|
927 |
+
results = analyze_dict_list_simple(dict_list, exclude_keys={'private_info'})
|
928 |
+
|
929 |
+
# Basic structure tests
|
930 |
+
self.assertEqual(len(results), 2)
|
931 |
+
|
932 |
+
# Test that private_info is excluded from all results
|
933 |
+
for result in results:
|
934 |
+
paths_with_types = result["paths_with_types"]
|
935 |
+
detailed_report = result["detailed_report"]
|
936 |
+
|
937 |
+
# Should not contain private_info paths
|
938 |
+
private_paths = [path for path in paths_with_types.keys() if 'private_info' in path]
|
939 |
+
self.assertEqual(len(private_paths), 0, "private_info paths should be excluded")
|
940 |
+
|
941 |
+
private_detailed = [path for path in detailed_report.keys() if 'private_info' in path]
|
942 |
+
self.assertEqual(len(private_detailed), 0, "private_info should be excluded from detailed report")
|
943 |
+
|
944 |
+
# Should contain other paths
|
945 |
+
self.assertIn("$.name", paths_with_types)
|
946 |
+
self.assertIn("$.age", paths_with_types)
|
947 |
+
self.assertIn("$.tags[*]", paths_with_types)
|
948 |
+
|
949 |
+
# Test second dict has additional field (but not private_info)
|
950 |
+
second_result = results[1]
|
951 |
+
self.assertIn("$.active", second_result["paths_with_types"])
|
952 |
+
self.assertEqual(second_result["paths_with_types"]["$.active"], "bool")
|
953 |
+
|
954 |
+
# Test structure reports don't contain excluded keys
|
955 |
+
for result in results:
|
956 |
+
structure_report = result["structure_report"]
|
957 |
+
self.assertNotIn("private_info", structure_report)
|
958 |
+
self.assertIn("$.name", structure_report)
|
959 |
+
|
960 |
+
def test_exclusion_with_nested_arrays(self):
|
961 |
+
"""
|
962 |
+
Test exclusion works with nested arrays and complex structures
|
963 |
+
"""
|
964 |
+
test_data = {
|
965 |
+
"valid_data": {
|
966 |
+
"items": [
|
967 |
+
{"id": 1, "name": "item1"},
|
968 |
+
{"id": 2, "name": "item2"}
|
969 |
+
]
|
970 |
+
},
|
971 |
+
"sensitive_data": {
|
972 |
+
"secrets": [
|
973 |
+
{"key": "secret1", "value": "hidden1"},
|
974 |
+
{"key": "secret2", "value": "hidden2"}
|
975 |
+
]
|
976 |
+
}
|
977 |
+
}
|
978 |
+
|
979 |
+
analyzer = JSONPathStructureAnalyzer()
|
980 |
+
analyzer.extract_all_paths(test_data)
|
981 |
+
|
982 |
+
# Test exclusion of nested structure
|
983 |
+
filtered_paths = analyzer.filter_paths_excluding_keys({'sensitive_data'})
|
984 |
+
|
985 |
+
# Should exclude all sensitive_data paths
|
986 |
+
sensitive_paths = [path for path in analyzer.paths if 'sensitive_data' in path]
|
987 |
+
self.assertGreater(len(sensitive_paths), 0, "Should have sensitive_data paths in original")
|
988 |
+
|
989 |
+
for sensitive_path in sensitive_paths:
|
990 |
+
self.assertNotIn(sensitive_path, filtered_paths, f"Should exclude {sensitive_path}")
|
991 |
+
|
992 |
+
# Should keep valid_data paths
|
993 |
+
self.assertIn("$.valid_data", filtered_paths)
|
994 |
+
self.assertIn("$.valid_data.items[*]", filtered_paths)
|
995 |
+
self.assertIn("$.valid_data.items[*].id", filtered_paths)
|
996 |
+
self.assertIn("$.valid_data.items[*].name", filtered_paths)
|
997 |
+
|
998 |
+
|
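The exclusion tests above all hang off the same filter. A sketch of the four filtered views, using a hypothetical sensitive payload:

from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer

analyzer = JSONPathStructureAnalyzer()
analyzer.extract_all_paths({"name": "demo", "private_info": {"ssn": "redacted"}})
exclude = {"private_info"}
paths = analyzer.filter_paths_excluding_keys(exclude)
types = analyzer.get_filtered_paths_with_types(exclude)
text_report = analyzer.get_filtered_structure_report(exclude)
detail = analyzer.get_filtered_detailed_type_report(exclude)
# None of the four outputs mention $.private_info or its children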
999 |
+
class TestJSONPathComparator(unittest.TestCase):
|
1000 |
+
"""
|
1001 |
+
Test JSONPath structure comparison functionality
|
1002 |
+
"""
|
1003 |
+
|
1004 |
+
def test_extract_structure_paths_comparison(self):
|
1005 |
+
"""
|
1006 |
+
Test extraction of structure paths from both JSON structures
|
1007 |
+
"""
|
1008 |
+
old_paths = extract_structure_paths(old_json)
|
1009 |
+
new_paths = extract_structure_paths(new_json)
|
1010 |
+
|
1011 |
+
# Verify we get a reasonable number of paths
|
1012 |
+
self.assertGreaterEqual(len(old_paths), 7)
|
1013 |
+
self.assertGreaterEqual(len(new_paths), 7)
|
1014 |
+
|
1015 |
+
# Verify specific differences
|
1016 |
+
self.assertIn("$.key_nested1.array_nested_4[*]", old_paths)
|
1017 |
+
self.assertIn("$.key_nested1.array_changed_4[*]", new_paths)
|
1018 |
+
self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", old_paths)
|
1019 |
+
self.assertIn("$.key_nested1.array_changed_4[*].last_change", new_paths)
|
1020 |
+
|
1021 |
+
def test_extract_structure_paths_with_types(self):
|
1022 |
+
"""
|
1023 |
+
Test extraction of structure paths with type information
|
1024 |
+
"""
|
1025 |
+
old_paths_with_types = extract_structure_paths_with_types(old_json)
|
1026 |
+
new_paths_with_types = extract_structure_paths_with_types(new_json)
|
1027 |
+
|
1028 |
+
# Verify we get type information
|
1029 |
+
self.assertEqual(old_paths_with_types["$.key1"], "string")
|
1030 |
+
self.assertEqual(old_paths_with_types["$.key2"], "integer")
|
1031 |
+
self.assertEqual(old_paths_with_types["$.key_nested1"], "object")
|
1032 |
+
|
1033 |
+
# Check the actual array field, not the [*] path
|
1034 |
+
self.assertIn("array", old_paths_with_types["$.key_nested1.array_nested_4"])
|
1035 |
+
|
1036 |
+
# The [*] path represents the type of array elements (first element)
|
1037 |
+
self.assertEqual(old_paths_with_types["$.key_nested1.array_nested_4[*]"], "string")
|
1038 |
+
|
1039 |
+
# Verify type differences between old and new
|
1040 |
+
self.assertIn("$.key_nested1.array_nested_4", old_paths_with_types)
|
1041 |
+
self.assertIn("$.key_nested1.array_changed_4", new_paths_with_types)
|
1042 |
+
self.assertNotIn("$.key_nested1.array_nested_4", new_paths_with_types)
|
1043 |
+
|
1044 |
+
def test_compare_structures_array_rename(self):
|
1045 |
+
"""
|
1046 |
+
Test comparison detects array field rename
|
1047 |
+
"""
|
1048 |
+
comparator = JSONPathComparator()
|
1049 |
+
comparison = comparator.compare_structures(old_json, new_json)
|
1050 |
+
|
1051 |
+
# Should detect removed paths (old structure)
|
1052 |
+
self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"])
|
1053 |
+
self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", comparison["removed_paths"])
|
1054 |
+
|
1055 |
+
# Should detect added paths (new structure)
|
1056 |
+
self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"])
|
1057 |
+
self.assertIn("$.key_nested1.array_changed_4[*].last_change", comparison["added_paths"])
|
1058 |
+
|
1059 |
+
def test_compare_structures_with_types(self):
|
1060 |
+
"""
|
1061 |
+
Test comparison with type information
|
1062 |
+
"""
|
1063 |
+
comparator = JSONPathComparator()
|
1064 |
+
comparison = comparator.compare_structures_with_types(old_json, new_json)
|
1065 |
+
|
1066 |
+
# Should detect added paths with types
|
1067 |
+
self.assertIn("$.key_nested1.array_changed_4", comparison["added_paths"])
|
1068 |
+
|
1069 |
+
# Check for the actual existing paths in the comparison
|
1070 |
+
# The deeply nested paths might not be included in the type comparison
|
1071 |
+
if "$.key_nested1.array_changed_4[*].last_change" in comparison["added_paths"]:
|
1072 |
+
self.assertIn("string", comparison["added_paths"]["$.key_nested1.array_changed_4[*].last_change"])
|
1073 |
+
|
1074 |
+
# Should detect removed paths with types
|
1075 |
+
self.assertIn("$.key_nested1.array_nested_4", comparison["removed_paths"])
|
1076 |
+
|
1077 |
+
# Should detect common paths with types
|
1078 |
+
self.assertIn("$.key1", comparison["common_paths"])
|
1079 |
+
self.assertEqual(comparison["common_paths"]["$.key1"], "string")
|
1080 |
+
|
1081 |
+
# Should not detect type changes for this example (same types, different paths)
|
1082 |
+
self.assertEqual(len(comparison["type_changes"]), 0)
|
1083 |
+
|
1084 |
+
def test_type_changes_detection(self):
|
1085 |
+
"""
|
1086 |
+
Test detection of type changes in paths
|
1087 |
+
"""
|
1088 |
+
# Create test data with type changes
|
1089 |
+
json_with_string = {"test_field": "hello"}
|
1090 |
+
json_with_number = {"test_field": 42}
|
1091 |
+
|
1092 |
+
comparator = JSONPathComparator()
|
1093 |
+
comparison = comparator.compare_structures_with_types(json_with_string, json_with_number)
|
1094 |
+
|
1095 |
+
# Should detect type change
|
1096 |
+
self.assertIn("$.test_field", comparison["type_changes"])
|
1097 |
+
self.assertEqual(comparison["type_changes"]["$.test_field"]["old_type"], "string")
|
1098 |
+
self.assertEqual(comparison["type_changes"]["$.test_field"]["new_type"], "integer")
|
1099 |
+
|
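Note the naming contrast with the extractor: JSONPathComparator reports JSON-level type names ("string", "integer") rather than Python names ("str", "int"). A sketch of the type-change detection exercised above:

from my_ghost_writer.jsonpath_comparator import JSONPathComparator

comparator = JSONPathComparator()
result = comparator.compare_structures_with_types({"test_field": "hello"}, {"test_field": 42})
change = result["type_changes"]["$.test_field"]
print(change["old_type"], "->", change["new_type"])  # string -> integer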
1100 |
+
def test_compare_structures_unchanged_paths(self):
|
1101 |
+
"""
|
1102 |
+
Test that unchanged paths are correctly identified
|
1103 |
+
"""
|
1104 |
+
comparator = JSONPathComparator()
|
1105 |
+
comparison = comparator.compare_structures(old_json, new_json)
|
1106 |
+
|
1107 |
+
# These paths should remain unchanged
|
1108 |
+
unchanged_paths = [
|
1109 |
+
"$.key1",
|
1110 |
+
"$.key2",
|
1111 |
+
"$.key_nested1",
|
1112 |
+
"$.key_nested1.key_nested2",
|
1113 |
+
"$.key_nested1.key_nested3"
|
1114 |
+
]
|
1115 |
+
|
1116 |
+
for path in unchanged_paths:
|
1117 |
+
self.assertIn(path, comparison["common_paths"], f"Path {path} should be in common paths")
|
1118 |
+
self.assertNotIn(path, comparison["added_paths"], f"Path {path} should not be added")
|
1119 |
+
self.assertNotIn(path, comparison["removed_paths"], f"Path {path} should not be removed")
|
1120 |
+
|
1121 |
+
def test_compare_structures_nested_array_preserved(self):
|
1122 |
+
"""
|
1123 |
+
Test that the deeply nested array structure is preserved despite parent changes
|
1124 |
+
"""
|
1125 |
+
comparator = JSONPathComparator()
|
1126 |
+
comparison = comparator.compare_structures(old_json, new_json)
|
1127 |
+
|
1128 |
+
# The nested array should exist in both (though path changed due to parent rename)
|
1129 |
+
old_nested_array = "$.key_nested1.array_nested_4[*].array_nested_5[*]"
|
1130 |
+
new_nested_array = "$.key_nested1.array_changed_4[*].array_nested_5[*]"
|
1131 |
+
|
1132 |
+
self.assertIn(old_nested_array, comparison["removed_paths"])
|
1133 |
+
self.assertIn(new_nested_array, comparison["added_paths"])
|
1134 |
+
|
1135 |
+
def test_path_validations_with_specific_paths(self):
|
1136 |
+
"""
|
1137 |
+
Test validation of specific paths between old and new structures
|
1138 |
+
"""
|
1139 |
+
common_paths = [
|
1140 |
+
"$.key1", # Should exist in both
|
1141 |
+
"$.key2", # Should exist in both
|
1142 |
+
"$.key_nested1.array_nested_4[*]", # Exists only in old
|
1143 |
+
"$.key_nested1.array_changed_4[*]", # Exists only in new
|
1144 |
+
"$.key_nested1.key_nested2" # Should exist in both
|
1145 |
+
]
|
1146 |
+
|
1147 |
+
comparator = JSONPathComparator(common_paths)
|
1148 |
+
comparison = comparator.compare_structures(old_json, new_json)
|
1149 |
+
|
1150 |
+
validations = comparison["path_validations"]
|
1151 |
+
|
1152 |
+
# Test paths that exist in both
|
1153 |
+
self.assertEqual(validations["$.key1"]["status"], "✅")
|
1154 |
+
self.assertTrue(validations["$.key1"]["old_found"])
|
1155 |
+
self.assertTrue(validations["$.key1"]["new_found"])
|
1156 |
+
|
1157 |
+
# Test paths that exist only in old
|
1158 |
+
self.assertEqual(validations["$.key_nested1.array_nested_4[*]"]["status"], "❌")
|
1159 |
+
self.assertTrue(validations["$.key_nested1.array_nested_4[*]"]["old_found"])
|
1160 |
+
self.assertFalse(validations["$.key_nested1.array_nested_4[*]"]["new_found"])
|
1161 |
+
|
1162 |
+
# Test paths that exist only in new
|
1163 |
+
self.assertEqual(validations["$.key_nested1.array_changed_4[*]"]["status"], "❌")
|
1164 |
+
self.assertFalse(validations["$.key_nested1.array_changed_4[*]"]["old_found"])
|
1165 |
+
self.assertTrue(validations["$.key_nested1.array_changed_4[*]"]["new_found"])
|
1166 |
+
|
1167 |
+
|
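The watch list passed to the constructor drives the path_validations block checked above. A sketch of that mode, with illustrative payloads:

from my_ghost_writer.jsonpath_comparator import JSONPathComparator

old_payload = {"key1": "row 1", "key_nested1": {"key_nested2": "row 2"}}
new_payload = {"key1": "row 1", "key_nested1": {}}
comparator = JSONPathComparator(["$.key1", "$.key_nested1.key_nested2"])
comparison = comparator.compare_structures(old_payload, new_payload)
for path, check in comparison["path_validations"].items():
    # status is "✅" only when old_found and new_found are both True, "❌" otherwise
    print(path, check["status"], check["old_found"], check["new_found"])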
1168 |
+
class TestJSONPathIntegration(unittest.TestCase):
|
1169 |
+
"""
|
1170 |
+
Integration tests for the complete JSONPath diff workflow
|
1171 |
+
"""
|
1172 |
+
|
1173 |
+
def test_complete_diff_workflow(self):
|
1174 |
+
"""
|
1175 |
+
Test the complete workflow from analysis to comparison using the provided data
|
1176 |
+
"""
|
1177 |
+
# Step 1: Analyze the old structure
|
1178 |
+
old_report = analyze_with_jsonpath(old_json)
|
1179 |
+
self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", old_report)
|
1180 |
+
|
1181 |
+
# Step 2: Analyze the new structure
|
1182 |
+
new_report = analyze_with_jsonpath(new_json)
|
1183 |
+
self.assertIn("$.key_nested1.array_changed_4[*].last_change", new_report)
|
1184 |
+
|
1185 |
+
# Step 3: Compare structures
|
1186 |
+
critical_paths = [
|
1187 |
+
"$.key1",
|
1188 |
+
"$.key2",
|
1189 |
+
"$.key_nested1.key_nested2",
|
1190 |
+
"$.key_nested1.key_nested3"
|
1191 |
+
]
|
1192 |
+
|
1193 |
+
comparison = compare_json_with_jsonpath(old_json, new_json, critical_paths)
|
1194 |
+
|
1195 |
+
# Verify comparison results
|
1196 |
+
self.assertIsInstance(comparison, dict)
|
1197 |
+
self.assertGreater(len(comparison["added_paths"]), 0)
|
1198 |
+
self.assertGreater(len(comparison["removed_paths"]), 0)
|
1199 |
+
self.assertGreater(len(comparison["common_paths"]), 0)
|
1200 |
+
|
1201 |
+
def test_complete_diff_workflow_with_types(self):
|
1202 |
+
"""
|
1203 |
+
Test the complete workflow with type information
|
1204 |
+
"""
|
1205 |
+
# Step 1: Compare structures with types
|
1206 |
+
critical_paths = [
|
1207 |
+
"$.key1",
|
1208 |
+
"$.key2",
|
1209 |
+
"$.key_nested1.key_nested2"
|
1210 |
+
]
|
1211 |
+
|
1212 |
+
comparison = compare_json_with_jsonpath_and_types(old_json, new_json, critical_paths)
|
1213 |
+
|
1214 |
+
# Verify comparison results include type information
|
1215 |
+
self.assertIsInstance(comparison, dict)
|
1216 |
+
self.assertIn("added_paths", comparison)
|
1217 |
+
self.assertIn("removed_paths", comparison)
|
1218 |
+
self.assertIn("type_changes", comparison)
|
1219 |
+
|
1220 |
+
# Verify type information is included
|
1221 |
+
if comparison["added_paths"]:
|
1222 |
+
for path, type_info in comparison["added_paths"].items():
|
1223 |
+
self.assertIsInstance(type_info, str)
|
1224 |
+
self.assertGreater(len(type_info), 0)
|
1225 |
+
|
1226 |
+
def test_detect_specific_changes(self):
|
1227 |
+
"""
|
1228 |
+
Test detection of the specific changes between old and new JSON
|
1229 |
+
"""
|
1230 |
+
comparison = compare_json_with_jsonpath(old_json, new_json)
|
1231 |
+
|
1232 |
+
# Key changes that should be detected:
|
1233 |
+
# 1. array_nested_4 -> array_changed_4
|
1234 |
+
# 2. key_nested4 -> last_change
|
1235 |
+
|
1236 |
+
expected_removed = [
|
1237 |
+
"$.key_nested1.array_nested_4[*]",
|
1238 |
+
"$.key_nested1.array_nested_4[*].key_nested4"
|
1239 |
+
]
|
1240 |
+
|
1241 |
+
expected_added = [
|
1242 |
+
"$.key_nested1.array_changed_4[*]",
|
1243 |
+
"$.key_nested1.array_changed_4[*].last_change"
|
1244 |
+
]
|
1245 |
+
|
1246 |
+
for path in expected_removed:
|
1247 |
+
self.assertIn(path, comparison["removed_paths"], f"Expected removed path {path} not found")
|
1248 |
+
|
1249 |
+
for path in expected_added:
|
1250 |
+
self.assertIn(path, comparison["added_paths"], f"Expected added path {path} not found")
|
1251 |
+
|
1252 |
+
def test_structure_variations_old(self):
|
1253 |
+
"""
|
1254 |
+
Test that old JSON structure contains expected array name
|
1255 |
+
"""
|
1256 |
+
analyzer = JSONPathStructureAnalyzer()
|
1257 |
+
paths = analyzer.extract_all_paths(old_json)
|
1258 |
+
|
1259 |
+
expected_path = "$.key_nested1.array_nested_4[*]"
|
1260 |
+
self.assertIn(expected_path, paths, f"Expected path {expected_path} not found")
|
1261 |
+
|
1262 |
+
def test_structure_variations_new(self):
|
1263 |
+
"""
|
1264 |
+
Test that new JSON structure contains expected array name
|
1265 |
+
"""
|
1266 |
+
analyzer = JSONPathStructureAnalyzer()
|
1267 |
+
paths = analyzer.extract_all_paths(new_json)
|
1268 |
+
|
1269 |
+
expected_path = "$.key_nested1.array_changed_4[*]"
|
1270 |
+
self.assertIn(expected_path, paths, f"Expected path {expected_path} not found")
|
1271 |
+
|
1272 |
+
def test_json_string_compatibility(self):
|
1273 |
+
"""
|
1274 |
+
Test that the tools work with JSON strings (serialized/deserialized)
|
1275 |
+
"""
|
1276 |
+
# Convert to JSON string and back
|
1277 |
+
old_string = json.dumps(old_json)
|
1278 |
+
new_string = json.dumps(new_json)
|
1279 |
+
|
1280 |
+
old_parsed = json.loads(old_string)
|
1281 |
+
new_parsed = json.loads(new_string)
|
1282 |
+
|
1283 |
+
# Should work the same as with dict objects
|
1284 |
+
comparison = compare_json_with_jsonpath(old_parsed, new_parsed)
|
1285 |
+
|
1286 |
+
self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"])
|
1287 |
+
self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"])
|
1288 |
+
|
1289 |
+
|
1290 |
+
class TestEdgeCases(unittest.TestCase):
|
1291 |
+
"""
|
1292 |
+
Test edge cases with the provided data structure
|
1293 |
+
"""
|
1294 |
+
|
1295 |
+
def test_empty_json_comparison(self):
|
1296 |
+
"""
|
1297 |
+
Test comparison with empty JSON
|
1298 |
+
"""
|
1299 |
+
empty_json = {}
|
1300 |
+
|
1301 |
+
comparison = compare_json_with_jsonpath(old_json, empty_json)
|
1302 |
+
|
1303 |
+
# All old paths should be removed
|
1304 |
+
self.assertGreater(len(comparison["removed_paths"]), 0)
|
1305 |
+
self.assertEqual(len(comparison["added_paths"]), 0)
|
1306 |
+
self.assertEqual(len(comparison["common_paths"]), 0)
|
1307 |
+
|
1308 |
+
def test_empty_json_comparison_with_types(self):
|
1309 |
+
"""
|
1310 |
+
Test comparison with empty JSON including types
|
1311 |
+
"""
|
1312 |
+
empty_json = {}
|
1313 |
+
|
1314 |
+
comparison = compare_json_with_jsonpath_and_types(old_json, empty_json)
|
1315 |
+
|
1316 |
+
# All old paths should be removed with type info
|
1317 |
+
self.assertGreater(len(comparison["removed_paths"]), 0)
|
1318 |
+
self.assertEqual(len(comparison["added_paths"]), 0)
|
1319 |
+
|
1320 |
+
# Empty JSON still has the root path "$" in common
|
1321 |
+
# The comparison includes the root "$" path in both structures
|
1322 |
+
self.assertLessEqual(len(comparison["common_paths"]), 1)
|
1323 |
+
|
1324 |
+
# Verify type information is preserved
|
1325 |
+
for path, type_info in comparison["removed_paths"].items():
|
1326 |
+
self.assertIsInstance(type_info, str)
|
1327 |
+
|
1328 |
+
def test_identical_json_comparison(self):
|
1329 |
+
"""
|
1330 |
+
Test comparison of identical JSON structures
|
1331 |
+
"""
|
1332 |
+
comparison = compare_json_with_jsonpath(old_json, old_json)
|
1333 |
+
|
1334 |
+
# Should have no changes
|
1335 |
+
self.assertEqual(len(comparison["added_paths"]), 0)
|
1336 |
+
self.assertEqual(len(comparison["removed_paths"]), 0)
|
1337 |
+
self.assertGreater(len(comparison["common_paths"]), 0)
|
1338 |
+
|
1339 |
+
def test_identical_json_comparison_with_types(self):
|
1340 |
+
"""
|
1341 |
+
Test comparison of identical JSON structures with types
|
1342 |
+
"""
|
1343 |
+
comparison = compare_json_with_jsonpath_and_types(old_json, old_json)
|
1344 |
+
|
1345 |
+
# Should have no changes
|
1346 |
+
self.assertEqual(len(comparison["added_paths"]), 0)
|
1347 |
+
self.assertEqual(len(comparison["removed_paths"]), 0)
|
1348 |
+
self.assertEqual(len(comparison["type_changes"]), 0)
|
1349 |
+
self.assertGreater(len(comparison["common_paths"]), 0)
|
1350 |
+
|
1351 |
+
def test_deep_nested_array_analysis(self):
|
1352 |
+
"""
|
1353 |
+
Test analysis of the deepest nested array (array_nested_5)
|
1354 |
+
"""
|
1355 |
+
analyzer = JSONPathStructureAnalyzer()
|
1356 |
+
analyzer.extract_all_paths(old_json)
|
1357 |
+
report = analyzer.get_structure_report()
|
1358 |
+
|
1359 |
+
# Should properly analyze the deeply nested array
|
1360 |
+
self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*] -- array[2]", report)
|
1361 |
+
|
1362 |
+
def test_array_type_detection(self):
|
1363 |
+
"""
|
1364 |
+
Test proper detection of array types in extract_structure_paths_with_types
|
1365 |
+
"""
|
1366 |
+
paths_with_types = extract_structure_paths_with_types(old_json)
|
1367 |
+
|
1368 |
+
# Test array type detection on the actual array field, not the [*] path
|
1369 |
+
# The array field itself should have "array" in its type
|
1370 |
+
self.assertIn("array", paths_with_types["$.key_nested1.array_nested_4"])
|
1371 |
+
# The [*] path represents the type of array elements (the first element is a string)
|
1372 |
+
self.assertEqual(paths_with_types["$.key_nested1.array_nested_4[*]"], "string")
|
1373 |
+
|
1374 |
+
|
1375 |
+
class TestSimpleUsageExamples(unittest.TestCase):
|
1376 |
+
"""
|
1377 |
+
Simple examples showing how to use the JSONPath diff tool
|
1378 |
+
"""
|
1379 |
+
|
1380 |
+
def test_basic_structure_analysis_old(self):
|
1381 |
+
"""
|
1382 |
+
Analyze the structure of the old JSON
|
1383 |
+
"""
|
1384 |
+
report = analyze_with_jsonpath(old_json)
|
1385 |
+
app_logger.info("\nOLD JSON STRUCTURE:")
|
1386 |
+
app_logger.info(report)
|
1387 |
+
|
1388 |
+
# Basic assertions
|
1389 |
+
self.assertIn("$.key1 -- row 1", report)
|
1390 |
+
self.assertIn("$.key2 -- 22", report)
|
1391 |
+
self.assertIn("array_nested_4", report)
|
1392 |
+
self.assertIn("key_nested4", report)
|
1393 |
+
|
1394 |
+
def test_basic_structure_analysis_new(self):
|
1395 |
+
"""
|
1396 |
+
Analyze the structure of the new JSON
|
1397 |
+
"""
|
1398 |
+
report = analyze_with_jsonpath(new_json)
|
1399 |
+
app_logger.info("\nNEW JSON STRUCTURE:")
|
1400 |
+
app_logger.info(report)
|
1401 |
+
|
1402 |
+
# Basic assertions
|
1403 |
+
self.assertIn("$.key1 -- row 1", report)
|
1404 |
+
self.assertIn("$.key2 -- 22", report)
|
1405 |
+
self.assertIn("array_changed_4", report)
|
1406 |
+
self.assertIn("last_change", report)
|
1407 |
+
|
1408 |
+
def test_basic_comparison(self):
|
1409 |
+
"""
|
1410 |
+
Compare old versus new JSON structures
|
1411 |
+
"""
|
1412 |
+
app_logger.info("\nCOMPARISON RESULTS:")
|
1413 |
+
comparison = compare_json_with_jsonpath(old_json, new_json)
|
1414 |
+
|
1415 |
+
# Verify the main changes
|
1416 |
+
self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"])
|
1417 |
+
self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"])
|
1418 |
+
|
1419 |
+
# Verify unchanged elements
|
1420 |
+
self.assertIn("$.key1", comparison["common_paths"])
|
1421 |
+
self.assertIn("$.key2", comparison["common_paths"])
|
1422 |
+
|
1423 |
+
def test_basic_comparison_with_types(self):
|
1424 |
+
"""
|
1425 |
+
Compare old versus new JSON structures with type information
|
1426 |
+
"""
|
1427 |
+
app_logger.info("\nCOMPARISON RESULTS WITH TYPES:")
|
1428 |
+
comparison = compare_json_with_jsonpath_and_types(old_json, new_json)
|
1429 |
+
|
1430 |
+
# Verify the main changes with types
|
1431 |
+
self.assertIn("$.key_nested1.array_nested_4", comparison["removed_paths"])
|
1432 |
+
self.assertIn("$.key_nested1.array_changed_4", comparison["added_paths"])
|
1433 |
+
|
1434 |
+
# Verify type information is included
|
1435 |
+
self.assertEqual(comparison["common_paths"]["$.key1"], "string")
|
1436 |
+
self.assertEqual(comparison["common_paths"]["$.key2"], "integer")
|
1437 |
+
|
1438 |
+
|
1439 |
+
if __name__ == '__main__':
|
1440 |
+
unittest.main()
|
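Condensed, the diff workflow this suite documents looks like the following sketch (assuming the convenience wrappers live in my_ghost_writer.jsonpath_comparator; payloads illustrative):

from my_ghost_writer.jsonpath_comparator import (compare_json_with_jsonpath,
                                                 compare_json_with_jsonpath_and_types)

old_payload = {"key1": "row 1", "items": [1, 2, 3]}
new_payload = {"key1": "row 1", "items": [1, 2], "extra": True}
plain = compare_json_with_jsonpath(old_payload, new_payload)  # optional 3rd arg: critical paths
typed = compare_json_with_jsonpath_and_types(old_payload, new_payload)
print(plain["added_paths"], plain["removed_paths"])
print(typed["type_changes"])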
tests/my_ghost_writer/test_text_parsers2.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import unittest
|
2 |
from unittest.mock import patch, MagicMock
|
3 |
|
@@ -5,6 +6,11 @@ from fastapi import HTTPException
|
|
5 |
|
6 |
from my_ghost_writer.text_parsers2 import (extract_contextual_info_by_indices, get_wordnet_synonyms, inflect_synonym,
|
7 |
is_nlp_available, process_synonym_groups)
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
|
10 |
class TestTextParsers2(unittest.TestCase):
|
@@ -77,42 +83,45 @@ class TestTextParsers2(unittest.TestCase):
|
|
77 |
|
78 |
def test_get_wordnet_synonyms(self):
|
79 |
# Test with a word that has known synonyms
|
|
|
|
|
80 |
word = "piano"
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
self.assertIsInstance(first_result, dict)
|
86 |
-
self.assertIn('definition', first_result)
|
87 |
-
self.assertIn('examples', first_result)
|
88 |
-
self.assertIn('pos', first_result)
|
89 |
-
self.assertIn('synonyms', first_result)
|
90 |
-
self.assertIsInstance(first_result['synonyms'], list)
|
91 |
|
92 |
def test_get_wordnet_synonyms_custom_entry(self):
|
93 |
word = "happy"
|
94 |
pos = "ADJ"
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
|
|
|
|
107 |
|
108 |
def test_get_wordnet_synonyms_pos_filter(self):
|
109 |
# Test with POS filtering
|
110 |
word = "hunt"
|
111 |
-
|
112 |
|
113 |
-
self.assertGreater(len(
|
114 |
-
for sense in
|
115 |
-
self.assertEqual(sense['
|
116 |
|
117 |
@patch("my_ghost_writer.text_parsers2.wn.synsets")
|
118 |
def test_get_wordnet_synonyms_generic_exception(self, mock_synsets):
|
@@ -122,7 +131,7 @@ class TestTextParsers2(unittest.TestCase):
|
|
122 |
|
123 |
# 400 Exception intercepted and relaunched as 500
|
124 |
self.assertEqual(context.exception.status_code, 500)
|
125 |
-
self.assertIn("Error retrieving
|
126 |
|
127 |
def test_inflect_synonym_noun_plural(self):
|
128 |
# Test noun pluralization
|
@@ -263,11 +272,11 @@ class TestTextParsers2(unittest.TestCase):
|
|
263 |
self.assertIsInstance(result, list)
|
264 |
self.assertGreater(len(result), 0)
|
265 |
|
266 |
-
first_sense = result[0]
|
267 |
self.assertIn('definition', first_sense)
|
268 |
-
self.assertIn('
|
269 |
|
270 |
-
first_synonym_info = first_sense['
|
271 |
self.assertIn('base_form', first_synonym_info)
|
272 |
self.assertIn('inflected_form', first_synonym_info)
|
273 |
# For a past-tense verb, the inflected form should be different from the base
|
@@ -283,19 +292,24 @@ class TestTextParsers2(unittest.TestCase):
|
|
283 |
'original_indices': {'end': 60, 'start': 55}, 'pos': 'ADJ', 'sentence_position': 9,
|
284 |
'tag': 'JJ', 'word': 'happy'
|
285 |
}
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
self.
|
292 |
-
self.assertIsInstance(
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
|
|
|
|
|
|
|
|
|
|
299 |
|
300 |
@patch("my_ghost_writer.text_parsers2.wn.synsets")
|
301 |
def test_process_synonym_groups_not_synonyms_by_sense(self, mock_synsets):
|
|
|
1 |
+
import json
|
2 |
import unittest
|
3 |
from unittest.mock import patch, MagicMock
|
4 |
|
|
|
6 |
|
7 |
from my_ghost_writer.text_parsers2 import (extract_contextual_info_by_indices, get_wordnet_synonyms, inflect_synonym,
|
8 |
is_nlp_available, process_synonym_groups)
|
9 |
+
from my_ghost_writer.jsonpath_comparator import JSONPathComparator
|
10 |
+
from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer, analyze_dict_list_simple
|
11 |
+
from my_ghost_writer.type_hints import TermRelationships
|
12 |
+
from tests import EVENTS_FOLDER
|
13 |
+
from tests.my_ghost_writer.helpers_tests import analyze_detailed_report_lists
|
14 |
|
15 |
|
16 |
class TestTextParsers2(unittest.TestCase):
|
|
|
83 |
|
84 |
def test_get_wordnet_synonyms(self):
|
85 |
# Test with a word that has known synonyms
|
86 |
+
with open(EVENTS_FOLDER / "get_wordnet_synonyms_piano_ok1.json", "r") as src:
|
87 |
+
expected_detailed_report = json.load(src)
|
88 |
word = "piano"
|
89 |
+
related_words = get_wordnet_synonyms(word)
|
90 |
+
first_related_words = related_words[0]
|
91 |
+
analyzer = JSONPathStructureAnalyzer()
|
92 |
+
analyzer.extract_all_paths(first_related_words)
|
93 |
+
detailed_report = analyzer.get_detailed_type_report()
|
94 |
+
analyze_detailed_report_lists(self, detailed_report, expected_detailed_report)
|
95 |
|
96 |
+
# with open(EVENTS_FOLDER / "get_wordnet_synonyms_piano_ok1.json", "w") as src:
|
97 |
+
# json.dump(detailed_report, src)
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
def test_get_wordnet_synonyms_custom_entry(self):
|
100 |
word = "happy"
|
101 |
pos = "ADJ"
|
102 |
+
with open(EVENTS_FOLDER / "get_wordnet_synonyms_custom_entry_happy.json", "r") as src:
|
103 |
+
expected_report_dict_list = json.load(src)
|
104 |
+
# expected_detailed_report = []
|
105 |
+
related_word_groups = get_wordnet_synonyms(word, pos)
|
106 |
+
self.assertIsInstance(related_word_groups[0]["examples"], list)
|
107 |
+
for related_word_nth, expected_detailed_report_nth in zip(related_word_groups, expected_report_dict_list):
|
108 |
+
del related_word_nth['examples']
|
109 |
+
analyzer = JSONPathStructureAnalyzer()
|
110 |
+
analyzer.extract_all_paths(related_word_nth)
|
111 |
+
detailed_report = analyzer.get_detailed_type_report(get_samples=False)
|
112 |
+
self.assertDictEqual(detailed_report, expected_detailed_report_nth)
|
113 |
+
# expected_detailed_report.append(detailed_report)
|
114 |
+
# with open(EVENTS_FOLDER / "get_wordnet_synonyms_custom_entry_happy.json", "w") as src:
|
115 |
+
# json.dump(expected_detailed_report, src)
|
116 |
|
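The commented-out dump above is a golden-file regeneration switch: uncomment it once after an intentional behavior change, rerun the test to rewrite the fixture under tests/events, then re-comment it. The read side reduces to a small hypothetical helper:

import json
from tests import EVENTS_FOLDER

def load_golden(name):
    """Illustrative helper: load an expected fixture from tests/events."""
    with open(EVENTS_FOLDER / name, "r") as src:
        return json.load(src)

expected = load_golden("get_wordnet_synonyms_custom_entry_happy.json")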
117 |
def test_get_wordnet_synonyms_pos_filter(self):
|
118 |
# Test with POS filtering
|
119 |
word = "hunt"
|
120 |
+
related_words_verbs = get_wordnet_synonyms(word, pos_tag="VERB")
|
121 |
|
122 |
+
self.assertGreater(len(related_words_verbs), 0)
|
123 |
+
for sense in related_words_verbs:
|
124 |
+
self.assertEqual(sense['wordnet_pos'], 'v') # 'v' is the WordNet tag for VERB
|
125 |
|
126 |
@patch("my_ghost_writer.text_parsers2.wn.synsets")
|
127 |
def test_get_wordnet_synonyms_generic_exception(self, mock_synsets):
|
|
|
131 |
|
132 |
# 400 Exception intercepted and relaunched as 500
|
133 |
self.assertEqual(context.exception.status_code, 500)
|
134 |
+
self.assertIn("Error retrieving related words: 'test exception'", context.exception.detail)
|
135 |
|
136 |
def test_inflect_synonym_noun_plural(self):
|
137 |
# Test noun pluralization
|
|
|
272 |
self.assertIsInstance(result, list)
|
273 |
self.assertGreater(len(result), 0)
|
274 |
|
275 |
+
first_sense = dict(result[0])
|
276 |
self.assertIn('definition', first_sense)
|
277 |
+
self.assertIn('related_words', first_sense)
|
278 |
|
279 |
+
first_synonym_info = dict(first_sense['related_words'][0])
|
280 |
self.assertIn('base_form', first_synonym_info)
|
281 |
self.assertIn('inflected_form', first_synonym_info)
|
282 |
# For a past-tense verb, the inflected form should be different from the base
|
|
|
292 |
'original_indices': {'end': 60, 'start': 55}, 'pos': 'ADJ', 'sentence_position': 9,
|
293 |
'tag': 'JJ', 'word': 'happy'
|
294 |
}
|
295 |
+
# RelatedWordGroup
|
296 |
+
result_related_word_groups_list = process_synonym_groups(word, context_info)
|
297 |
+
self.assertIsInstance(result_related_word_groups_list, list)
|
298 |
+
for related_words_group in result_related_word_groups_list:
|
299 |
+
related_word_group_dict = dict(related_words_group)
|
300 |
+
self.assertIsInstance(related_word_group_dict, dict)
|
301 |
+
self.assertIsInstance(related_word_group_dict["definition"], str)
|
302 |
+
self.assertIn("relation_type", related_word_group_dict)
|
303 |
+
self.assertIn(related_word_group_dict["relation_type"], TermRelationships)
|
304 |
+
self.assertIsInstance(related_word_group_dict["examples"], list)
|
305 |
+
related_words = related_word_group_dict["related_words"]
|
306 |
+
for _word_dict in related_words:
|
307 |
+
word_dict = dict(_word_dict)
|
308 |
+
self.assertIsInstance(word_dict, dict)
|
309 |
+
self.assertIsInstance(word_dict["base_form"], str)
|
310 |
+
self.assertIsInstance(word_dict["inflected_form"], str)
|
311 |
+
self.assertIsInstance(word_dict["matches_context"], bool)
|
312 |
+
self.assertIn("is_custom", word_dict)
|
313 |
|
314 |
@patch("my_ghost_writer.text_parsers2.wn.synsets")
|
315 |
def test_process_synonym_groups_not_synonyms_by_sense(self, mock_synsets):
|