alessandro trinca tornidor committed
Commit · 989f544
1 Parent(s): f3516da

test: update python backend test cases, update logs

Files changed:
- my_ghost_writer/app.py +12 -6
- my_ghost_writer/text_parsers2.py +1 -1
- tests/test_app.py +0 -4
- tests/test_text_parsers2.py +6 -5
my_ghost_writer/app.py
CHANGED

@@ -109,7 +109,7 @@ def health_mongo() -> str:
 def get_words_frequency(body: RequestTextFrequencyBody | str) -> JSONResponse:
     t0 = datetime.now()
     app_logger.info(f"body type: {type(body)}.")
-    app_logger.
+    app_logger.debug(f"body: {body}.")
     body_validated = RequestTextFrequencyBody.model_validate_json(body)
     text = body_validated.text
     app_logger.info(f"LOG_LEVEL: '{LOG_LEVEL}', length of text: {len(text)}, type of 'text':'{type(text)}'.")

@@ -130,7 +130,7 @@ def get_words_frequency(body: RequestTextFrequencyBody | str) -> JSONResponse:
 def get_sentence_sliced_by_word_and_positions(body: RequestSplitText | str) -> JSONResponse:
     t0 = datetime.now()
     app_logger.info(f"body type: {type(body)}.")
-    app_logger.
+    app_logger.debug(f"body: {body}.")
     try:
         try:
             body_validated = RequestSplitText.model_validate_json(body)

@@ -148,8 +148,8 @@ def get_sentence_sliced_by_word_and_positions(body: RequestSplitText | str) -> JSONResponse:
             sentence, start_in_sentence, end_in_sentence = text_parsers.get_sentence_by_word(text, word, start, end)
         except Exception as e0:
             app_logger.error(f"end:'{end}', start:'{start}', word:'{word}'.")
-            app_logger.
-            app_logger.
+            app_logger.error("text:")
+            app_logger.error(text)
             app_logger.error("## error:")
             app_logger.error(e0)
             raise e0

@@ -300,9 +300,15 @@ async def get_synonyms(body: RequestQueryThesaurusInflatedBody):
             end,
             word
         )
+        t1 = datetime.now()
+        duration = (t1 - t0).total_seconds()
+        app_logger.info(f"got extract_contextual_info_by_indices() result in: {duration:.3f}s. ...")

         # Process synonym groups
         processed_synonyms = process_synonym_groups(body.word, context_info)
+        t2 = datetime.now()
+        duration = (t2 - t1).total_seconds()
+        app_logger.info(f"got process_synonym_groups() result in: {duration:.3f}s. ...")

         if not processed_synonyms:
             return JSONResponse(

@@ -369,7 +375,7 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
     synonym groups for each.
     """
     app_logger.info(f"body tye:{type(body)}!")
-    app_logger.
+    app_logger.debug(f"body:{body}!")
     t0 = datetime.now()
     try:
         body_validated = RequestQueryThesaurusInflatedBody.model_validate_json(body)

@@ -398,7 +404,7 @@ async def get_synonyms_for_phrase(body: RequestQueryThesaurusInflatedBody):
         t1 = datetime.now()
         duration = (t1 - t0).total_seconds()
         app_logger.info(f"got find_synonyms_for_phrase() result in: {duration:.3f}s. ...")
-        app_logger.
+        app_logger.debug(results)

         message = f"Got {len(results)} synonym groups." if results else "No words with synonyms found in the selected phrase."

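The app.py hunks apply two small patterns: a debug-level log of the raw request body right after the existing type log, and checkpoint timing around each expensive stage. Below is a minimal sketch of that timing pattern, assuming a standard logging setup in place of the project's app_logger; stage() is a hypothetical stand-in for calls like extract_contextual_info_by_indices():

```python
from datetime import datetime
import logging

logging.basicConfig(level=logging.INFO)
# Stand-in for the project's app_logger; an assumption, not the real setup.
app_logger = logging.getLogger("my_ghost_writer")

def stage() -> list:
    """Hypothetical expensive step, e.g. context extraction or synonym grouping."""
    return []

t0 = datetime.now()
result = stage()
t1 = datetime.now()
duration = (t1 - t0).total_seconds()  # float seconds, logged at millisecond precision
app_logger.info(f"got stage() result in: {duration:.3f}s. ...")
```

Chaining checkpoints (t0, t1, t2) instead of resetting a single timer lets each log line report only its own stage's duration, which is what the get_synonyms hunk does.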
my_ghost_writer/text_parsers2.py
CHANGED

@@ -132,7 +132,7 @@ def extract_contextual_info_by_indices(text: str, start_idx: int, end_idx: int,
     # If the primary loop didn't find a token, it's an unexpected state,
     # but the original code to handle this was unreachable.
     # The most likely failure is now a word/index mismatch, handled above.
-    if target_token is None:
+    if target_token is None or str(target_token) != target_word:
         raise HTTPException(
             status_code=400,
             detail=f"Could not find token for word '{target_word}' at indices {start_idx}-{end_idx}"
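The text_parsers2.py change widens the guard: previously the 400 fired only when no token was found at all, while a token that exists but does not spell the requested word slipped through. A minimal sketch of the check in isolation, with check_token as a hypothetical wrapper (spaCy tokens stringify to their surface text, so str(target_token) compares cleanly against a plain string):

```python
from fastapi import HTTPException

def check_token(target_token, target_word: str, start_idx: int, end_idx: int) -> None:
    """Hypothetical isolation of the widened guard."""
    # None: no token matched the indices at all. Text mismatch: the indices
    # landed on a different word than the caller asked for.
    if target_token is None or str(target_token) != target_word:
        raise HTTPException(
            status_code=400,
            detail=f"Could not find token for word '{target_word}' at indices {start_idx}-{end_idx}",
        )

# Indices 4-9 select "quick", so asking for "brown" now raises the 400:
# check_token("quick", "brown", 4, 9)
```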
tests/test_app.py
CHANGED

@@ -315,7 +315,3 @@ class TestAppEndpoints(unittest.TestCase):
         self.assertEqual(response.status_code, 503)
         # Verify the CORS header is set by our custom handler
         self.assertEqual(response.headers["access-control-allow-origin"], allowed_origin)
-
-
-if __name__ == "__main__":
-    unittest.main()
tests/test_text_parsers2.py
CHANGED

@@ -53,14 +53,15 @@ class TestTextParsers2(unittest.TestCase):
         self.assertEqual(context.exception.detail, "spaCy model not available")

     def test_extract_contextual_info_word_mismatch(self):
-        """Tests that a 400 HTTPException is raised for a word/index mismatch."""
+        """Tests that a 400->500 HTTPException is raised for a word/index mismatch."""
         text = "The quick brown fox"
+        start_idx, end_idx, target_word = 4, 9, "brown"
         # Indices point to "quick", but target_word is "brown"
         with self.assertRaises(HTTPException) as context:
-            extract_contextual_info_by_indices(text,
+            extract_contextual_info_by_indices(text, start_idx, end_idx, target_word)

-        self.assertEqual(context.exception.status_code,
-        self.assertIn("
+        self.assertEqual(context.exception.status_code, 500)
+        self.assertIn(f"Error analyzing context: 400: Could not find token for word '{target_word}' at indices {start_idx}-{end_idx}", context.exception.detail)

     @patch("my_ghost_writer.text_parsers2.nlp")
     def test_extract_contextual_info_word_none(self, nlp_mock):

@@ -259,7 +260,7 @@ class TestTextParsers2(unittest.TestCase):
     @patch("my_ghost_writer.text_parsers2.wn.synsets")
     def test_process_synonym_groups_not_synonyms_by_sense(self, mock_synsets):
         mock_synsets.return_value = []
-        context_info = {'pos': 'VERB'}
+        context_info = {'pos': 'VERB', 'lemma': 'look'}
         result = process_synonym_groups("look", context_info)
         self.assertListEqual(result, [])

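The updated mismatch test expects a 500, not the 400 raised at the guard itself, because extract_contextual_info_by_indices appears to wrap inner failures in a generic 500 whose detail embeds the original exception; Starlette's HTTPException stringifies as "<status>: <detail>", which is how "400: Could not find token ..." ends up inside the "Error analyzing context: ..." message. A hedged, hypothetical reduction of that wrap-and-re-raise shape:

```python
from fastapi import HTTPException

def analyze_context(target_word: str, start_idx: int, end_idx: int) -> dict:
    """Hypothetical reduction of the error path the test exercises."""
    try:
        # ... tokenization would happen here; the mismatch guard raises a 400 ...
        raise HTTPException(
            status_code=400,
            detail=f"Could not find token for word '{target_word}' at indices {start_idx}-{end_idx}",
        )
    except Exception as e:
        # str(e) on an HTTPException yields "400: Could not find token ...",
        # so the test can assert on the embedded inner status code.
        raise HTTPException(status_code=500, detail=f"Error analyzing context: {e}")
```

The second hunk simply supplies the 'lemma' key that process_synonym_groups evidently reads from context_info alongside 'pos'.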