import unittest
import json

from my_ghost_writer.constants import app_logger
from my_ghost_writer.jsonpath_comparator import (
    compare_json_with_jsonpath,
    compare_json_with_jsonpath_and_types,
    JSONPathComparator,
    extract_structure_paths,
    extract_structure_paths_with_types,
)
from my_ghost_writer.jsonpath_extractor import (
    JSONPathStructureAnalyzer,
    analyze_with_jsonpath_detailed,
    analyze_with_jsonpath_types,
    analyze_with_jsonpath,
    compare_json_with_jsonpath_structures,
    analyze_dict_list_simple,
)


# Shared fixtures: new_json is old_json with the nested array renamed
# ("array_nested_4" -> "array_changed_4"), one array element edited and one
# nested key renamed ("key_nested4" -> "last_change").
old_json = {
    "key1": "row 1",
    "key2": 22,
    "key_nested1": {
        "key_nested2": "row 3",
        "key_nested3": "row 4",
        "array_nested_4": [
            "row 5",
            "row 6",
            "row 7 nested",
            {
                "key_nested4": "row 8",
                "array_nested_5": ["row 9", "row 10"]
            }
        ]
    }
}

new_json = {
    "key1": "row 1",
    "key2": 22,
    "key_nested1": {
        "key_nested2": "row 3",
        "key_nested3": "row 4",
        "array_changed_4": [
            "row 5",
            "row changed 6",
            "row 7 nested",
            {
                "last_change": "row 8",
                "array_nested_5": ["row 9", "row 10"]
            }
        ]
    }
}


class TestJSONPathStructureAnalyzer(unittest.TestCase):
    """Test JSONPath structure analysis with the provided nested JSON data"""

    def test_get_paths_with_types_basic(self):
        """Test get_paths_with_types with basic data types"""
        test_data = {
            "string_field": "hello",
            "int_field": 42,
            "float_field": 3.14,
            "bool_field": True,
            "null_field": None
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)
        paths_with_types = analyzer.get_paths_with_types()

        # Verify basic types
        self.assertEqual(paths_with_types["$.string_field"], "str")
        self.assertEqual(paths_with_types["$.int_field"], "int")
        self.assertEqual(paths_with_types["$.float_field"], "float")
        self.assertEqual(paths_with_types["$.bool_field"], "bool")
        self.assertEqual(paths_with_types["$.null_field"], "NoneType")

    def test_get_paths_with_types_arrays(self):
        """Test get_paths_with_types with arrays"""
        test_data = {
            "simple_array": [1, 2, 3],
            "empty_array": [],
            "mixed_array": ["string", 42, True],
            "nested_array": [[1, 2], [3, 4]]
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)
        paths_with_types = analyzer.get_paths_with_types()

        self.assertEqual(paths_with_types["$.simple_array[*]"], "array")
        self.assertEqual(paths_with_types["$.empty_array[*]"], "array")
        self.assertEqual(paths_with_types["$.mixed_array[*]"], "array")
        self.assertEqual(paths_with_types["$.nested_array[*]"], "array")

    def test_get_paths_with_types_with_old_json(self):
        """Test get_paths_with_types with the old_json test data"""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_json)
        paths_with_types = analyzer.get_paths_with_types()

        # Test specific paths from old_json
        self.assertEqual(paths_with_types["$.key1"], "str")
        self.assertEqual(paths_with_types["$.key2"], "int")
        self.assertEqual(paths_with_types["$.key_nested1"], "dict")
        self.assertEqual(paths_with_types["$.key_nested1.array_nested_4[*]"], "array")
        self.assertEqual(paths_with_types["$.key_nested1.key_nested2"], "str")

        # Verify all expected paths are present
        expected_paths = [
            "$.key1",
            "$.key2",
            "$.key_nested1",
            "$.key_nested1.key_nested2",
            "$.key_nested1.key_nested3",
            "$.key_nested1.array_nested_4[*]"
        ]
        for path in expected_paths:
            # subTest reports every missing path instead of stopping at the first
            with self.subTest(path=path):
                self.assertIn(path, paths_with_types, f"Path {path} should be in paths_with_types")

    def test_get_detailed_type_report_basic(self):
        """Test get_detailed_type_report with basic data"""
        test_data = {
            "test_field": "sample_value",
            "array_field": [1, 2, 3]
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)
        detailed_report = analyzer.get_detailed_type_report()

        # Test structure of the detailed report
        self.assertIn("$.test_field", detailed_report)
        self.assertIn("$.array_field[*]", detailed_report)

        # Test field details
        field_info = detailed_report["$.test_field"]
        self.assertIn("types", field_info)
        self.assertIn("primary_type", field_info)
        self.assertIn("is_array", field_info)
        self.assertIn("samples", field_info)
        self.assertIn("sample_count", field_info)

        # Verify field values
        self.assertEqual(field_info["primary_type"], "str")
        self.assertFalse(field_info["is_array"])
        self.assertIn("sample_value", field_info["samples"])
        self.assertGreater(field_info["sample_count"], 0)

        # Test array field details
        array_info = detailed_report["$.array_field[*]"]
        self.assertTrue(array_info["is_array"])
        self.assertEqual(array_info["primary_type"], "array")
        self.assertEqual(array_info['array_length'], 3)

    def test_get_detailed_type_report_with_old_json(self):
        """Test get_detailed_type_report with old_json data"""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_json)
        detailed_report = analyzer.get_detailed_type_report()

        # Test specific fields from old_json
        key1_info = detailed_report["$.key1"]
        self.assertEqual(key1_info["primary_type"], "str")
        self.assertFalse(key1_info["is_array"])
        self.assertIn("row 1", key1_info["samples"])

        key2_info = detailed_report["$.key2"]
        self.assertEqual(key2_info["primary_type"], "int")
        self.assertFalse(key2_info["is_array"])
        self.assertIn("22", key2_info["samples"])

        # Test array field
        array_info = detailed_report["$.key_nested1.array_nested_4[*]"]
        self.assertTrue(array_info["is_array"])
        self.assertEqual(array_info["primary_type"], "array")
        self.assertEqual(array_info["array_length"], 4)

    def test_get_detailed_type_report_mixed_types(self):
        """Test get_detailed_type_report with mixed types (hypothetical case)"""
        # Create a scenario where a path might have mixed types
        analyzer = JSONPathStructureAnalyzer()

        # Manually add mixed type data to test the logic
        analyzer.paths.add("$.mixed_field")
        analyzer.types["$.mixed_field"].add("str")
        analyzer.types["$.mixed_field"].add("int")
        analyzer.samples["$.mixed_field"] = ["hello", "42"]

        detailed_report = analyzer.get_detailed_type_report()
        mixed_info = detailed_report["$.mixed_field"]

        self.assertIn("mixed(", mixed_info["primary_type"])
        self.assertFalse(mixed_info["is_array"])
        self.assertEqual(len(mixed_info["types"]), 2)

    def test_analyze_with_jsonpath_types_function(self):
        """Test the convenience function analyze_with_jsonpath_types"""
        test_data = {
            "name": "test",
            "count": 5,
            "items": ["a", "b", "c"]
        }
        paths_with_types = analyze_with_jsonpath_types(test_data)

        # Verify function returns expected structure
        self.assertIsInstance(paths_with_types, dict)
        self.assertIn("$.name", paths_with_types)
        self.assertIn("$.count", paths_with_types)
        self.assertIn("$.items[*]", paths_with_types)

        # Verify types
        self.assertEqual(paths_with_types["$.name"], "str")
        self.assertEqual(paths_with_types["$.count"], "int")
        self.assertEqual(paths_with_types["$.items[*]"], "array")

    def test_analyze_with_jsonpath_detailed_function(self):
        """Test the convenience function analyze_with_jsonpath_detailed"""
        test_data = {
            "description": "test description",
            "tags": ["tag1", "tag2"]
        }
        detailed_info = analyze_with_jsonpath_detailed(test_data)

        # Verify function returns expected structure
        self.assertIsInstance(detailed_info, dict)
        self.assertIn("$.description", detailed_info)
        self.assertIn("$.tags[*]", detailed_info)

        # Verify detailed structure
        desc_info = detailed_info["$.description"]
        self.assertIn("types", desc_info)
        self.assertIn("primary_type", desc_info)
        self.assertIn("samples", desc_info)
        self.assertEqual(desc_info["primary_type"], "str")

        tags_info = detailed_info["$.tags[*]"]
        self.assertTrue(tags_info["is_array"])
        self.assertEqual(tags_info["primary_type"], "array")
        self.assertEqual(tags_info["array_length"], 2)

    def test_get_paths_with_types_empty_data(self):
        """Test get_paths_with_types with empty data"""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths({})
        paths_with_types = analyzer.get_paths_with_types()

        # Should return empty dict for empty input
        self.assertEqual(len(paths_with_types), 0)

    def test_get_detailed_type_report_empty_data(self):
        """Test get_detailed_type_report with empty data"""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths({})
        detailed_report = analyzer.get_detailed_type_report()

        # Should return empty dict for empty input
        self.assertEqual(len(detailed_report), 0)

    def test_paths_with_types_comparison_old_vs_new(self):
        """Test comparing paths with types between old and new JSON"""
        analyzer_old = JSONPathStructureAnalyzer()
        analyzer_old.extract_all_paths(old_json)
        old_paths_with_types = analyzer_old.get_paths_with_types()

        analyzer_new = JSONPathStructureAnalyzer()
        analyzer_new.extract_all_paths(new_json)
        new_paths_with_types = analyzer_new.get_paths_with_types()

        # Find differences
        old_keys = set(old_paths_with_types)
        new_keys = set(new_paths_with_types)
        old_only = old_keys - new_keys
        new_only = new_keys - old_keys
        common = old_keys & new_keys

        # Verify expected differences
        self.assertIn("$.key_nested1.array_nested_4[*]", old_only)
        self.assertIn("$.key_nested1.array_changed_4[*]", new_only)

        # Verify common paths have same types
        for path in common:
            with self.subTest(path=path):
                self.assertEqual(
                    old_paths_with_types[path],
                    new_paths_with_types[path],
                    f"Type mismatch for common path {path}"
                )

    def test_detailed_report_comparison_old_vs_new(self):
        """Test comparing detailed reports between old and new JSON"""
        old_detailed = analyze_with_jsonpath_detailed(old_json)
        new_detailed = analyze_with_jsonpath_detailed(new_json)

        # Check that common fields have consistent detailed info
        common_paths = set(old_detailed) & set(new_detailed)
        for path in common_paths:
            old_info = old_detailed[path]
            new_info = new_detailed[path]
            with self.subTest(path=path):
                # Primary types should match for common paths
                self.assertEqual(
                    old_info["primary_type"],
                    new_info["primary_type"],
                    f"Primary type mismatch for {path}"
                )
                # Array status should match
                self.assertEqual(
                    old_info["is_array"],
                    new_info["is_array"],
                    f"Array status mismatch for {path}"
                )

    def test_integration_all_new_methods(self):
        """Integration test using all new methods together"""
        test_data = {
            "user": {
                "name": "John Doe",
                "age": 30,
                "hobbies": ["reading", "coding", "gaming"],
                "profile": {
                    "active": True,
                    "settings": {
                        "theme": "dark",
                        "notifications": False
                    }
                }
            }
        }

        # Test all three approaches
        structure_report = analyze_with_jsonpath(test_data)
        paths_with_types = analyze_with_jsonpath_types(test_data)
        detailed_info = analyze_with_jsonpath_detailed(test_data)

        # Verify all methods found the same paths; report lines look like
        # "<path> -- <value>", so the path is everything before the separator
        report_paths = {
            line.split(' -- ')[0]
            for line in structure_report.split('\n')
            if ' -- ' in line
        }
        types_paths = set(paths_with_types)
        detailed_paths = set(detailed_info)

        # All methods should find the same paths
        self.assertEqual(report_paths, types_paths)
        self.assertEqual(types_paths, detailed_paths)

        # Verify specific expected paths exist
        expected_paths = [
            "$.user",
            "$.user.name",
            "$.user.age",
            "$.user.hobbies[*]",
            "$.user.profile",
            "$.user.profile.active",
            "$.user.profile.settings",
            "$.user.profile.settings.theme",
            "$.user.profile.settings.notifications"
        ]
        for path in expected_paths:
            with self.subTest(path=path):
                self.assertIn(path, types_paths, f"Path {path} should be found by all methods")
                self.assertIn(path, detailed_paths, f"Path {path} should be in detailed info")

    def test_type_consistency_across_methods(self):
        """Test that type information is consistent across different methods"""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_json)

        # Get data using different methods
        paths_with_types = analyzer.get_paths_with_types()
        detailed_report = analyzer.get_detailed_type_report()

        # For each path, verify consistency
        for path, simple_type in paths_with_types.items():
            # Detailed report should have the same primary type
            if path not in detailed_report:
                continue
            detailed_type = detailed_report[path]["primary_type"]
            # They should match (detailed might have more info for mixed types)
            if detailed_type.startswith("mixed("):
                continue
            with self.subTest(path=path):
                self.assertEqual(
                    simple_type,
                    detailed_type,
                    f"Type inconsistency for {path}: {simple_type} vs {detailed_type}"
                )

    def test_extract_all_paths_from_old_structure(self):
        """Test that analyzer correctly extracts all paths from the old JSON structure"""
        analyzer = JSONPathStructureAnalyzer()
        paths = analyzer.extract_all_paths(old_json)

        # Verify the top-level paths
        self.assertIn("$.key1", paths)
        self.assertIn("$.key2", paths)
        self.assertIn("$.key_nested1", paths)

        # Verify the nested object paths
        self.assertIn("$.key_nested1.key_nested2", paths)
        self.assertIn("$.key_nested1.key_nested3", paths)
        self.assertIn("$.key_nested1.array_nested_4[*]", paths)

        # Verify the deeply nested paths (3-4 levels deep)
        self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", paths)
        self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*]", paths)

    def test_extract_all_paths_from_new_structure(self):
        """Test that analyzer correctly extracts paths from the new JSON structure"""
        analyzer = JSONPathStructureAnalyzer()
        paths = analyzer.extract_all_paths(new_json)

        # Verify renamed the array path
        self.assertIn("$.key_nested1.array_changed_4[*]", paths)

        # Verify renamed the nested key
        self.assertIn("$.key_nested1.array_changed_4[*].last_change", paths)

        # Verify unchanged the nested array
        self.assertIn("$.key_nested1.array_changed_4[*].array_nested_5[*]", paths)

    def test_structure_report_format_old_json(self):
        """Test structure report format for old JSON"""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_json)
        report = analyzer.get_structure_report()

        # Check specific format elements
        self.assertIn("$.key1 -- row 1", report)
        self.assertIn("$.key2 -- 22", report)
        self.assertIn("$.key_nested1.array_nested_4[*] -- array[4]", report)
        self.assertIn("$.key_nested1.array_nested_4[*].key_nested4 -- row 8", report)
        self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*] -- array[2]", report)

    def test_structure_report_format_new_json(self):
        """Test structure report format for new JSON"""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(new_json)
        report = analyzer.get_structure_report()

        # Check renamed elements appear correctly
        self.assertIn("$.key_nested1.array_changed_4[*] -- array[4]", report)
        self.assertIn("$.key_nested1.array_changed_4[*].last_change -- row 8", report)

        # Check unchanged elements
        self.assertIn("$.key1 -- row 1", report)
        self.assertIn("$.key2 -- 22", report)

    def test_analyze_with_jsonpath_function(self):
        """Test the convenience function for structure analysis"""
        old_report = analyze_with_jsonpath(old_json)
        new_report = analyze_with_jsonpath(new_json)

        # Verify both reports are valid strings
        self.assertIsInstance(old_report, str)
        self.assertGreater(len(old_report), 0)
        self.assertIsInstance(new_report, str)
        self.assertGreater(len(new_report), 0)

        # Verify key differences
        self.assertIn("array_nested_4", old_report)
        self.assertIn("array_changed_4", new_report)
        self.assertIn("key_nested4", old_report)
        self.assertIn("last_change", new_report)

    def test_compare_json_structures_method(self):
        """Test the compare_json_structures method directly"""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_json)
        comparison = analyzer.compare_json_structures(new_json)

        # Test all expected keys within the comparison result
        expected_keys = [
            "added_paths",
            "removed_paths",
            "common_paths",
            "type_changes",
            "value_differences",
            "array_size_changes",
            "array_lengths_old",
            "array_lengths_new",
            "summary"
        ]
        for key in expected_keys:
            with self.subTest(key=key):
                self.assertIn(key, comparison, f"Key {key} should be in comparison result")

        # Test summary statistics
        summary = comparison["summary"]
        self.assertGreater(summary["total_paths_old"], 0)
        self.assertGreater(summary["total_paths_new"], 0)
        self.assertGreater(summary["paths_removed"], 0)
        self.assertGreater(summary["paths_added"], 0)

        # Test array length tracking
        self.assertIn("$.key_nested1.array_nested_4[*]", comparison["array_lengths_old"])
        self.assertIn("$.key_nested1.array_changed_4[*]", comparison["array_lengths_new"])
        self.assertEqual(comparison["array_lengths_old"]["$.key_nested1.array_nested_4[*]"], 4)
        self.assertEqual(comparison["array_lengths_new"]["$.key_nested1.array_changed_4[*]"], 4)

    def test_get_array_lengths_method(self):
        """Test the get_array_lengths method"""
        test_data = {
            "empty_array": [],
            "small_array": [1, 2],
            "large_array": list(range(100)),
            "nested": {
                "inner_array": ["a", "b", "c", "d", "e"]
            }
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)
        array_lengths = analyzer.get_array_lengths()

        # Test all array lengths are captured
        self.assertEqual(array_lengths["$.empty_array[*]"], 0)
        self.assertEqual(array_lengths["$.small_array[*]"], 2)
        self.assertEqual(array_lengths["$.large_array[*]"], 100)
        self.assertEqual(array_lengths["$.nested.inner_array[*]"], 5)

        # Test that non-array paths are not in array_lengths
        for path in array_lengths:
            with self.subTest(path=path):
                self.assertTrue(path.endswith("[*]"), f"Array length path {path} should end with [*]")

    def test_value_differences_detection(self):
        """Test detection of value changes in compare_json_structures"""
        old_data = {
            "name": "John",
            "age": 25,
            "city": "New York"
        }
        new_data = {
            "name": "John",  # unchanged
            "age": 26,  # changed
            "city": "Boston"  # changed
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_data)
        comparison = analyzer.compare_json_structures(new_data)

        # Should detect value changes
        self.assertIn("$.age", comparison["value_differences"])
        self.assertIn("$.city", comparison["value_differences"])
        self.assertNotIn("$.name", comparison["value_differences"])

        # Test change details
        age_change = comparison["value_differences"]["$.age"]
        self.assertEqual(age_change["old_value"], "25")
        self.assertEqual(age_change["new_value"], "26")

        city_change = comparison["value_differences"]["$.city"]
        self.assertEqual(city_change["old_value"], "New York")
        self.assertEqual(city_change["new_value"], "Boston")

    def test_array_size_changes_detection(self):
        """Test detection of array size changes"""
        old_data = {
            "items": [1, 2, 3],
            "tags": ["a", "b"]
        }
        new_data = {
            "items": [1, 2, 3, 4, 5],  # size increased
            "tags": ["a"]  # size decreased
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_data)
        comparison = analyzer.compare_json_structures(new_data)

        # Should detect array size changes
        self.assertIn("$.items[*]", comparison["array_size_changes"])
        self.assertIn("$.tags[*]", comparison["array_size_changes"])

        # Test size change details
        items_change = comparison["array_size_changes"]["$.items[*]"]
        self.assertEqual(items_change["old_size"], 3)
        self.assertEqual(items_change["new_size"], 5)
        self.assertEqual(items_change["size_change"], 2)

        tags_change = comparison["array_size_changes"]["$.tags[*]"]
        self.assertEqual(tags_change["old_size"], 2)
        self.assertEqual(tags_change["new_size"], 1)
        self.assertEqual(tags_change["size_change"], -1)

    def test_compare_json_with_jsonpath_structures_function(self):
        """Test the compare_json_with_jsonpath_structures convenience function"""
        # Test with print_report=False
        comparison = compare_json_with_jsonpath_structures(old_json, new_json, print_report=False)

        # Should return the same structure as the method
        self.assertIn("summary", comparison)
        self.assertIn("added_paths", comparison)
        self.assertIn("removed_paths", comparison)

        # Test that it works without printing (no exception thrown)
        self.assertIsInstance(comparison, dict)

        # Test specific changes
        self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"])
        self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"])

    def test_nested_arrays_length_tracking(self):
        """Test array length tracking for deeply nested arrays"""
        test_data = {
            "level1": [
                {"level2": [1, 2, 3]},
                {"level2": [4, 5]},
                {"level2": [6, 7, 8, 9]}
            ]
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)
        array_lengths = analyzer.get_array_lengths()

        # Should track both levels of arrays
        self.assertIn("$.level1[*]", array_lengths)
        self.assertIn("$.level1[*].level2[*]", array_lengths)

        # Check lengths
        self.assertEqual(array_lengths["$.level1[*]"], 3)
        # Note: The nested array length will be from the last item processed (current implementation)
        self.assertEqual(array_lengths["$.level1[*].level2[*]"], 4)

    def test_type_changes_detection_in_comparison(self):
        """Test detection of type changes in compare_json_structures"""
        old_data = {
            "field1": "string_value",
            "field2": 42,
            "field3": [1, 2, 3]
        }
        new_data = {
            "field1": 123,
            "field2": 42,
            "field3": "not_array"
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_data)
        comparison = analyzer.compare_json_structures(new_data)

        # Should detect type changes
        self.assertIn("$.field1", comparison["type_changes"])
        self.assertIn("$.field3", comparison["type_changes"])
        self.assertNotIn("$.field2", comparison["type_changes"])

        # Test change details
        field1_change = comparison["type_changes"]["$.field1"]
        self.assertEqual(field1_change["old_type"], "str")
        self.assertEqual(field1_change["new_type"], "int")

        field3_change = comparison["type_changes"]["$.field3"]
        self.assertEqual(field3_change["new_type"], "str")
        # This will check the type and expect a list
        self.assertEqual(field3_change["old_type"], "list")

    def test_analyze_dict_list_simple(self):
        """Test analyze_dict_list_simple function with a list of dictionaries"""
        dict_list = [
            {
                "user": "john",
                "age": 25,
                "tags": ["admin", "user"]
            },
            {
                "user": "jane",
                "age": 30,
                "tags": ["user"],
                "active": True
            },
            {
                "user": "bob",
                "score": 95.5,
                "tags": ["guest", "temp", "new"]
            }
        ]

        # Test the function
        results = analyze_dict_list_simple(dict_list)

        # Basic structure tests
        self.assertEqual(len(results), 3)
        self.assertIsInstance(results, list)

        # Test each result has expected keys
        for i, result in enumerate(results):
            with self.subTest(index=i):
                self.assertEqual(result["index"], i)
                self.assertIn("paths_with_types", result)
                self.assertIn("detailed_report", result)
                self.assertIn("array_lengths", result)
                self.assertIn("structure_report", result)

        # Test first dict analysis
        first_result = results[0]
        self.assertIn("$.user", first_result["paths_with_types"])
        self.assertIn("$.age", first_result["paths_with_types"])
        self.assertIn("$.tags[*]", first_result["paths_with_types"])
        self.assertEqual(first_result["paths_with_types"]["$.user"], "str")
        self.assertEqual(first_result["paths_with_types"]["$.age"], "int")
        self.assertEqual(first_result["paths_with_types"]["$.tags[*]"], "array")
        self.assertEqual(first_result["array_lengths"]["$.tags[*]"], 2)

        # Test second dict has additional field
        second_result = results[1]
        self.assertIn("$.active", second_result["paths_with_types"])
        self.assertEqual(second_result["paths_with_types"]["$.active"], "bool")
        self.assertEqual(second_result["array_lengths"]["$.tags[*]"], 1)

        # Test third dict differences
        third_result = results[2]
        self.assertIn("$.score", third_result["paths_with_types"])
        self.assertNotIn("$.age", third_result["paths_with_types"])  # age missing in third dict
        self.assertEqual(third_result["paths_with_types"]["$.score"], "float")
        self.assertEqual(third_result["array_lengths"]["$.tags[*]"], 3)

        # Test structure reports are strings
        for i, result in enumerate(results):
            with self.subTest(index=i):
                self.assertIsInstance(result["structure_report"], str)
                self.assertGreater(len(result["structure_report"]), 0)

        # Test detailed reports have proper structure
        for i, result in enumerate(results):
            for path, info in result["detailed_report"].items():
                with self.subTest(index=i, path=path):
                    self.assertIn("types", info)
                    self.assertIn("primary_type", info)
                    self.assertIn("is_array", info)
                    self.assertIn("samples", info)
                    self.assertIn("sample_count", info)

    def test_filter_paths_excluding_keys(self):
        """Test filtering paths to exclude specific keys"""
        test_data = {
            'definition': 'enjoying or showing or marked by joy or pleasure',
            'examples': ['a happy smile', 'spent many happy days on the beach'],
            'related_words': [{'base_form': 'euphoric'}, {'base_form': 'elated'}],
            'relation_type': 'also_see',
            'source': 'wordnet',
            'wordnet_pos': 'a'
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)

        # Test without exclusion
        all_paths = analyzer.paths
        self.assertIn("$.examples[*]", all_paths)
        self.assertIn("$.definition", all_paths)

        # Test with exclusion
        filtered_paths = analyzer.filter_paths_excluding_keys({'examples'})
        self.assertNotIn("$.examples[*]", filtered_paths)
        self.assertIn("$.definition", filtered_paths)
        self.assertIn("$.related_words[*]", filtered_paths)
        self.assertIn("$.related_words[*].base_form", filtered_paths)

        # Test excluding multiple keys
        filtered_paths_multi = analyzer.filter_paths_excluding_keys({'examples', 'source'})
        self.assertNotIn("$.examples[*]", filtered_paths_multi)
        self.assertNotIn("$.source", filtered_paths_multi)
        self.assertIn("$.definition", filtered_paths_multi)

    def test_get_filtered_structure_report(self):
        """Test filtered structure report generation"""
        test_data = {
            'definition': 'test definition',
            'examples': ['example1', 'example2'],
            'metadata': {'source': 'test', 'version': 1},
            'tags': ['tag1', 'tag2', 'tag3']
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)

        # Test filtered report
        filtered_report = analyzer.get_filtered_structure_report({'examples'})

        # Should not contain examples
        self.assertNotIn("examples", filtered_report)

        # Should contain other fields
        self.assertIn("$.definition", filtered_report)
        self.assertIn("$.metadata", filtered_report)
        self.assertIn("$.tags[*]", filtered_report)

        # Test structure: str.split() never returns an empty list, so only
        # non-blank lines are counted to make this check meaningful
        report_lines = [line for line in filtered_report.split('\n') if line.strip()]
        self.assertGreater(len(report_lines), 0)

        # Verify specific content
        self.assertIn("$.definition -- test definition", filtered_report)
        self.assertIn("$.tags[*] -- array[3]", filtered_report)

    def test_get_filtered_paths_with_types(self):
        """Test filtered paths with types"""
        test_data = {
            'name': 'test',
            'count': 42,
            'items': [1, 2, 3],
            'exclude_me': {'nested': 'value'}
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)

        # Test filtered paths with types
        filtered_paths_types = analyzer.get_filtered_paths_with_types({'exclude_me'})

        # Should not contain excluded paths
        self.assertNotIn("$.exclude_me", filtered_paths_types)
        self.assertNotIn("$.exclude_me.nested", filtered_paths_types)

        # Should contain other paths
        self.assertIn("$.name", filtered_paths_types)
        self.assertIn("$.count", filtered_paths_types)
        self.assertIn("$.items[*]", filtered_paths_types)

        # Test types
        self.assertEqual(filtered_paths_types["$.name"], "str")
        self.assertEqual(filtered_paths_types["$.count"], "int")
        self.assertEqual(filtered_paths_types["$.items[*]"], "array")

    def test_get_filtered_detailed_type_report(self):
        """Test filtered detailed type report"""
        test_data = {
            'title': 'Sample Title',
            'description': 'Sample Description',
            'private_data': {'secret': 'hidden'},
            'public_list': ['item1', 'item2']
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)

        # Test filtered detailed report
        filtered_detailed = analyzer.get_filtered_detailed_type_report({'private_data'})

        # Should not contain excluded paths
        self.assertNotIn("$.private_data", filtered_detailed)
        self.assertNotIn("$.private_data.secret", filtered_detailed)

        # Should contain other paths
        self.assertIn("$.title", filtered_detailed)
        self.assertIn("$.public_list[*]", filtered_detailed)

        # Test structure of remaining items
        title_info = filtered_detailed["$.title"]
        self.assertEqual(title_info["primary_type"], "str")
        self.assertFalse(title_info["is_array"])
        self.assertIn("Sample Title", title_info["samples"])

        list_info = filtered_detailed["$.public_list[*]"]
        self.assertEqual(list_info["primary_type"], "array")
        self.assertTrue(list_info["is_array"])
        self.assertEqual(list_info["array_length"], 2)

    def test_analyze_dict_list_simple_with_exclusion(self):
        """Test analyze_dict_list_simple with key exclusion"""
        dict_list = [
            {
                "name": "John",
                "age": 25,
                "private_info": {"ssn": "123-45-6789"},
                "tags": ["user", "admin"]
            },
            {
                "name": "Jane",
                "age": 30,
                "private_info": {"ssn": "987-65-4321"},
                "tags": ["user"],
                "active": True
            }
        ]

        # Test with exclusion
        results = analyze_dict_list_simple(dict_list, exclude_keys={'private_info'})

        # Basic structure tests
        self.assertEqual(len(results), 2)

        # Test that private_info is excluded from all results
        for i, result in enumerate(results):
            paths_with_types = result["paths_with_types"]
            detailed_report = result["detailed_report"]
            with self.subTest(index=i):
                # Should not contain private_info paths; comparing against []
                # shows the offending paths when the assertion fails
                private_paths = [path for path in paths_with_types if 'private_info' in path]
                self.assertEqual(private_paths, [], "private_info paths should be excluded")

                private_detailed = [path for path in detailed_report if 'private_info' in path]
                self.assertEqual(private_detailed, [], "private_info should be excluded from detailed report")

                # Should contain other paths
                self.assertIn("$.name", paths_with_types)
                self.assertIn("$.age", paths_with_types)
                self.assertIn("$.tags[*]", paths_with_types)

        # Test second dict has additional field (but not private_info)
        second_result = results[1]
        self.assertIn("$.active", second_result["paths_with_types"])
        self.assertEqual(second_result["paths_with_types"]["$.active"], "bool")

        # Test structure reports don't contain excluded keys
        for i, result in enumerate(results):
            structure_report = result["structure_report"]
            with self.subTest(index=i):
                self.assertNotIn("private_info", structure_report)
                self.assertIn("$.name", structure_report)

    def test_exclusion_with_nested_arrays(self):
        """Test exclusion works with nested arrays and complex structures"""
        test_data = {
            "valid_data": {
                "items": [
                    {"id": 1, "name": "item1"},
                    {"id": 2, "name": "item2"}
                ]
            },
            "sensitive_data": {
                "secrets": [
                    {"key": "secret1", "value": "hidden1"},
                    {"key": "secret2", "value": "hidden2"}
                ]
            }
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)

        # Test exclusion of nested structure
        filtered_paths = analyzer.filter_paths_excluding_keys({'sensitive_data'})

        # Should exclude all sensitive_data paths
        sensitive_paths = [path for path in analyzer.paths if 'sensitive_data' in path]
        self.assertGreater(len(sensitive_paths), 0, "Should have sensitive_data paths in original")
        for sensitive_path in sensitive_paths:
            with self.subTest(path=sensitive_path):
                self.assertNotIn(sensitive_path, filtered_paths, f"Should exclude {sensitive_path}")

        # Should keep valid_data paths
        self.assertIn("$.valid_data", filtered_paths)
        self.assertIn("$.valid_data.items[*]", filtered_paths)
        self.assertIn("$.valid_data.items[*].id", filtered_paths)
        self.assertIn("$.valid_data.items[*].name", filtered_paths)


class TestJSONPathComparator(unittest.TestCase):
    """Test JSONPath structure comparison functionality"""

    def test_extract_structure_paths_comparison(self):
        """Test extraction of structure paths from both JSON structures"""
        old_paths = extract_structure_paths(old_json)
        new_paths = extract_structure_paths(new_json)

        # Verify we get a reasonable number of paths
        self.assertGreaterEqual(len(old_paths), 7)
        self.assertGreaterEqual(len(new_paths), 7)

        # Verify specific differences
        self.assertIn("$.key_nested1.array_nested_4[*]", old_paths)
        self.assertIn("$.key_nested1.array_changed_4[*]", new_paths)
        self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", old_paths)
        self.assertIn("$.key_nested1.array_changed_4[*].last_change", new_paths)

    def test_extract_structure_paths_with_types(self):
        """Test extraction of structure paths with type information"""
        old_paths_with_types = extract_structure_paths_with_types(old_json)
        new_paths_with_types = extract_structure_paths_with_types(new_json)

        # Verify we get type information
        self.assertEqual(old_paths_with_types["$.key1"], "string")
        self.assertEqual(old_paths_with_types["$.key2"], "integer")
        self.assertEqual(old_paths_with_types["$.key_nested1"], "object")

        # Check the actual array field, not the [*] path
        self.assertIn("array", old_paths_with_types["$.key_nested1.array_nested_4"])

        # The [*] path represents the type of array elements (first element)
        self.assertEqual(old_paths_with_types["$.key_nested1.array_nested_4[*]"], "string")

        # Verify type differences between old
and new self.assertIn("$.key_nested1.array_nested_4", old_paths_with_types) self.assertIn("$.key_nested1.array_changed_4", new_paths_with_types) self.assertNotIn("$.key_nested1.array_nested_4", new_paths_with_types) def test_compare_structures_array_rename(self): """ Test comparison detects array field rename """ comparator = JSONPathComparator() comparison = comparator.compare_structures(old_json, new_json) # Should detect removed paths (old structure) self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"]) self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", comparison["removed_paths"]) # Should detect added paths (new structure) self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"]) self.assertIn("$.key_nested1.array_changed_4[*].last_change", comparison["added_paths"]) def test_compare_structures_with_types(self): """ Test comparison with type information """ comparator = JSONPathComparator() comparison = comparator.compare_structures_with_types(old_json, new_json) # Should detect added paths with types self.assertIn("$.key_nested1.array_changed_4", comparison["added_paths"]) # Check for the actual existing paths in the comparison # The deeply nested paths might not be included in the type comparison if "$.key_nested1.array_changed_4[*].last_change" in comparison["added_paths"]: self.assertIn("string", comparison["added_paths"]["$.key_nested1.array_changed_4[*].last_change"]) # Should detect removed paths with types self.assertIn("$.key_nested1.array_nested_4", comparison["removed_paths"]) # Should detect common paths with types self.assertIn("$.key1", comparison["common_paths"]) self.assertEqual(comparison["common_paths"]["$.key1"], "string") # Should not detect type changes for this example (same types, different paths) self.assertEqual(len(comparison["type_changes"]), 0) def test_type_changes_detection(self): """ Test detection of type changes in paths """ # Create test data with type changes 
json_with_string = {"test_field": "hello"} json_with_number = {"test_field": 42} comparator = JSONPathComparator() comparison = comparator.compare_structures_with_types(json_with_string, json_with_number) # Should detect type change self.assertIn("$.test_field", comparison["type_changes"]) self.assertEqual(comparison["type_changes"]["$.test_field"]["old_type"], "string") self.assertEqual(comparison["type_changes"]["$.test_field"]["new_type"], "integer") def test_compare_structures_unchanged_paths(self): """ Test that unchanged paths are correctly identified """ comparator = JSONPathComparator() comparison = comparator.compare_structures(old_json, new_json) # These paths should remain unchanged unchanged_paths = [ "$.key1", "$.key2", "$.key_nested1", "$.key_nested1.key_nested2", "$.key_nested1.key_nested3" ] for path in unchanged_paths: self.assertIn(path, comparison["common_paths"], f"Path {path} should be in common paths") self.assertNotIn(path, comparison["added_paths"], f"Path {path} should not be added") self.assertNotIn(path, comparison["removed_paths"], f"Path {path} should not be removed") def test_compare_structures_nested_array_preserved(self): """ Test the deeply nested array structure is preserved despite parent changes """ comparator = JSONPathComparator() comparison = comparator.compare_structures(old_json, new_json) # The nested array should exist in both (though path changed due to parent rename) old_nested_array = "$.key_nested1.array_nested_4[*].array_nested_5[*]" new_nested_array = "$.key_nested1.array_changed_4[*].array_nested_5[*]" self.assertIn(old_nested_array, comparison["removed_paths"]) self.assertIn(new_nested_array, comparison["added_paths"]) def test_path_validations_with_specific_paths(self): """ Test validation of specific paths between old and new structures """ common_paths = [ "$.key1", # Should exist in both "$.key2", # Should exist in both "$.key_nested1.array_nested_4[*]", # Exists only in old "$.key_nested1.array_changed_4[*]", 
# Exists only in new "$.key_nested1.key_nested2" # Should exist in both ] comparator = JSONPathComparator(common_paths) comparison = comparator.compare_structures(old_json, new_json) validations = comparison["path_validations"] # Test paths that exist in both self.assertEqual(validations["$.key1"]["status"], "✅") self.assertTrue(validations["$.key1"]["old_found"]) self.assertTrue(validations["$.key1"]["new_found"]) # Test paths that exist only in old self.assertEqual(validations["$.key_nested1.array_nested_4[*]"]["status"], "❌") self.assertTrue(validations["$.key_nested1.array_nested_4[*]"]["old_found"]) self.assertFalse(validations["$.key_nested1.array_nested_4[*]"]["new_found"]) # Test paths that exist only in new self.assertEqual(validations["$.key_nested1.array_changed_4[*]"]["status"], "❌") self.assertFalse(validations["$.key_nested1.array_changed_4[*]"]["old_found"]) self.assertTrue(validations["$.key_nested1.array_changed_4[*]"]["new_found"]) class TestJSONPathIntegration(unittest.TestCase): """ Integration tests for the complete JSONPath diff workflow """ def test_complete_diff_workflow(self): """ Test the complete workflow from analysis to comparison using the provided data """ # Step 1: Analyze the old structure old_report = analyze_with_jsonpath(old_json) self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", old_report) # Step 2: Analyze the new structure new_report = analyze_with_jsonpath(new_json) self.assertIn("$.key_nested1.array_changed_4[*].last_change", new_report) # Step 3: Compare structures critical_paths = [ "$.key1", "$.key2", "$.key_nested1.key_nested2", "$.key_nested1.key_nested3" ] comparison = compare_json_with_jsonpath(old_json, new_json, critical_paths) # Verify comparison results self.assertIsInstance(comparison, dict) self.assertGreater(len(comparison["added_paths"]), 0) self.assertGreater(len(comparison["removed_paths"]), 0) self.assertGreater(len(comparison["common_paths"]), 0) def 
test_complete_diff_workflow_with_types(self): """ Test the complete workflow with type information """ # Step 1: Compare structures with types critical_paths = [ "$.key1", "$.key2", "$.key_nested1.key_nested2" ] comparison = compare_json_with_jsonpath_and_types(old_json, new_json, critical_paths) # Verify comparison results include type information self.assertIsInstance(comparison, dict) self.assertIn("added_paths", comparison) self.assertIn("removed_paths", comparison) self.assertIn("type_changes", comparison) # Verify type information is included if comparison["added_paths"]: for path, type_info in comparison["added_paths"].items(): self.assertIsInstance(type_info, str) self.assertGreater(len(type_info), 0) def test_detect_specific_changes(self): """ Test detection of the specific changes between old and new JSON """ comparison = compare_json_with_jsonpath(old_json, new_json) # Key changes that should be detected: # 1. array_nested_4 -> array_changed_4 # 2. key_nested4 -> last_change expected_removed = [ "$.key_nested1.array_nested_4[*]", "$.key_nested1.array_nested_4[*].key_nested4" ] expected_added = [ "$.key_nested1.array_changed_4[*]", "$.key_nested1.array_changed_4[*].last_change" ] for path in expected_removed: self.assertIn(path, comparison["removed_paths"], f"Expected removed path {path} not found") for path in expected_added: self.assertIn(path, comparison["added_paths"], f"Expected added path {path} not found") def test_structure_variations_old(self): """ Test that old JSON structure contains expected array name """ analyzer = JSONPathStructureAnalyzer() paths = analyzer.extract_all_paths(old_json) expected_path = "$.key_nested1.array_nested_4[*]" self.assertIn(expected_path, paths, f"Expected path {expected_path} not found") def test_structure_variations_new(self): """ Test that new JSON structure contains expected array name """ analyzer = JSONPathStructureAnalyzer() paths = analyzer.extract_all_paths(new_json) expected_path = 
"$.key_nested1.array_changed_4[*]" self.assertIn(expected_path, paths, f"Expected path {expected_path} not found") def test_json_string_compatibility(self): """ Test that the tools work with JSON strings (serialized/deserialized) """ # Convert to JSON string and back old_string = json.dumps(old_json) new_string = json.dumps(new_json) old_parsed = json.loads(old_string) new_parsed = json.loads(new_string) # Should work the same as with dict objects comparison = compare_json_with_jsonpath(old_parsed, new_parsed) self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"]) self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"]) class TestEdgeCases(unittest.TestCase): """ Test edge cases with the provided data structure """ def test_empty_json_comparison(self): """ Test comparison with empty JSON """ empty_json = {} comparison = compare_json_with_jsonpath(old_json, empty_json) # All old paths should be removed self.assertGreater(len(comparison["removed_paths"]), 0) self.assertEqual(len(comparison["added_paths"]), 0) self.assertEqual(len(comparison["common_paths"]), 0) def test_empty_json_comparison_with_types(self): """ Test comparison with empty JSON including types """ empty_json = {} comparison = compare_json_with_jsonpath_and_types(old_json, empty_json) # All old paths should be removed with type info self.assertGreater(len(comparison["removed_paths"]), 0) self.assertEqual(len(comparison["added_paths"]), 0) # Empty JSON still has the root path "$" in common # The comparison includes the root "$" path in both structures self.assertLessEqual(len(comparison["common_paths"]), 1) # Verify type information is preserved for path, type_info in comparison["removed_paths"].items(): self.assertIsInstance(type_info, str) def test_identical_json_comparison(self): """ Test comparison of identical JSON structures """ comparison = compare_json_with_jsonpath(old_json, old_json) # Should have no changes 
self.assertEqual(len(comparison["added_paths"]), 0) self.assertEqual(len(comparison["removed_paths"]), 0) self.assertGreater(len(comparison["common_paths"]), 0) def test_identical_json_comparison_with_types(self): """ Test comparison of identical JSON structures with types """ comparison = compare_json_with_jsonpath_and_types(old_json, old_json) # Should have no changes self.assertEqual(len(comparison["added_paths"]), 0) self.assertEqual(len(comparison["removed_paths"]), 0) self.assertEqual(len(comparison["type_changes"]), 0) self.assertGreater(len(comparison["common_paths"]), 0) def test_deep_nested_array_analysis(self): """ Test analysis of the deepest nested array (array_nested_5) """ analyzer = JSONPathStructureAnalyzer() analyzer.extract_all_paths(old_json) report = analyzer.get_structure_report() # Should properly analyze the deeply nested array self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*] -- array[2]", report) def test_array_type_detection(self): """ Test proper detection of array types in extract_structure_paths_with_types """ paths_with_types = extract_structure_paths_with_types(old_json) # Test array type detection on the actual array field, not the [*] path # The array field itself should have "array" in its type self.assertIn("array", paths_with_types["$.key_nested1.array_nested_4"]) # The [*] path represents the type of array elements (the first element is a string) self.assertEqual(paths_with_types["$.key_nested1.array_nested_4[*]"], "string") class TestSimpleUsageExamples(unittest.TestCase): """ Simple examples showing how to use the JSONPath diff tool """ def test_basic_structure_analysis_old(self): """ Analyze the structure of the old JSON """ report = analyze_with_jsonpath(old_json) app_logger.info("\nOLD JSON STRUCTURE:") app_logger.info(report) # Basic assertions self.assertIn("$.key1 -- row 1", report) self.assertIn("$.key2 -- 22", report) self.assertIn("array_nested_4", report) self.assertIn("key_nested4", report) def 
test_basic_structure_analysis_new(self): """ Analyze the structure of the new JSON """ report = analyze_with_jsonpath(new_json) app_logger.info("\nNEW JSON STRUCTURE:") app_logger.info(report) # Basic assertions self.assertIn("$.key1 -- row 1", report) self.assertIn("$.key2 -- 22", report) self.assertIn("array_changed_4", report) self.assertIn("last_change", report) def test_basic_comparison(self): """ Compare old versus new JSON structures """ app_logger.info("\nCOMPARISON RESULTS:") comparison = compare_json_with_jsonpath(old_json, new_json) # Verify the main changes self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"]) self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"]) # Verify unchanged elements self.assertIn("$.key1", comparison["common_paths"]) self.assertIn("$.key2", comparison["common_paths"]) def test_basic_comparison_with_types(self): """ Compare old versus new JSON structures with type information """ app_logger.info("\nCOMPARISON RESULTS WITH TYPES:") comparison = compare_json_with_jsonpath_and_types(old_json, new_json) # Verify the main changes with types self.assertIn("$.key_nested1.array_nested_4", comparison["removed_paths"]) self.assertIn("$.key_nested1.array_changed_4", comparison["added_paths"]) # Verify type information is included self.assertEqual(comparison["common_paths"]["$.key1"], "string") self.assertEqual(comparison["common_paths"]["$.key2"], "integer") if __name__ == '__main__': unittest.main()