/*
 * Unit tests for htmlValidateUtf8(), reached through the
 * test_htmlValidateUtf8() wrapper exported by the module under test.
 *
 * As pinned by the assertions below, the wrapper is expected to return:
 *   - the byte length (2, 3 or 4) of a well-formed multi-byte sequence,
 *   -  0 for a truncated sequence when `partial` is non-zero,
 *   - -1 for an invalid sequence, or a truncated one when `partial` is 0,
 *        in which case ctxt->input->flags becomes non-zero.
 */
#include "unity/unity.h"

/*
 * NOTE(review): the header names were missing from the include directives
 * in the reviewed text. The libxml2 headers are reconstructed from the
 * surviving comments and the APIs used; <stdlib.h> and <string.h> are a
 * best guess for the last two -- confirm against the original file.
 */
#include <libxml/HTMLparser.h>
#include <libxml/parserInternals.h> /* for xmlNewInputStream, xmlPushInput */
#include <libxml/parser.h>          /* for xmlInitParser, xmlCleanupParser */
#include <stdlib.h>
#include <string.h>

/* Wrapper provided in the module to access the static function */
int test_htmlValidateUtf8(xmlParserCtxtPtr ctxt, const xmlChar *str,
                          size_t len, int partial);

/*
 * Context currently owned by the running test, or NULL.
 *
 * A failing Unity assertion aborts the test body, so a free_ctxt() call
 * placed at the end of a test is skipped on failure. Recording the context
 * here lets tearDown() release it on that path. Only one context is tracked
 * at a time, which matches how every test in this file uses make_ctxt().
 */
static xmlParserCtxtPtr live_ctxt = NULL;

/*
 * Create an HTML parser context with one freshly pushed input stream whose
 * flags are zero, so tests can detect any flag being set.
 *
 * The context is registered in live_ctxt before the first assertion that
 * could abort, so nothing allocated here is leaked when setup fails.
 */
static xmlParserCtxtPtr make_ctxt(void)
{
    htmlParserCtxtPtr hctxt = htmlNewParserCtxt();
    TEST_ASSERT_NOT_NULL(hctxt);

    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)hctxt;
    live_ctxt = ctxt;

    xmlParserInputPtr input = xmlNewInputStream(ctxt);
    TEST_ASSERT_NOT_NULL(input);
    xmlPushInput(ctxt, input);

    /* Ensure flags start at 0 for predictable testing */
    TEST_ASSERT_NOT_NULL(ctxt->input);
    ctxt->input->flags = 0;
    TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags);

    return ctxt;
}

/*
 * Release a context obtained from make_ctxt(). NULL is accepted and ignored.
 * Clears live_ctxt when it refers to the same context so that tearDown()
 * does not free it a second time.
 */
static void free_ctxt(xmlParserCtxtPtr ctxt)
{
    if (ctxt == NULL) {
        return;
    }
    if (ctxt == live_ctxt) {
        live_ctxt = NULL;
    }
    htmlFreeParserCtxt((htmlParserCtxtPtr)ctxt);
}

void setUp(void)
{
    /* Nothing required per-test */
}

void tearDown(void)
{
    /* Releases a context left behind by a test that aborted early;
     * a no-op when the test already called free_ctxt(). */
    free_ctxt(live_ctxt);
}

/* ---------- Tests ---------- */

/* A well-formed 2-byte sequence yields its length and leaves flags clear. */
void test_htmlValidateUtf8_valid_2byte_returns_size_and_no_error_flag(void)
{
    xmlParserCtxtPtr ctxt = make_ctxt();
    const unsigned char seq[] = { 0xC2, 0xA2 }; /* U+00A2 */

    int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0);

    TEST_ASSERT_EQUAL_INT(2, ret);
    TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags);

    free_ctxt(ctxt);
}

/* A lead byte below 0xC2 is rejected and changes the input flags. */
void test_htmlValidateUtf8_invalid_start_byte_below_C2_sets_flag_and_returns_minus1(void)
{
    xmlParserCtxtPtr ctxt = make_ctxt();
    const unsigned char seq[] = { 0xC1, 0x80 }; /* Overlong lead byte -> invalid */
    unsigned int before = ctxt->input->flags;

    int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0);

    TEST_ASSERT_EQUAL_INT(-1, ret);
    TEST_ASSERT_NOT_EQUAL(before, ctxt->input->flags);

    free_ctxt(ctxt);
}

/* A 2-byte lead followed by a non-continuation byte is rejected. */
void test_htmlValidateUtf8_invalid_2byte_continuation_sets_flag(void)
{
    xmlParserCtxtPtr ctxt = make_ctxt();
    const unsigned char seq[] = { 0xC2, 0x20 }; /* Bad continuation (not 10xxxxxx) */

    int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0);

    TEST_ASSERT_EQUAL_INT(-1, ret);
    TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags);

    free_ctxt(ctxt);
}

/* With partial=1 a truncated 2-byte sequence returns 0 and sets no flag. */
void test_htmlValidateUtf8_incomplete_2byte_partial_returns_zero_no_flag(void)
{
    xmlParserCtxtPtr ctxt = make_ctxt();
    const unsigned char seq[] = { 0xC2 }; /* Incomplete */

    int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 1);

    TEST_ASSERT_EQUAL_INT(0, ret);
    TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags);

    free_ctxt(ctxt);
}

/* With partial=0 the same truncated sequence is an error. */
void test_htmlValidateUtf8_incomplete_2byte_nonpartial_sets_flag_and_minus1(void)
{
    xmlParserCtxtPtr ctxt = make_ctxt();
    const unsigned char seq[] = { 0xC2 }; /* Incomplete */

    int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0);

    TEST_ASSERT_EQUAL_INT(-1, ret);
    TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags);

    free_ctxt(ctxt);
}

/* Valid 3-byte sequences: a typical one, the smallest, and the last one
 * below the surrogate range. All share one context, so each flag check
 * also confirms the previous call left the flags untouched. */
void test_htmlValidateUtf8_valid_3byte_general_and_minimum(void)
{
    xmlParserCtxtPtr ctxt = make_ctxt();

    const unsigned char euro[] = { 0xE2, 0x82, 0xAC }; /* U+20AC */
    int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)euro, sizeof(euro), 0);
    TEST_ASSERT_EQUAL_INT(3, r1);
    TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags);

    const unsigned char min3[] = { 0xE0, 0xA0, 0x80 }; /* U+0800 minimal 3-byte */
    int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)min3, sizeof(min3), 0);
    TEST_ASSERT_EQUAL_INT(3, r2);
    TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags);

    /* 0xED with first cont < 0xA0 should be valid (below surrogates) */
    const unsigned char ed_valid[] = { 0xED, 0x9F, 0xBF }; /* U+D7FF */
    int r3 = test_htmlValidateUtf8(ctxt, (const xmlChar *)ed_valid, sizeof(ed_valid), 0);
    TEST_ASSERT_EQUAL_INT(3, r3);
    TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags);

    free_ctxt(ctxt);
}

/* Invalid 3-byte sequences. Each case gets a fresh context so the flag
 * assertion starts from zero. */
void test_htmlValidateUtf8_invalid_3byte_overlong_and_surrogate(void)
{
    /* Overlong: E0 80 80 */
    {
        xmlParserCtxtPtr ctxt = make_ctxt();
        const unsigned char overlong[] = { 0xE0, 0x80, 0x80 };

        int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)overlong, sizeof(overlong), 0);

        TEST_ASSERT_EQUAL_INT(-1, r);
        TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags);
        free_ctxt(ctxt);
    }

    /* Surrogate range: ED A0 80 */
    {
        xmlParserCtxtPtr ctxt = make_ctxt();
        const unsigned char surrogate[] = { 0xED, 0xA0, 0x80 }; /* U+D800 -> invalid */

        int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)surrogate, sizeof(surrogate), 0);

        TEST_ASSERT_EQUAL_INT(-1, r);
        TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags);
        free_ctxt(ctxt);
    }
}

/* Smallest and largest valid 4-byte sequences. */
void test_htmlValidateUtf8_valid_4byte_boundaries(void)
{
    xmlParserCtxtPtr ctxt = make_ctxt();

    const unsigned char min4[] = { 0xF0, 0x90, 0x80, 0x80 }; /* U+10000 */
    int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)min4, sizeof(min4), 0);
    TEST_ASSERT_EQUAL_INT(4, r1);
    TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags);

    const unsigned char max4[] = { 0xF4, 0x8F, 0xBF, 0xBF }; /* U+10FFFF */
    int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)max4, sizeof(max4), 0);
    TEST_ASSERT_EQUAL_INT(4, r2);
    TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags);

    free_ctxt(ctxt);
}

/* Invalid 4-byte sequences just outside either end of the valid range. */
void test_htmlValidateUtf8_invalid_4byte_overlong_and_above_max(void)
{
    /* Overlong for F0: second byte too low */
    {
        xmlParserCtxtPtr ctxt = make_ctxt();
        const unsigned char overlong4[] = { 0xF0, 0x80, 0x80, 0x80 };

        int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)overlong4, sizeof(overlong4), 0);

        TEST_ASSERT_EQUAL_INT(-1, r);
        TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags);
        free_ctxt(ctxt);
    }

    /* Above Unicode max: F4 90 80 80 */
    {
        xmlParserCtxtPtr ctxt = make_ctxt();
        const unsigned char above_max[] = { 0xF4, 0x90, 0x80, 0x80 };

        int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)above_max, sizeof(above_max), 0);

        TEST_ASSERT_EQUAL_INT(-1, r);
        TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags);
        free_ctxt(ctxt);
    }
}

/* Truncated 3- and 4-byte sequences under both settings of `partial`. */
void test_htmlValidateUtf8_incomplete_3_and_4_byte_partial_behavior(void)
{
    /* Incomplete 3-byte, partial=1 -> 0, no flag */
    {
        xmlParserCtxtPtr ctxt = make_ctxt();
        const unsigned char seq3[] = { 0xE2, 0x82 }; /* Missing 3rd byte */

        int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq3, sizeof(seq3), 1);

        TEST_ASSERT_EQUAL_INT(0, r);
        TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags);
        free_ctxt(ctxt);
    }

    /* Incomplete 4-byte, partial=1 -> 0, no flag */
    {
        xmlParserCtxtPtr ctxt = make_ctxt();
        const unsigned char seq4[] = { 0xF0, 0x90, 0x80 }; /* Missing 4th byte */

        int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq4, sizeof(seq4), 1);

        TEST_ASSERT_EQUAL_INT(0, r);
        TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags);
        free_ctxt(ctxt);
    }

    /* Incomplete 4-byte, partial=0 -> -1, sets flag */
    {
        xmlParserCtxtPtr ctxt = make_ctxt();
        const unsigned char seq4[] = { 0xF0, 0x90, 0x80 }; /* Missing 4th byte */

        int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq4, sizeof(seq4), 0);

        TEST_ASSERT_EQUAL_INT(-1, r);
        TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags);
        free_ctxt(ctxt);
    }
}

/* A second invalid sequence on the same context must leave the flags
 * exactly as the first error left them. */
void test_htmlValidateUtf8_error_flag_only_set_once_per_context(void)
{
    xmlParserCtxtPtr ctxt = make_ctxt();
    const unsigned char bad1[] = { 0xC1, 0x80 };       /* invalid */
    const unsigned char bad2[] = { 0xE0, 0x80, 0x80 }; /* invalid */

    int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)bad1, sizeof(bad1), 0);
    TEST_ASSERT_EQUAL_INT(-1, r1);
    unsigned int flags_after_first = ctxt->input->flags;
    TEST_ASSERT_NOT_EQUAL(0u, flags_after_first);

    int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)bad2, sizeof(bad2), 0);
    TEST_ASSERT_EQUAL_INT(-1, r2);
    unsigned int flags_after_second = ctxt->input->flags;

    /* Should not clear or change the encoding error flag after the first error */
    TEST_ASSERT_EQUAL_UINT(flags_after_first, flags_after_second);

    free_ctxt(ctxt);
}

/* Initialise libxml2, run every test, clean up, and return Unity's result. */
int main(void)
{
    xmlInitParser();

    UNITY_BEGIN();
    RUN_TEST(test_htmlValidateUtf8_valid_2byte_returns_size_and_no_error_flag);
    RUN_TEST(test_htmlValidateUtf8_invalid_start_byte_below_C2_sets_flag_and_returns_minus1);
    RUN_TEST(test_htmlValidateUtf8_invalid_2byte_continuation_sets_flag);
    RUN_TEST(test_htmlValidateUtf8_incomplete_2byte_partial_returns_zero_no_flag);
    RUN_TEST(test_htmlValidateUtf8_incomplete_2byte_nonpartial_sets_flag_and_minus1);
    RUN_TEST(test_htmlValidateUtf8_valid_3byte_general_and_minimum);
    RUN_TEST(test_htmlValidateUtf8_invalid_3byte_overlong_and_surrogate);
    RUN_TEST(test_htmlValidateUtf8_valid_4byte_boundaries);
    RUN_TEST(test_htmlValidateUtf8_invalid_4byte_overlong_and_above_max);
    RUN_TEST(test_htmlValidateUtf8_incomplete_3_and_4_byte_partial_behavior);
    RUN_TEST(test_htmlValidateUtf8_error_flag_only_set_once_per_context);
    int rc = UNITY_END();

    xmlCleanupParser();
    return rc;
}