|
|
#include "unity/unity.h" |
|
|
#include <libxml/HTMLparser.h> |
|
|
|
|
|
#include <libxml/parserInternals.h> |
|
|
#include <libxml/parser.h> |
|
|
#include <stdlib.h> |
|
|
#include <string.h> |
|
|
|
|
|
|
|
|
/*
 * Entry point exercised by every test below. Only the declaration is visible
 * here; the definition is not part of this file's visible contents.
 *
 * ctxt    - parser context; the tests always give it a current input
 *           (ctxt->input) and inspect ctxt->input->flags afterwards.
 * str     - bytes of the candidate UTF-8 sequence.
 * len     - number of bytes available at str (the tests pass sizeof the array).
 * partial - the tests pass 1 when a truncated sequence should be tolerated
 *           and 0 otherwise.
 *
 * Contract as asserted by the tests in this file (not verifiable from the
 * declaration alone): the byte length of the sequence for valid input, -1
 * with ctxt->input->flags becoming non-zero for invalid input, and 0 with
 * flags untouched for a truncated sequence when partial is non-zero.
 */
int test_htmlValidateUtf8(xmlParserCtxtPtr ctxt, const xmlChar *str, size_t len, int partial); |
|
|
|
|
|
|
|
|
static xmlParserCtxtPtr make_ctxt(void) { |
|
|
htmlParserCtxtPtr hctxt = htmlNewParserCtxt(); |
|
|
TEST_ASSERT_NOT_NULL(hctxt); |
|
|
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)hctxt; |
|
|
|
|
|
xmlParserInputPtr input = xmlNewInputStream(ctxt); |
|
|
TEST_ASSERT_NOT_NULL(input); |
|
|
xmlPushInput(ctxt, input); |
|
|
|
|
|
|
|
|
if (ctxt->input) |
|
|
ctxt->input->flags = 0; |
|
|
|
|
|
TEST_ASSERT_NOT_NULL(ctxt->input); |
|
|
TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
|
|
|
return ctxt; |
|
|
} |
|
|
|
|
|
/*
 * Release a context obtained from make_ctxt().
 * A NULL argument is ignored.
 */
static void free_ctxt(xmlParserCtxtPtr ctxt) {
    if (ctxt == NULL) {
        return;
    }

    htmlFreeParserCtxt((htmlParserCtxtPtr)ctxt);
}
|
|
|
|
|
/*
 * Unity per-test setup hook. Each test in this file creates its own parser
 * context, so there is nothing to prepare here.
 */
void setUp(void) {
    /* intentionally empty */
}
|
|
|
|
|
/*
 * Unity per-test teardown hook. Each test in this file frees its own parser
 * context, so there is nothing to clean up here.
 */
void tearDown(void) {
    /* intentionally empty */
}
|
|
|
|
|
|
|
|
|
|
|
void test_htmlValidateUtf8_valid_2byte_returns_size_and_no_error_flag(void) { |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
|
|
|
const unsigned char seq[] = { 0xC2, 0xA2 }; |
|
|
int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(2, ret); |
|
|
TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
|
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
void test_htmlValidateUtf8_invalid_start_byte_below_C2_sets_flag_and_returns_minus1(void) { |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
|
|
|
const unsigned char seq[] = { 0xC1, 0x80 }; |
|
|
unsigned int before = ctxt->input->flags; |
|
|
int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(-1, ret); |
|
|
TEST_ASSERT_TRUE(ctxt->input->flags != before); |
|
|
|
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
void test_htmlValidateUtf8_invalid_2byte_continuation_sets_flag(void) { |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
|
|
|
const unsigned char seq[] = { 0xC2, 0x20 }; |
|
|
int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(-1, ret); |
|
|
TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
|
|
|
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
void test_htmlValidateUtf8_incomplete_2byte_partial_returns_zero_no_flag(void) { |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
|
|
|
const unsigned char seq[] = { 0xC2 }; |
|
|
int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 1); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(0, ret); |
|
|
TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
|
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
void test_htmlValidateUtf8_incomplete_2byte_nonpartial_sets_flag_and_minus1(void) { |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
|
|
|
const unsigned char seq[] = { 0xC2 }; |
|
|
int ret = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq, sizeof(seq), 0); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(-1, ret); |
|
|
TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
|
|
|
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
void test_htmlValidateUtf8_valid_3byte_general_and_minimum(void) { |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
|
|
|
const unsigned char euro[] = { 0xE2, 0x82, 0xAC }; |
|
|
int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)euro, sizeof(euro), 0); |
|
|
TEST_ASSERT_EQUAL_INT(3, r1); |
|
|
TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
|
|
|
const unsigned char min3[] = { 0xE0, 0xA0, 0x80 }; |
|
|
int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)min3, sizeof(min3), 0); |
|
|
TEST_ASSERT_EQUAL_INT(3, r2); |
|
|
TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
|
|
|
|
|
|
const unsigned char ed_valid[] = { 0xED, 0x9F, 0xBF }; |
|
|
int r3 = test_htmlValidateUtf8(ctxt, (const xmlChar *)ed_valid, sizeof(ed_valid), 0); |
|
|
TEST_ASSERT_EQUAL_INT(3, r3); |
|
|
TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
|
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
void test_htmlValidateUtf8_invalid_3byte_overlong_and_surrogate(void) { |
|
|
|
|
|
{ |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
const unsigned char overlong[] = { 0xE0, 0x80, 0x80 }; |
|
|
int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)overlong, sizeof(overlong), 0); |
|
|
TEST_ASSERT_EQUAL_INT(-1, r); |
|
|
TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
{ |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
const unsigned char surrogate[] = { 0xED, 0xA0, 0x80 }; |
|
|
int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)surrogate, sizeof(surrogate), 0); |
|
|
TEST_ASSERT_EQUAL_INT(-1, r); |
|
|
TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
} |
|
|
|
|
|
void test_htmlValidateUtf8_valid_4byte_boundaries(void) { |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
|
|
|
const unsigned char min4[] = { 0xF0, 0x90, 0x80, 0x80 }; |
|
|
int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)min4, sizeof(min4), 0); |
|
|
TEST_ASSERT_EQUAL_INT(4, r1); |
|
|
TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
|
|
|
const unsigned char max4[] = { 0xF4, 0x8F, 0xBF, 0xBF }; |
|
|
int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)max4, sizeof(max4), 0); |
|
|
TEST_ASSERT_EQUAL_INT(4, r2); |
|
|
TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
|
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
void test_htmlValidateUtf8_invalid_4byte_overlong_and_above_max(void) { |
|
|
|
|
|
{ |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
const unsigned char overlong4[] = { 0xF0, 0x80, 0x80, 0x80 }; |
|
|
int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)overlong4, sizeof(overlong4), 0); |
|
|
TEST_ASSERT_EQUAL_INT(-1, r); |
|
|
TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
{ |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
const unsigned char above_max[] = { 0xF4, 0x90, 0x80, 0x80 }; |
|
|
int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)above_max, sizeof(above_max), 0); |
|
|
TEST_ASSERT_EQUAL_INT(-1, r); |
|
|
TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
} |
|
|
|
|
|
void test_htmlValidateUtf8_incomplete_3_and_4_byte_partial_behavior(void) { |
|
|
|
|
|
{ |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
const unsigned char seq3[] = { 0xE2, 0x82 }; |
|
|
int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq3, sizeof(seq3), 1); |
|
|
TEST_ASSERT_EQUAL_INT(0, r); |
|
|
TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
{ |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
const unsigned char seq4[] = { 0xF0, 0x90, 0x80 }; |
|
|
int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq4, sizeof(seq4), 1); |
|
|
TEST_ASSERT_EQUAL_INT(0, r); |
|
|
TEST_ASSERT_EQUAL_UINT(0u, ctxt->input->flags); |
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
{ |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
const unsigned char seq4[] = { 0xF0, 0x90, 0x80 }; |
|
|
int r = test_htmlValidateUtf8(ctxt, (const xmlChar *)seq4, sizeof(seq4), 0); |
|
|
TEST_ASSERT_EQUAL_INT(-1, r); |
|
|
TEST_ASSERT_NOT_EQUAL(0u, ctxt->input->flags); |
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
} |
|
|
|
|
|
void test_htmlValidateUtf8_error_flag_only_set_once_per_context(void) { |
|
|
xmlParserCtxtPtr ctxt = make_ctxt(); |
|
|
|
|
|
const unsigned char bad1[] = { 0xC1, 0x80 }; |
|
|
const unsigned char bad2[] = { 0xE0, 0x80, 0x80 }; |
|
|
|
|
|
int r1 = test_htmlValidateUtf8(ctxt, (const xmlChar *)bad1, sizeof(bad1), 0); |
|
|
TEST_ASSERT_EQUAL_INT(-1, r1); |
|
|
unsigned int flags_after_first = ctxt->input->flags; |
|
|
TEST_ASSERT_NOT_EQUAL(0u, flags_after_first); |
|
|
|
|
|
int r2 = test_htmlValidateUtf8(ctxt, (const xmlChar *)bad2, sizeof(bad2), 0); |
|
|
TEST_ASSERT_EQUAL_INT(-1, r2); |
|
|
unsigned int flags_after_second = ctxt->input->flags; |
|
|
|
|
|
|
|
|
TEST_ASSERT_EQUAL_UINT(flags_after_first, flags_after_second); |
|
|
|
|
|
free_ctxt(ctxt); |
|
|
} |
|
|
|
|
|
int main(void) { |
|
|
xmlInitParser(); |
|
|
|
|
|
UNITY_BEGIN(); |
|
|
RUN_TEST(test_htmlValidateUtf8_valid_2byte_returns_size_and_no_error_flag); |
|
|
RUN_TEST(test_htmlValidateUtf8_invalid_start_byte_below_C2_sets_flag_and_returns_minus1); |
|
|
RUN_TEST(test_htmlValidateUtf8_invalid_2byte_continuation_sets_flag); |
|
|
RUN_TEST(test_htmlValidateUtf8_incomplete_2byte_partial_returns_zero_no_flag); |
|
|
RUN_TEST(test_htmlValidateUtf8_incomplete_2byte_nonpartial_sets_flag_and_minus1); |
|
|
RUN_TEST(test_htmlValidateUtf8_valid_3byte_general_and_minimum); |
|
|
RUN_TEST(test_htmlValidateUtf8_invalid_3byte_overlong_and_surrogate); |
|
|
RUN_TEST(test_htmlValidateUtf8_valid_4byte_boundaries); |
|
|
RUN_TEST(test_htmlValidateUtf8_invalid_4byte_overlong_and_above_max); |
|
|
RUN_TEST(test_htmlValidateUtf8_incomplete_3_and_4_byte_partial_behavior); |
|
|
RUN_TEST(test_htmlValidateUtf8_error_flag_only_set_once_per_context); |
|
|
int rc = UNITY_END(); |
|
|
|
|
|
xmlCleanupParser(); |
|
|
return rc; |
|
|
} |