|
|
#include "unity/unity.h" |
|
|
#include <libxml/HTMLparser.h> |
|
|
#include <stdlib.h> |
|
|
#include <string.h> |
|
|
|
|
|
|
|
|
/*
 * Function under test. Only its declaration appears here; each test below
 * passes it a parser context obtained from make_ctxt_with_sax().
 */
extern void test_htmlParseContent(htmlParserCtxtPtr ctxt); |
|
|
|
|
|
|
|
|
/*
 * Record of what the SAX callbacks in this file observed during one parse.
 * cap_init() zero-fills it; cap_free() releases the heap buffers it points to.
 */
typedef struct {
    /* Character data: every onCharacters() chunk appended, NUL-terminated. */
    char *text;
    size_t text_len;                /* bytes stored in text, terminator excluded */

    /* Comments: copy of the most recent comment text and the number reported. */
    char *last_comment;
    int comment_count;

    /* DOCTYPE: copy of the most recent internalSubset name and the event count. */
    char *internal_subset_name;
    int internal_subset_count;

    /* End tags: copies of element names in the order onEndElement() stored them. */
    char *end_names[32];
    int end_count;                  /* number of end_names slots in use */
} SaxCapture;
|
|
|
|
|
/*
 * Zero-fill a capture so it starts with no text, no stored names and every
 * counter at 0.
 */
static void cap_init(SaxCapture *cap)
{
    memset(cap, 0, sizeof *cap);
}
|
|
|
|
|
/*
 * Release every heap buffer owned by a capture.
 *
 * cap: capture to clear; NULL is accepted and ignored.
 *
 * After the buffers are freed the structure is zero-filled again, so no
 * dangling pointer or stale counter survives: calling cap_free() twice is
 * harmless and the capture can be reused without a separate cap_init().
 */
static void cap_free(SaxCapture *cap) {
    if (cap == NULL) {
        return;
    }

    free(cap->text);
    free(cap->last_comment);
    free(cap->internal_subset_name);

    /* Only the first end_count slots hold names stored by onEndElement(). */
    for (int i = 0; i < cap->end_count; i++) {
        free(cap->end_names[i]);
    }

    /* Forget the freed pointers and reset the counters. */
    memset(cap, 0, sizeof *cap);
}
|
|
|
|
|
/*
 * Append len bytes from data to cap->text and keep the buffer NUL-terminated.
 * A zero len does nothing. If the buffer cannot be grown, the chunk is
 * dropped and the text captured so far is left untouched.
 */
static void cap_append_text(SaxCapture *cap, const char *data, size_t len)
{
    if (len != 0) {
        const size_t old_len = cap->text_len;
        const size_t new_len = old_len + len;
        char *grown = (char *)realloc(cap->text, new_len + 1); /* +1: terminator */

        if (grown != NULL) {
            memcpy(grown + old_len, data, len);
            grown[new_len] = '\0';
            cap->text = grown;
            cap->text_len = new_len;
        }
    }
}
|
|
|
|
|
|
|
|
static void onCharacters(void *userData, const xmlChar *ch, int len) { |
|
|
SaxCapture *cap = (SaxCapture *)userData; |
|
|
cap_append_text(cap, (const char *)ch, (size_t)len); |
|
|
} |
|
|
|
|
|
static void onEndElement(void *userData, const xmlChar *name) { |
|
|
SaxCapture *cap = (SaxCapture *)userData; |
|
|
if (cap->end_count < (int)(sizeof(cap->end_names)/sizeof(cap->end_names[0]))) { |
|
|
const char *src = (const char *)name; |
|
|
size_t l = strlen(src); |
|
|
cap->end_names[cap->end_count] = (char *)malloc(l + 1); |
|
|
if (cap->end_names[cap->end_count]) { |
|
|
memcpy(cap->end_names[cap->end_count], src, l + 1); |
|
|
cap->end_count++; |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
static void onComment(void *userData, const xmlChar *value) { |
|
|
SaxCapture *cap = (SaxCapture *)userData; |
|
|
free(cap->last_comment); |
|
|
cap->last_comment = strdup((const char *)value); |
|
|
cap->comment_count++; |
|
|
} |
|
|
|
|
|
static void onInternalSubset(void *userData, const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID) { |
|
|
(void)ExternalID; (void)SystemID; |
|
|
SaxCapture *cap = (SaxCapture *)userData; |
|
|
free(cap->internal_subset_name); |
|
|
cap->internal_subset_name = strdup((const char *)name); |
|
|
cap->internal_subset_count++; |
|
|
} |
|
|
|
|
|
|
|
|
static htmlSAXHandler make_sax(void) { |
|
|
htmlSAXHandler sax; |
|
|
memset(&sax, 0, sizeof(sax)); |
|
|
sax.characters = onCharacters; |
|
|
sax.endElement = onEndElement; |
|
|
sax.comment = onComment; |
|
|
sax.internalSubset = onInternalSubset; |
|
|
return sax; |
|
|
} |
|
|
|
|
|
|
|
|
/*
 * Create an in-memory HTML parser context whose SAX events are routed to the
 * capture callbacks of this file.
 *
 * data: NUL-terminated HTML input.
 * cap:  capture handed to every callback as userData.
 *
 * Returns the new context, or NULL when data is NULL, the context cannot be
 * created, or it was created without a SAX handler. The caller releases the
 * context with htmlFreeParserCtxt().
 */
static htmlParserCtxtPtr make_ctxt_with_sax(const char *data, SaxCapture *cap) {
    if (data == NULL) {
        return NULL;
    }

    htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt(data, (int)strlen(data));
    if (ctxt == NULL) {
        return NULL;
    }

    if (ctxt->sax == NULL) {
        /* Nothing to install the callbacks into; treat as a creation failure. */
        htmlFreeParserCtxt(ctxt);
        return NULL;
    }

    /*
     * Overwrite the handler the context already owns rather than pointing
     * ctxt->sax at a static object. Freeing the context also frees ctxt->sax,
     * so a static handler would be passed to the allocator's free routine,
     * and the handler allocated with the context would leak.
     */
    *ctxt->sax = make_sax();
    ctxt->userData = cap;
    ctxt->disableSAX = 0;

    return ctxt;
}
|
|
|
|
|
/*
 * Per-test setup hook. Intentionally empty: every test in this file creates
 * its own SaxCapture and parser context.
 */
void setUp(void)
{
}
|
|
|
|
|
/*
 * Per-test teardown hook. Intentionally empty: every test in this file frees
 * its own parser context and capture before returning.
 */
void tearDown(void)
{
}
|
|
|
|
|
|
|
|
void test_htmlParseContent_plain_text(void) { |
|
|
const char *html = "Hello"; |
|
|
SaxCapture cap; cap_init(&cap); |
|
|
|
|
|
htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
|
|
TEST_ASSERT_NOT_NULL(ctxt); |
|
|
|
|
|
test_htmlParseContent(ctxt); |
|
|
|
|
|
TEST_ASSERT_NOT_NULL(cap.text); |
|
|
TEST_ASSERT_EQUAL_STRING("Hello", cap.text); |
|
|
TEST_ASSERT_EQUAL_INT(0, cap.comment_count); |
|
|
|
|
|
htmlFreeParserCtxt(ctxt); |
|
|
cap_free(&cap); |
|
|
} |
|
|
|
|
|
void test_htmlParseContent_element_with_end_tag(void) { |
|
|
const char *html = "<p>Hi</p>"; |
|
|
SaxCapture cap; cap_init(&cap); |
|
|
|
|
|
htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
|
|
TEST_ASSERT_NOT_NULL(ctxt); |
|
|
|
|
|
test_htmlParseContent(ctxt); |
|
|
|
|
|
TEST_ASSERT_NOT_NULL(cap.text); |
|
|
TEST_ASSERT_EQUAL_STRING("Hi", cap.text); |
|
|
TEST_ASSERT_TRUE(cap.end_count >= 1); |
|
|
|
|
|
TEST_ASSERT_EQUAL_STRING("p", cap.end_names[0]); |
|
|
|
|
|
htmlFreeParserCtxt(ctxt); |
|
|
cap_free(&cap); |
|
|
} |
|
|
|
|
|
void test_htmlParseContent_comment_and_bogus_comment(void) { |
|
|
const char *html1 = "X<!--abc-->Y"; |
|
|
SaxCapture cap1; cap_init(&cap1); |
|
|
htmlParserCtxtPtr ctxt1 = make_ctxt_with_sax(html1, &cap1); |
|
|
TEST_ASSERT_NOT_NULL(ctxt1); |
|
|
test_htmlParseContent(ctxt1); |
|
|
TEST_ASSERT_EQUAL_INT(1, cap1.comment_count); |
|
|
TEST_ASSERT_EQUAL_STRING("abc", cap1.last_comment); |
|
|
TEST_ASSERT_EQUAL_STRING("XY", cap1.text); |
|
|
htmlFreeParserCtxt(ctxt1); |
|
|
cap_free(&cap1); |
|
|
|
|
|
const char *html2 = "A<!zzz>B"; |
|
|
SaxCapture cap2; cap_init(&cap2); |
|
|
htmlParserCtxtPtr ctxt2 = make_ctxt_with_sax(html2, &cap2); |
|
|
TEST_ASSERT_NOT_NULL(ctxt2); |
|
|
test_htmlParseContent(ctxt2); |
|
|
TEST_ASSERT_EQUAL_INT(1, cap2.comment_count); |
|
|
TEST_ASSERT_EQUAL_STRING("zzz", cap2.last_comment); |
|
|
TEST_ASSERT_EQUAL_STRING("AB", cap2.text); |
|
|
htmlFreeParserCtxt(ctxt2); |
|
|
cap_free(&cap2); |
|
|
} |
|
|
|
|
|
void test_htmlParseContent_literal_lt_when_not_a_tag(void) { |
|
|
const char *html = "<1"; |
|
|
SaxCapture cap; cap_init(&cap); |
|
|
htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
|
|
TEST_ASSERT_NOT_NULL(ctxt); |
|
|
|
|
|
test_htmlParseContent(ctxt); |
|
|
|
|
|
TEST_ASSERT_NOT_NULL(cap.text); |
|
|
TEST_ASSERT_EQUAL_STRING("<1", cap.text); |
|
|
|
|
|
htmlFreeParserCtxt(ctxt); |
|
|
cap_free(&cap); |
|
|
} |
|
|
|
|
|
void test_htmlParseContent_doctype_in_content(void) { |
|
|
const char *html = "<!DOCTYPE html>Text"; |
|
|
SaxCapture cap; cap_init(&cap); |
|
|
htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
|
|
TEST_ASSERT_NOT_NULL(ctxt); |
|
|
|
|
|
test_htmlParseContent(ctxt); |
|
|
|
|
|
TEST_ASSERT_EQUAL_INT(1, cap.internal_subset_count); |
|
|
TEST_ASSERT_NOT_NULL(cap.internal_subset_name); |
|
|
TEST_ASSERT_EQUAL_STRING("html", cap.internal_subset_name); |
|
|
TEST_ASSERT_EQUAL_STRING("Text", cap.text); |
|
|
|
|
|
htmlFreeParserCtxt(ctxt); |
|
|
cap_free(&cap); |
|
|
} |
|
|
|
|
|
void test_htmlParseContent_autoclose_on_end(void) { |
|
|
const char *html = "<p>ab"; |
|
|
SaxCapture cap; cap_init(&cap); |
|
|
htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
|
|
TEST_ASSERT_NOT_NULL(ctxt); |
|
|
|
|
|
test_htmlParseContent(ctxt); |
|
|
|
|
|
TEST_ASSERT_EQUAL_STRING("ab", cap.text); |
|
|
|
|
|
TEST_ASSERT_TRUE(cap.end_count >= 1); |
|
|
TEST_ASSERT_EQUAL_STRING("p", cap.end_names[0]); |
|
|
|
|
|
htmlFreeParserCtxt(ctxt); |
|
|
cap_free(&cap); |
|
|
} |
|
|
|
|
|
void test_htmlParseContent_script_mode_preserves_lt_inside_script(void) { |
|
|
const char *html = "<script>1<2</script>"; |
|
|
SaxCapture cap; cap_init(&cap); |
|
|
htmlParserCtxtPtr ctxt = make_ctxt_with_sax(html, &cap); |
|
|
TEST_ASSERT_NOT_NULL(ctxt); |
|
|
|
|
|
test_htmlParseContent(ctxt); |
|
|
|
|
|
TEST_ASSERT_NOT_NULL(cap.text); |
|
|
TEST_ASSERT_EQUAL_STRING("1<2", cap.text); |
|
|
|
|
|
TEST_ASSERT_TRUE(cap.end_count >= 1); |
|
|
|
|
|
int saw_script = 0; |
|
|
for (int i = 0; i < cap.end_count; i++) { |
|
|
if (strcmp(cap.end_names[i], "script") == 0) { |
|
|
saw_script = 1; break; |
|
|
} |
|
|
} |
|
|
TEST_ASSERT_TRUE(saw_script); |
|
|
|
|
|
htmlFreeParserCtxt(ctxt); |
|
|
cap_free(&cap); |
|
|
} |
|
|
|
|
|
int main(void) { |
|
|
UNITY_BEGIN(); |
|
|
RUN_TEST(test_htmlParseContent_plain_text); |
|
|
RUN_TEST(test_htmlParseContent_element_with_end_tag); |
|
|
RUN_TEST(test_htmlParseContent_comment_and_bogus_comment); |
|
|
RUN_TEST(test_htmlParseContent_literal_lt_when_not_a_tag); |
|
|
RUN_TEST(test_htmlParseContent_doctype_in_content); |
|
|
RUN_TEST(test_htmlParseContent_autoclose_on_end); |
|
|
RUN_TEST(test_htmlParseContent_script_mode_preserves_lt_inside_script); |
|
|
return UNITY_END(); |
|
|
} |