/*
 * libxml/tests/tests_HTMLparser_areBlanks.c
 *
 * Repository listing residue kept for reference: "AryaWu's picture",
 * "Upload folder using huggingface_hub", revision 6baed57 (verified).
 */
#include "unity/unity.h"
#include <libxml/HTMLparser.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/xmlmemory.h>
#include <string.h>
#include <stdlib.h>
/*
 * Wrapper for the static function provided in the module (presumably
 * areBlanks(), going by the wrapper's name).  It is only declared here; the
 * definition is not part of this file.
 *
 * Return values as expected by the tests in this file:
 *   -1  str contains a non-whitespace character,
 *    1  or 0 for an all-whitespace str, depending on the parser context.
 * NOTE(review): the exact meaning of 1 versus 0 is inferred from the test
 * expectations below -- confirm against the wrapped function.
 */
extern int test_areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len);
/*
 * Build an HTML parser context over the NUL-terminated string s.
 *
 * Returns NULL when s is NULL; otherwise returns whatever
 * htmlCreateMemoryParserCtxt() returns for the string's bytes.  The tests in
 * this file release the context with htmlFreeParserCtxt().
 */
static htmlParserCtxtPtr make_ctx_with_input(const char *s) {
    if (s == NULL) {
        /* strlen(NULL) is undefined behaviour; report failure instead. */
        return NULL;
    }
    return htmlCreateMemoryParserCtxt(s, (int)strlen(s));
}
/*
 * Unity fixture hook (by Unity convention invoked around each RUN_TEST):
 * calls xmlInitParser() so the library is initialised before a test runs.
 */
void setUp(void)
{
    xmlInitParser();
}
/*
 * Unity fixture hook paired with setUp().  Intentionally a no-op: global
 * cleanup is not performed here so that tests do not interfere with each
 * other.
 */
void tearDown(void)
{
    /* nothing to release per test */
}
/*
 * Non-whitespace in str should return -1, independent of ctxt.
 *
 * Uses a context over an empty input and the one-character string "a".
 * The context is released before the final assertion: a failing Unity
 * assertion ends the test function immediately, so cleanup placed after it
 * would be skipped and the context would leak.
 */
void test_areBlanks_non_whitespace_returns_minus1(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("");
    TEST_ASSERT_NOT_NULL(ctxt);

    const xmlChar *str = BAD_CAST "a";
    int res = test_areBlanks(ctxt, str, 1);

    /* Release first, assert afterwards (see header comment). */
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(-1, res);
}
/*
 * CUR == 0 (empty input) returns 1 for all-whitespace str.
 *
 * The context is created over "" and str is " \t".  The context is released
 * before the final assertion because a failing Unity assertion ends the test
 * function immediately and would otherwise skip the cleanup.
 */
void test_areBlanks_empty_input_cur0_returns1(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("");
    TEST_ASSERT_NOT_NULL(ctxt);

    const xmlChar *str = BAD_CAST " \t";
    int res = test_areBlanks(ctxt, str, (int)xmlStrlen(str));

    /* Release first, assert afterwards (see header comment). */
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(1, res);
}
/*
 * CUR != '<' returns 0 for all-whitespace str.
 *
 * The context is created over "x" and str is " \n".  The context is released
 * before the final assertion because a failing Unity assertion ends the test
 * function immediately and would otherwise skip the cleanup.
 */
void test_areBlanks_cur_not_lt_returns0(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("x");
    TEST_ASSERT_NOT_NULL(ctxt);

    const xmlChar *str = BAD_CAST " \n";
    int res = test_areBlanks(ctxt, str, (int)xmlStrlen(str));

    /* Release first, assert afterwards (see header comment). */
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(0, res);
}
/*
 * CUR == '<' and name == NULL returns 1.
 *
 * The context is created over "<" and its name field is forced to NULL
 * before the call.  The context is released before the final assertion
 * because a failing Unity assertion ends the test function immediately and
 * would otherwise skip the cleanup.
 */
void test_areBlanks_cur_lt_name_null_returns1(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("<");
    TEST_ASSERT_NOT_NULL(ctxt);
    ctxt->name = NULL;

    const xmlChar *str = BAD_CAST " \t";
    int res = test_areBlanks(ctxt, str, (int)xmlStrlen(str));

    /* Release first, assert afterwards (see header comment). */
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(1, res);
}
/*
 * CUR == '<' and name == "html" returns 1.
 *
 * The context is created over "<" and its name field points at the literal
 * "html".  The context is released before the final assertion because a
 * failing Unity assertion ends the test function immediately and would
 * otherwise skip the cleanup.
 */
void test_areBlanks_name_html_returns1(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("<");
    TEST_ASSERT_NOT_NULL(ctxt);
    ctxt->name = BAD_CAST "html";

    const xmlChar *str = BAD_CAST " ";
    int res = test_areBlanks(ctxt, str, (int)xmlStrlen(str));

    /* Release first, assert afterwards (see header comment). */
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(1, res);
}
/*
 * CUR == '<' and name == "head" returns 1.
 *
 * The context is created over "<", its name field points at the literal
 * "head", and all three characters of "\n\t " are passed.  The context is
 * released before the final assertion because a failing Unity assertion ends
 * the test function immediately and would otherwise skip the cleanup.
 */
void test_areBlanks_name_head_returns1(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("<");
    TEST_ASSERT_NOT_NULL(ctxt);
    ctxt->name = BAD_CAST "head";

    const xmlChar *str = BAD_CAST "\n\t ";
    int res = test_areBlanks(ctxt, str, 3);

    /* Release first, assert afterwards (see header comment). */
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(1, res);
}
/*
 * CUR == '<', name == "body", and HTML 4.01 DTD returns 1.
 *
 * Builds a document whose internal subset carries the external ID
 * "-//W3C//DTD HTML 4.01//EN" and attaches it to the context as myDoc.
 *
 * Cleanup discipline: a failing Unity assertion ends the test function
 * immediately, so everything already allocated is released before any
 * assertion that might fail.  The subset creation result is checked so that a
 * failed fixture is reported as such instead of as a result mismatch.
 */
void test_areBlanks_name_body_html401_dtd_returns1(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("<");
    TEST_ASSERT_NOT_NULL(ctxt);
    ctxt->name = BAD_CAST "body";

    xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
    if (doc == NULL) {
        htmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(doc);

    int has_dtd = xmlCreateIntSubset(doc, BAD_CAST "html",
                                     BAD_CAST "-//W3C//DTD HTML 4.01//EN",
                                     NULL) != NULL;
    if (!has_dtd) {
        xmlFreeDoc(doc);
        htmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_EQUAL_INT(1, has_dtd);

    ctxt->myDoc = doc;
    const xmlChar *str = BAD_CAST " \t ";
    int res = test_areBlanks(ctxt, str, (int)xmlStrlen(str));

    /* Detach the document so the context never points at freed memory. */
    ctxt->myDoc = NULL;
    xmlFreeDoc(doc);
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(1, res);
}
/*
 * node == NULL returns 0 (after previous early checks).
 *
 * The context is created over "<" with name "nothtml" and with both myDoc
 * and node forced to NULL.  The context is released before the final
 * assertion because a failing Unity assertion ends the test function
 * immediately and would otherwise skip the cleanup.
 */
void test_areBlanks_node_null_returns0(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("<");
    TEST_ASSERT_NOT_NULL(ctxt);
    ctxt->name = BAD_CAST "nothtml";
    ctxt->myDoc = NULL;
    ctxt->node = NULL;

    const xmlChar *str = BAD_CAST " \n";
    int res = test_areBlanks(ctxt, str, (int)xmlStrlen(str));

    /* Release first, assert afterwards (see header comment). */
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(0, res);
}
/*
 * lastChild == NULL and node is non-element with content -> returns 0.
 *
 * A standalone text node holding "abc" is installed as the context's current
 * node; it has no children of its own.
 *
 * Cleanup discipline: a failing Unity assertion ends the test function
 * immediately, so everything already allocated is released before any
 * assertion that might fail.
 */
void test_areBlanks_lastChild_null_non_element_with_content_returns0(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("<");
    TEST_ASSERT_NOT_NULL(ctxt);
    ctxt->name = BAD_CAST "x_test_tag";

    xmlNodePtr textNode = xmlNewText(BAD_CAST "abc");
    if (textNode == NULL) {
        htmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(textNode);

    ctxt->node = textNode;
    const xmlChar *str = BAD_CAST " ";
    int res = test_areBlanks(ctxt, str, (int)xmlStrlen(str));

    /* Detach the node so the context never points at freed memory. */
    ctxt->node = NULL;
    xmlFreeNode(textNode);
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(0, res);
}
/*
 * lastChild is a text node -> returns 0.
 *
 * Builds a document with a <div> root whose only child is the text "hi" and
 * installs the root as the context's current node.
 *
 * Cleanup discipline: a failing Unity assertion ends the test function
 * immediately, so everything already allocated is released before any
 * assertion that might fail.
 */
void test_areBlanks_lastChild_is_text_returns0(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("<");
    TEST_ASSERT_NOT_NULL(ctxt);
    ctxt->name = BAD_CAST "x_test_tag";

    xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
    if (doc == NULL) {
        htmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(doc);

    xmlNodePtr root = xmlNewNode(NULL, BAD_CAST "div");
    if (root == NULL) {
        xmlFreeDoc(doc);
        htmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(root);

    /* From here on the tree is released through xmlFreeDoc(doc). */
    xmlDocSetRootElement(doc, root);
    xmlAddChild(root, xmlNewText(BAD_CAST "hi"));
    ctxt->myDoc = doc;
    ctxt->node = root;

    const xmlChar *str = BAD_CAST " ";
    int res = test_areBlanks(ctxt, str, 1);

    /* Detach the tree so the context never points at freed memory. */
    ctxt->node = NULL;
    ctxt->myDoc = NULL;
    xmlFreeDoc(doc);
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(0, res);
}
/*
 * lastChild is a comment only; it is skipped -> behaves like
 * lastChild == NULL; expect 1.
 *
 * Builds a document with a <container> root whose only child is a comment and
 * installs the root as the context's current node.
 *
 * Cleanup discipline: a failing Unity assertion ends the test function
 * immediately, so everything already allocated is released before any
 * assertion that might fail.
 */
void test_areBlanks_comment_only_child_skipped_then_returns1(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("<");
    TEST_ASSERT_NOT_NULL(ctxt);
    ctxt->name = BAD_CAST "unlikely_tag_name_123";

    xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
    if (doc == NULL) {
        htmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(doc);

    xmlNodePtr root = xmlNewNode(NULL, BAD_CAST "container");
    if (root == NULL) {
        xmlFreeDoc(doc);
        htmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(root);

    /* From here on the root is released through xmlFreeDoc(doc). */
    xmlDocSetRootElement(doc, root);

    xmlNodePtr comment = xmlNewComment(BAD_CAST "just a comment");
    if (comment == NULL) {
        xmlFreeDoc(doc);
        htmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(comment);

    xmlAddChild(root, comment);
    ctxt->myDoc = doc;
    ctxt->node = root;

    const xmlChar *str = BAD_CAST "\t ";
    int res = test_areBlanks(ctxt, str, 2);

    /* Detach the tree so the context never points at freed memory. */
    ctxt->node = NULL;
    ctxt->myDoc = NULL;
    xmlFreeDoc(doc);
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(1, res);
}
/*
 * lastChild is an element with a name unlikely in allowPCData -> expect 1.
 *
 * Builds a document with a <rootx> root whose only child is the element
 * <zzz_not_pcdata> and installs the root as the context's current node.
 *
 * Cleanup discipline: a failing Unity assertion ends the test function
 * immediately, so everything already allocated is released before any
 * assertion that might fail.
 */
void test_areBlanks_lastChild_element_not_in_pcdata_returns1(void) {
    htmlParserCtxtPtr ctxt = make_ctx_with_input("<");
    TEST_ASSERT_NOT_NULL(ctxt);
    ctxt->name = BAD_CAST "unlikely_parent_name_456";

    xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
    if (doc == NULL) {
        htmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(doc);

    xmlNodePtr root = xmlNewNode(NULL, BAD_CAST "rootx");
    if (root == NULL) {
        xmlFreeDoc(doc);
        htmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(root);

    /* From here on the root is released through xmlFreeDoc(doc). */
    xmlDocSetRootElement(doc, root);

    xmlNodePtr child = xmlNewNode(NULL, BAD_CAST "zzz_not_pcdata");
    if (child == NULL) {
        xmlFreeDoc(doc);
        htmlFreeParserCtxt(ctxt);
    }
    TEST_ASSERT_NOT_NULL(child);

    xmlAddChild(root, child);
    ctxt->myDoc = doc;
    ctxt->node = root;

    const xmlChar *str = BAD_CAST " ";
    int res = test_areBlanks(ctxt, str, 1);

    /* Detach the tree so the context never points at freed memory. */
    ctxt->node = NULL;
    ctxt->myDoc = NULL;
    xmlFreeDoc(doc);
    htmlFreeParserCtxt(ctxt);
    TEST_ASSERT_EQUAL_INT(1, res);
}
/*
 * Test runner entry point: registers every test case of this file with Unity
 * in a fixed order and returns the value produced by UNITY_END().
 */
int main(void)
{
    UNITY_BEGIN();

    /* Outcomes decided by str and the current input character. */
    RUN_TEST(test_areBlanks_non_whitespace_returns_minus1);
    RUN_TEST(test_areBlanks_empty_input_cur0_returns1);
    RUN_TEST(test_areBlanks_cur_not_lt_returns0);

    /* Outcomes decided by the context's current element name. */
    RUN_TEST(test_areBlanks_cur_lt_name_null_returns1);
    RUN_TEST(test_areBlanks_name_html_returns1);
    RUN_TEST(test_areBlanks_name_head_returns1);
    RUN_TEST(test_areBlanks_name_body_html401_dtd_returns1);

    /* Outcomes decided by the context's current node and its last child. */
    RUN_TEST(test_areBlanks_node_null_returns0);
    RUN_TEST(test_areBlanks_lastChild_null_non_element_with_content_returns0);
    RUN_TEST(test_areBlanks_lastChild_is_text_returns0);
    RUN_TEST(test_areBlanks_comment_only_child_skipped_then_returns1);
    RUN_TEST(test_areBlanks_lastChild_element_not_in_pcdata_returns1);

    const int status = UNITY_END();
    return status;
}