File size: 7,524 Bytes
6baed57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#include "unity/unity.h"
#include <libxml/HTMLparser.h>

#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Helper: require that the raw output buffer holds exactly the bytes of the
 * C string `expected` (terminator excluded). The length is asserted first,
 * then the byte contents.
 */
static void assert_output_equals(const unsigned char *out, int outlen, const char *expected) {
    const size_t want = strlen(expected);

    TEST_ASSERT_EQUAL_INT((int)want, outlen);
    TEST_ASSERT_EQUAL_UINT8_ARRAY(expected, out, want);
}

/*
 * Helper: fail the current test unless the first `outlen` bytes of `out` are
 * exactly `exp1` or exactly `exp2` (NUL terminators excluded).
 *
 * out     raw output bytes (not necessarily NUL-terminated)
 * outlen  number of valid bytes in `out`
 * exp1    first acceptable representation, as a C string
 * exp2    second acceptable representation, as a C string
 *
 * On mismatch the failure message includes a truncated, NUL-terminated copy
 * of the actual output together with its reported length.
 */
static void assert_output_equals_either(const unsigned char *out, int outlen,
                                        const char *exp1, const char *exp2) {
    size_t len1 = strlen(exp1);
    size_t len2 = strlen(exp2);
    bool match1 = (outlen == (int)len1) && (memcmp(out, exp1, len1) == 0);
    bool match2 = (outlen == (int)len2) && (memcmp(out, exp2, len2) == 0);

    if (match1 || match2) {
        return;
    }

    /*
     * Build a printable copy of what was actually produced. The length is
     * clamped to [0, sizeof(shown) - 1] so that a negative outlen can never
     * reach memcpy as a huge size_t or index the buffer out of bounds.
     */
    char shown[64];
    size_t n = 0;
    if (outlen > 0) {
        n = (size_t)outlen < sizeof(shown) - 1 ? (size_t)outlen : sizeof(shown) - 1;
        memcpy(shown, out, n);
    }
    shown[n] = '\0';

    char msg[192];
    snprintf(msg, sizeof(msg),
             "Output did not match either expected entity representation: got \"%s\" (%d bytes)",
             shown, outlen);
    TEST_FAIL_MESSAGE(msg);
}

void setUp(void) {
    /* Intentionally empty: these tests need no shared fixture. */
}

void tearDown(void) {
    /* Intentionally empty: the tests leave nothing behind to release. */
}

/*
 * Test: a NULL input pointer is the "initialization" call. Expected result:
 * return value 0, with both the output length and the input length reset to 0.
 */
void test_htmlUTF8ToHtml_initialization_with_null_in(void) {
    unsigned char scratch[16] = {0};
    int out_len = (int)sizeof(scratch);
    int in_len = 123; /* deliberately non-zero so the reset to 0 is observable */

    const int rc = htmlUTF8ToHtml(scratch, &out_len, NULL, &in_len);

    TEST_ASSERT_EQUAL_INT(0, rc);
    TEST_ASSERT_EQUAL_INT(0, out_len);
    TEST_ASSERT_EQUAL_INT(0, in_len);
}

/* Test: a NULL output buffer must be rejected with a negative return value, not a crash */
void test_htmlUTF8ToHtml_null_out_pointer_returns_error(void) {
    const unsigned char src[] = { 'A' };
    int in_len = (int)sizeof(src);
    int out_len = 16;

    const int rc = htmlUTF8ToHtml(NULL, &out_len, src, &in_len);

    TEST_ASSERT_LESS_THAN_INT(0, rc);
}

/* Test: plain ASCII input is expected to be copied to the output unchanged */
void test_htmlUTF8ToHtml_ascii_passthrough(void) {
    const unsigned char src[] = "Hello";
    unsigned char dst[64];
    int in_len = (int)strlen((const char *)src);
    int out_len = (int)sizeof(dst);

    const int rc = htmlUTF8ToHtml(dst, &out_len, src, &in_len);

    /* All five bytes consumed, five bytes produced, and the same count returned */
    TEST_ASSERT_EQUAL_INT(5, rc);
    TEST_ASSERT_EQUAL_INT(5, out_len);
    TEST_ASSERT_EQUAL_INT(5, in_len);

    /* Copy the produced bytes into a terminated buffer so they compare as a C string */
    unsigned char text[64];
    memcpy(text, dst, out_len);
    text[out_len] = '\0';
    TEST_ASSERT_EQUAL_STRING("Hello", (const char *)text);
}

/* Test: a lone 2-byte UTF-8 character is emitted as an entity (&copy; or &#169;) */
void test_htmlUTF8ToHtml_two_byte_utf8_to_entity_copy(void) {
    const unsigned char src[] = { 0xC2, 0xA9 }; /* U+00A9 COPYRIGHT SIGN */
    unsigned char dst[32] = {0};
    int in_len = (int)sizeof(src);
    int out_len = (int)sizeof(dst);

    const int rc = htmlUTF8ToHtml(dst, &out_len, src, &in_len);

    /* The return value must equal the number of bytes written */
    TEST_ASSERT_EQUAL_INT(out_len, rc);
    TEST_ASSERT_EQUAL_INT(2, in_len);
    /* Either the named or the numeric form is acceptable */
    assert_output_equals_either(dst, out_len, "&copy;", "&#169;");
}

/* Test: ASCII surrounding a 2-byte UTF-8 character yields ASCII + entity + ASCII */
void test_htmlUTF8ToHtml_mixed_ascii_and_entity(void) {
    const unsigned char src[] = { 'A', 0xC2, 0xA9, 'B' }; /* 'A', U+00A9, 'B' */
    unsigned char dst[64] = {0};
    int in_len = (int)sizeof(src);
    int out_len = (int)sizeof(dst);

    const int rc = htmlUTF8ToHtml(dst, &out_len, src, &in_len);

    /* The return value must equal the number of bytes written; all 4 input bytes consumed */
    TEST_ASSERT_EQUAL_INT(out_len, rc);
    TEST_ASSERT_EQUAL_INT(4, in_len);
    /* The middle character may appear in named or numeric entity form */
    assert_output_equals_either(dst, out_len, "A&copy;B", "A&#169;B");
}

/* Test: a 4-byte UTF-8 character (U+1F600 GRINNING FACE) is emitted as a decimal numeric entity */
void test_htmlUTF8ToHtml_four_byte_utf8_to_numeric_entity(void) {
    const unsigned char src[] = { 0xF0, 0x9F, 0x98, 0x80 }; /* U+1F600 */
    unsigned char dst[32] = {0};
    int in_len = (int)sizeof(src);
    int out_len = (int)sizeof(dst);

    const int rc = htmlUTF8ToHtml(dst, &out_len, src, &in_len);

    /* The return value must equal the number of bytes written; all 4 input bytes consumed */
    TEST_ASSERT_EQUAL_INT(out_len, rc);
    TEST_ASSERT_EQUAL_INT(4, in_len);
    /* 0x1F600 == 128512 decimal */
    assert_output_equals(dst, out_len, "&#128512;");
}

/*
 * Test: input ending in a truncated multibyte sequence. The complete ASCII
 * prefix is expected to be converted, the dangling lead byte left unconsumed,
 * and no error reported.
 */
void test_htmlUTF8ToHtml_incomplete_multibyte_sequence(void) {
    const unsigned char src[] = { 'A', 0xC2 }; /* 'A', then only the lead byte of a 2-byte sequence */
    unsigned char dst[16] = {0};
    int in_len = (int)sizeof(src);
    int out_len = (int)sizeof(dst);

    const int rc = htmlUTF8ToHtml(dst, &out_len, src, &in_len);

    /* Only 'A' is processed; conversion stops in front of the incomplete sequence */
    TEST_ASSERT_EQUAL_INT(1, rc);
    TEST_ASSERT_EQUAL_INT(1, out_len);
    TEST_ASSERT_EQUAL_INT(1, in_len);
    TEST_ASSERT_EQUAL_HEX8('A', dst[0]);
}

/* Test: when the entity does not fit in the output buffer, expect an error with nothing consumed or written */
void test_htmlUTF8ToHtml_outbuf_too_small_for_entity(void) {
    const unsigned char src[] = { 0xC2, 0xA9 }; /* U+00A9: needs 6 bytes as &copy; or &#169; */
    unsigned char dst[5] = {0};                 /* one byte short on purpose */
    int in_len = (int)sizeof(src);
    int out_len = (int)sizeof(dst);

    const int rc = htmlUTF8ToHtml(dst, &out_len, src, &in_len);

    TEST_ASSERT_LESS_THAN_INT(0, rc); /* out-of-space error */
    TEST_ASSERT_EQUAL_INT(0, out_len);
    TEST_ASSERT_EQUAL_INT(0, in_len);
}

/*
 * Test: the output buffer fills up part-way through an ASCII run. Expect an
 * error, with the lengths reporting the bytes that were converted before
 * space ran out.
 */
void test_htmlUTF8ToHtml_outbuf_too_small_mid_ascii(void) {
    const unsigned char src[] = { 'A', 'B', 'C' };
    unsigned char dst[2] = {0};
    int in_len = (int)sizeof(src);
    int out_len = (int)sizeof(dst);

    const int rc = htmlUTF8ToHtml(dst, &out_len, src, &in_len);

    TEST_ASSERT_LESS_THAN_INT(0, rc); /* out-of-space error */
    /* Two bytes fit, so two were consumed and two were written */
    TEST_ASSERT_EQUAL_INT(2, out_len);
    TEST_ASSERT_EQUAL_INT(2, in_len);
    TEST_ASSERT_EQUAL_HEX8('A', dst[0]);
    TEST_ASSERT_EQUAL_HEX8('B', dst[1]);
}

/* Test: an output buffer sized exactly to the entity is sufficient */
void test_htmlUTF8ToHtml_exact_boundary_for_entity(void) {
    const unsigned char src[] = { 0xC2, 0xA9 }; /* U+00A9 COPYRIGHT SIGN */
    unsigned char dst[6] = {0};                 /* exactly the size of &copy; or &#169; */
    int in_len = (int)sizeof(src);
    int out_len = (int)sizeof(dst);

    const int rc = htmlUTF8ToHtml(dst, &out_len, src, &in_len);

    TEST_ASSERT_EQUAL_INT(6, rc);
    TEST_ASSERT_EQUAL_INT(6, out_len);
    TEST_ASSERT_EQUAL_INT(2, in_len);
    /* Either the named or the numeric form is acceptable */
    assert_output_equals_either(dst, out_len, "&copy;", "&#169;");
}

/* Test runner: executes every htmlUTF8ToHtml test and returns the value of UNITY_END(). */
int main(void) {
    UNITY_BEGIN();

    /* Argument handling */
    RUN_TEST(test_htmlUTF8ToHtml_initialization_with_null_in);
    RUN_TEST(test_htmlUTF8ToHtml_null_out_pointer_returns_error);

    /* Conversion of well-formed and truncated input */
    RUN_TEST(test_htmlUTF8ToHtml_ascii_passthrough);
    RUN_TEST(test_htmlUTF8ToHtml_two_byte_utf8_to_entity_copy);
    RUN_TEST(test_htmlUTF8ToHtml_mixed_ascii_and_entity);
    RUN_TEST(test_htmlUTF8ToHtml_four_byte_utf8_to_numeric_entity);
    RUN_TEST(test_htmlUTF8ToHtml_incomplete_multibyte_sequence);

    /* Output buffer size limits */
    RUN_TEST(test_htmlUTF8ToHtml_outbuf_too_small_for_entity);
    RUN_TEST(test_htmlUTF8ToHtml_outbuf_too_small_mid_ascii);
    RUN_TEST(test_htmlUTF8ToHtml_exact_boundary_for_entity);

    const int result = UNITY_END();
    return result;
}