|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define IN_LIBXML |
|
|
#include "libxml.h" |
|
|
|
|
|
#include <string.h> |
|
|
#include <limits.h> |
|
|
#include <ctype.h> |
|
|
#include <stdlib.h> |
|
|
|
|
|
#ifdef LIBXML_ICONV_ENABLED |
|
|
#include <iconv.h> |
|
|
#include <errno.h> |
|
|
#endif |
|
|
|
|
|
#include <libxml/encoding.h> |
|
|
#include <libxml/xmlmemory.h> |
|
|
#include <libxml/parser.h> |
|
|
#ifdef LIBXML_HTML_ENABLED |
|
|
#include <libxml/HTMLparser.h> |
|
|
#endif |
|
|
#include <libxml/xmlerror.h> |
|
|
|
|
|
#include "private/buf.h" |
|
|
#include "private/enc.h" |
|
|
#include "private/error.h" |
|
|
#include "private/memory.h" |
|
|
|
|
|
#ifdef LIBXML_ICU_ENABLED |
|
|
#include <unicode/ucnv.h> |
|
|
#endif |
|
|
|
|
|
#define XML_HANDLER_STATIC (1 << 0) |
|
|
#define XML_HANDLER_LEGACY (1 << 1) |
|
|
|
|
|
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; |
|
|
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; |
|
|
struct _xmlCharEncodingAlias { |
|
|
const char *name; |
|
|
const char *alias; |
|
|
}; |
|
|
|
|
|
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; |
|
|
static int xmlCharEncodingAliasesNb = 0; |
|
|
static int xmlCharEncodingAliasesMax = 0; |
|
|
|
|
|
static int xmlLittleEndian = 1; |
|
|
|
|
|
typedef struct { |
|
|
const char *name; |
|
|
xmlCharEncoding enc; |
|
|
} xmlEncTableEntry; |
|
|
|
|
|
static const xmlEncTableEntry xmlEncTable[] = { |
|
|
{ "ansi_x3.4-1968", XML_CHAR_ENCODING_ASCII }, |
|
|
{ "arabic", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "ascii", XML_CHAR_ENCODING_ASCII }, |
|
|
{ "asmo-708", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "cp1252", XML_CHAR_ENCODING_WINDOWS_1252 }, |
|
|
{ "cp819", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "cseucpkdfmtjapanese", XML_CHAR_ENCODING_EUC_JP }, |
|
|
{ "csiso2022jp", XML_CHAR_ENCODING_2022_JP }, |
|
|
{ "csiso88596e", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "csiso88596i", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "csiso88598e", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "csiso88598i", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "csisolatin1", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "csisolatin2", XML_CHAR_ENCODING_8859_2 }, |
|
|
{ "csisolatin3", XML_CHAR_ENCODING_8859_3 }, |
|
|
{ "csisolatin4", XML_CHAR_ENCODING_8859_4 }, |
|
|
{ "csisolatin5", XML_CHAR_ENCODING_8859_9 }, |
|
|
{ "csisolatin6", XML_CHAR_ENCODING_8859_10 }, |
|
|
{ "csisolatin9", XML_CHAR_ENCODING_8859_15 }, |
|
|
{ "csisolatinarabic", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "csisolatincyrillic", XML_CHAR_ENCODING_8859_5 }, |
|
|
{ "csisolatingreek", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "csisolatinhebrew", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "csshiftjis", XML_CHAR_ENCODING_SHIFT_JIS }, |
|
|
{ "csunicode", XML_CHAR_ENCODING_UTF16 }, |
|
|
{ "cyrillic", XML_CHAR_ENCODING_8859_5 }, |
|
|
{ "ecma-114", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "ecma-118", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "elot_928", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "euc-jp", XML_CHAR_ENCODING_EUC_JP }, |
|
|
{ "greek", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "greek8", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "html", XML_CHAR_ENCODING_HTML }, |
|
|
{ "ibm819", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso latin 1", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso latin 2", XML_CHAR_ENCODING_8859_2 }, |
|
|
{ "iso-10646-ucs-2", XML_CHAR_ENCODING_UCS2 }, |
|
|
{ "iso-10646-ucs-4", XML_CHAR_ENCODING_UCS4LE }, |
|
|
{ "iso-2022-jp", XML_CHAR_ENCODING_2022_JP }, |
|
|
{ "iso-8859-1", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso-8859-10", XML_CHAR_ENCODING_8859_10 }, |
|
|
{ "iso-8859-11", XML_CHAR_ENCODING_8859_11 }, |
|
|
{ "iso-8859-13", XML_CHAR_ENCODING_8859_13 }, |
|
|
{ "iso-8859-14", XML_CHAR_ENCODING_8859_14 }, |
|
|
{ "iso-8859-15", XML_CHAR_ENCODING_8859_15 }, |
|
|
{ "iso-8859-16", XML_CHAR_ENCODING_8859_16 }, |
|
|
{ "iso-8859-2", XML_CHAR_ENCODING_8859_2 }, |
|
|
{ "iso-8859-3", XML_CHAR_ENCODING_8859_3 }, |
|
|
{ "iso-8859-4", XML_CHAR_ENCODING_8859_4 }, |
|
|
{ "iso-8859-5", XML_CHAR_ENCODING_8859_5 }, |
|
|
{ "iso-8859-6", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "iso-8859-6-e", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "iso-8859-6-i", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "iso-8859-7", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "iso-8859-8", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "iso-8859-8-i", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "iso-8859-9", XML_CHAR_ENCODING_8859_9 }, |
|
|
{ "iso-ir-100", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso-ir-101", XML_CHAR_ENCODING_8859_2 }, |
|
|
{ "iso-ir-109", XML_CHAR_ENCODING_8859_3 }, |
|
|
{ "iso-ir-110", XML_CHAR_ENCODING_8859_4 }, |
|
|
{ "iso-ir-126", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "iso-ir-127", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "iso-ir-138", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "iso-ir-144", XML_CHAR_ENCODING_8859_5 }, |
|
|
{ "iso-ir-148", XML_CHAR_ENCODING_8859_9 }, |
|
|
{ "iso-ir-157", XML_CHAR_ENCODING_8859_10 }, |
|
|
{ "iso-latin-1", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso-latin-2", XML_CHAR_ENCODING_8859_2 }, |
|
|
{ "iso8859-1", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso8859-10", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso8859-13", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso8859-14", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso8859-15", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso8859-2", XML_CHAR_ENCODING_8859_2 }, |
|
|
{ "iso8859-3", XML_CHAR_ENCODING_8859_3 }, |
|
|
{ "iso8859-4", XML_CHAR_ENCODING_8859_4 }, |
|
|
{ "iso8859-5", XML_CHAR_ENCODING_8859_5 }, |
|
|
{ "iso8859-6", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "iso8859-7", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "iso8859-8", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "iso8859-9", XML_CHAR_ENCODING_8859_9 }, |
|
|
{ "iso88591", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso885910", XML_CHAR_ENCODING_8859_10 }, |
|
|
{ "iso885913", XML_CHAR_ENCODING_8859_13 }, |
|
|
{ "iso885914", XML_CHAR_ENCODING_8859_14 }, |
|
|
{ "iso885915", XML_CHAR_ENCODING_8859_15 }, |
|
|
{ "iso88592", XML_CHAR_ENCODING_8859_2 }, |
|
|
{ "iso88593", XML_CHAR_ENCODING_8859_3 }, |
|
|
{ "iso88594", XML_CHAR_ENCODING_8859_4 }, |
|
|
{ "iso88595", XML_CHAR_ENCODING_8859_5 }, |
|
|
{ "iso88596", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "iso88597", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "iso88598", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "iso88599", XML_CHAR_ENCODING_8859_9 }, |
|
|
{ "iso_8859-1", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso_8859-1:1987", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "iso_8859-2", XML_CHAR_ENCODING_8859_2 }, |
|
|
{ "iso_8859-2:1987", XML_CHAR_ENCODING_8859_2 }, |
|
|
{ "iso_8859-3", XML_CHAR_ENCODING_8859_3 }, |
|
|
{ "iso_8859-3:1988", XML_CHAR_ENCODING_8859_3 }, |
|
|
{ "iso_8859-4", XML_CHAR_ENCODING_8859_4 }, |
|
|
{ "iso_8859-4:1988", XML_CHAR_ENCODING_8859_4 }, |
|
|
{ "iso_8859-5", XML_CHAR_ENCODING_8859_5 }, |
|
|
{ "iso_8859-5:1988", XML_CHAR_ENCODING_8859_5 }, |
|
|
{ "iso_8859-6", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "iso_8859-6:1987", XML_CHAR_ENCODING_8859_6 }, |
|
|
{ "iso_8859-7", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "iso_8859-7:1987", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "iso_8859-8", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "iso_8859-8:1988", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "iso_8859-9", XML_CHAR_ENCODING_8859_9 }, |
|
|
{ "iso_8859-9:1989", XML_CHAR_ENCODING_8859_9 }, |
|
|
{ "l1", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "l2", XML_CHAR_ENCODING_8859_2 }, |
|
|
{ "l3", XML_CHAR_ENCODING_8859_3 }, |
|
|
{ "l4", XML_CHAR_ENCODING_8859_4 }, |
|
|
{ "l5", XML_CHAR_ENCODING_8859_9 }, |
|
|
{ "l6", XML_CHAR_ENCODING_8859_10 }, |
|
|
{ "l9", XML_CHAR_ENCODING_8859_15 }, |
|
|
{ "latin1", XML_CHAR_ENCODING_8859_1 }, |
|
|
{ "latin2", XML_CHAR_ENCODING_8859_2 }, |
|
|
{ "latin3", XML_CHAR_ENCODING_8859_3 }, |
|
|
{ "latin4", XML_CHAR_ENCODING_8859_4 }, |
|
|
{ "latin5", XML_CHAR_ENCODING_8859_9 }, |
|
|
{ "latin6", XML_CHAR_ENCODING_8859_10 }, |
|
|
{ "logical", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "ms932", XML_CHAR_ENCODING_SHIFT_JIS }, |
|
|
{ "ms_kanji", XML_CHAR_ENCODING_SHIFT_JIS }, |
|
|
{ "shift-jis", XML_CHAR_ENCODING_SHIFT_JIS }, |
|
|
{ "shift_jis", XML_CHAR_ENCODING_SHIFT_JIS }, |
|
|
{ "sjis", XML_CHAR_ENCODING_SHIFT_JIS }, |
|
|
{ "sun_eu_greek", XML_CHAR_ENCODING_8859_7 }, |
|
|
{ "ucs-2", XML_CHAR_ENCODING_UCS2 }, |
|
|
{ "ucs-4", XML_CHAR_ENCODING_UCS4LE }, |
|
|
{ "ucs2", XML_CHAR_ENCODING_UCS2 }, |
|
|
{ "ucs4", XML_CHAR_ENCODING_UCS4LE }, |
|
|
{ "unicode", XML_CHAR_ENCODING_UTF16 }, |
|
|
{ "unicode-1-1-utf-8", XML_CHAR_ENCODING_UTF8 }, |
|
|
{ "unicode11utf8", XML_CHAR_ENCODING_UTF8 }, |
|
|
{ "unicode20utf8", XML_CHAR_ENCODING_UTF8 }, |
|
|
{ "unicodefffe", XML_CHAR_ENCODING_UTF16BE }, |
|
|
{ "unicodefeff", XML_CHAR_ENCODING_UTF16LE }, |
|
|
{ "us-ascii", XML_CHAR_ENCODING_ASCII }, |
|
|
{ "utf-16", XML_CHAR_ENCODING_UTF16 }, |
|
|
{ "utf-16be", XML_CHAR_ENCODING_UTF16BE }, |
|
|
{ "utf-16le", XML_CHAR_ENCODING_UTF16LE }, |
|
|
{ "utf-8", XML_CHAR_ENCODING_UTF8 }, |
|
|
{ "utf16", XML_CHAR_ENCODING_UTF16 }, |
|
|
{ "utf8", XML_CHAR_ENCODING_UTF8 }, |
|
|
{ "visual", XML_CHAR_ENCODING_8859_8 }, |
|
|
{ "windows-1252", XML_CHAR_ENCODING_WINDOWS_1252 }, |
|
|
{ "windows-31j", XML_CHAR_ENCODING_SHIFT_JIS }, |
|
|
{ "x-cp1252", XML_CHAR_ENCODING_WINDOWS_1252 }, |
|
|
{ "x-euc-jp", XML_CHAR_ENCODING_EUC_JP }, |
|
|
{ "x-sjis", XML_CHAR_ENCODING_SHIFT_JIS }, |
|
|
{ "x-unicode20utf8", XML_CHAR_ENCODING_UTF8 } |
|
|
}; |
|
|
|
|
|
static xmlCharEncError |
|
|
asciiToAscii(void *vctxt, unsigned char* out, int *outlen, |
|
|
const unsigned char* in, int *inlen, int flush); |
|
|
static xmlCharEncError |
|
|
UTF8ToUTF8(void *vctxt, unsigned char* out, int *outlen, |
|
|
const unsigned char* inb, int *inlenb, int flush); |
|
|
static xmlCharEncError |
|
|
latin1ToUTF8(void *vctxt, unsigned char* out, int *outlen, |
|
|
const unsigned char* in, int *inlen, int flush); |
|
|
static xmlCharEncError |
|
|
UTF16LEToUTF8(void *vctxt, unsigned char* out, int *outlen, |
|
|
const unsigned char* inb, int *inlenb, int flush); |
|
|
static xmlCharEncError |
|
|
UTF16BEToUTF8(void *vctxt, unsigned char* out, int *outlen, |
|
|
const unsigned char* inb, int *inlenb, int flush); |
|
|
|
|
|
#ifdef LIBXML_OUTPUT_ENABLED |
|
|
|
|
|
static xmlCharEncError |
|
|
UTF8ToLatin1(void *vctxt, unsigned char* outb, int *outlen, |
|
|
const unsigned char* in, int *inlen, int flush); |
|
|
static xmlCharEncError |
|
|
UTF8ToUTF16(void *vctxt, unsigned char* outb, int *outlen, |
|
|
const unsigned char* in, int *inlen, int flush); |
|
|
static xmlCharEncError |
|
|
UTF8ToUTF16LE(void *vctxt, unsigned char* outb, int *outlen, |
|
|
const unsigned char* in, int *inlen, int flush); |
|
|
static xmlCharEncError |
|
|
UTF8ToUTF16BE(void *vctxt, unsigned char* outb, int *outlen, |
|
|
const unsigned char* in, int *inlen, int flush); |
|
|
|
|
|
#else |
|
|
|
|
|
#define UTF8ToLatin1 NULL |
|
|
#define UTF8ToUTF16 NULL |
|
|
#define UTF8ToUTF16LE NULL |
|
|
#define UTF8ToUTF16BE NULL |
|
|
|
|
|
#endif |
|
|
|
|
|
#if defined(LIBXML_OUTPUT_ENABLED) && defined(LIBXML_HTML_ENABLED) |
|
|
static xmlCharEncError |
|
|
UTF8ToHtmlWrapper(void *vctxt, unsigned char *out, int *outlen, |
|
|
const unsigned char *in, int *inlen, int flush); |
|
|
#else |
|
|
#define UTF8ToHtmlWrapper NULL |
|
|
#endif |
|
|
|
|
|
#include "codegen/charset.inc" |
|
|
|
|
|
static xmlCharEncError |
|
|
EightBitToUtf8(void *vctxt, unsigned char* out, int *outlen, |
|
|
const unsigned char* in, int *inlen, int flush); |
|
|
static xmlCharEncError |
|
|
Utf8ToEightBit(void *vctxt, unsigned char *out, int *outlen, |
|
|
const unsigned char *in, int *inlen, int flush); |
|
|
|
|
|
#define MAKE_8BIT_HANDLER(name, table) \ |
|
|
{ (char *) name, { EightBitToUtf8 }, { Utf8ToEightBit }, \ |
|
|
(void *) xmlunicodetable_##table, \ |
|
|
(void *) xmltranscodetable_##table, \ |
|
|
NULL, XML_HANDLER_STATIC } |
|
|
|
|
|
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \ |
|
|
defined(LIBXML_ISO8859X_ENABLED) |
|
|
|
|
|
#define MAKE_ISO_HANDLER(name, n) MAKE_8BIT_HANDLER(name, ISO8859_##n) |
|
|
|
|
|
#else |
|
|
|
|
|
#define MAKE_ISO_HANDLER(name, n) \ |
|
|
{ (char *) name, { NULL }, { NULL }, NULL, NULL, NULL, \ |
|
|
XML_HANDLER_STATIC } |
|
|
|
|
|
#endif |
|
|
|
|
|
#define MAKE_HANDLER(name, in, out) \ |
|
|
{ (char *) name, { in }, { out }, NULL, NULL, NULL, XML_HANDLER_STATIC } |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static const xmlCharEncodingHandler defaultHandlers[32] = { |
|
|
MAKE_HANDLER(NULL, NULL, NULL), |
|
|
MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8), |
|
|
MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE), |
|
|
MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE), |
|
|
MAKE_HANDLER("UCS-4LE", NULL, NULL), |
|
|
MAKE_HANDLER("UCS-4BE", NULL, NULL), |
|
|
MAKE_HANDLER("IBM037", NULL, NULL), |
|
|
MAKE_HANDLER(NULL, NULL, NULL), |
|
|
MAKE_HANDLER(NULL, NULL, NULL), |
|
|
MAKE_HANDLER("UCS-2", NULL, NULL), |
|
|
MAKE_HANDLER("ISO-8859-1", latin1ToUTF8, UTF8ToLatin1), |
|
|
MAKE_ISO_HANDLER("ISO-8859-2", 2), |
|
|
MAKE_ISO_HANDLER("ISO-8859-3", 3), |
|
|
MAKE_ISO_HANDLER("ISO-8859-4", 4), |
|
|
MAKE_ISO_HANDLER("ISO-8859-5", 5), |
|
|
MAKE_ISO_HANDLER("ISO-8859-6", 6), |
|
|
MAKE_ISO_HANDLER("ISO-8859-7", 7), |
|
|
MAKE_ISO_HANDLER("ISO-8859-8", 8), |
|
|
MAKE_ISO_HANDLER("ISO-8859-9", 9), |
|
|
MAKE_HANDLER("ISO-2022-JP", NULL, NULL), |
|
|
MAKE_HANDLER("Shift_JIS", NULL, NULL), |
|
|
MAKE_HANDLER("EUC-JP", NULL, NULL), |
|
|
MAKE_HANDLER("US-ASCII", asciiToAscii, asciiToAscii), |
|
|
MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16), |
|
|
MAKE_HANDLER("HTML", NULL, UTF8ToHtmlWrapper), |
|
|
MAKE_ISO_HANDLER("ISO-8859-10", 10), |
|
|
MAKE_ISO_HANDLER("ISO-8859-11", 11), |
|
|
MAKE_ISO_HANDLER("ISO-8859-13", 13), |
|
|
MAKE_ISO_HANDLER("ISO-8859-14", 14), |
|
|
MAKE_ISO_HANDLER("ISO-8859-15", 15), |
|
|
MAKE_ISO_HANDLER("ISO-8859-16", 16), |
|
|
MAKE_8BIT_HANDLER("windows-1252", windows_1252) |
|
|
}; |
|
|
|
|
|
#define NUM_DEFAULT_HANDLERS \ |
|
|
(sizeof(defaultHandlers) / sizeof(defaultHandlers[0])) |
|
|
|
|
|
|
|
|
#define MAX_ENCODING_HANDLERS 50 |
|
|
static xmlCharEncodingHandlerPtr *globalHandlers = NULL; |
|
|
static int nbCharEncodingHandler = 0; |
|
|
|
|
|
#ifdef LIBXML_ICONV_ENABLED |
|
|
static xmlParserErrors |
|
|
xmlCharEncIconv(const char *name, xmlCharEncFlags flags, |
|
|
xmlCharEncodingHandler **out); |
|
|
#endif |
|
|
|
|
|
#ifdef LIBXML_ICU_ENABLED |
|
|
static xmlParserErrors |
|
|
xmlCharEncUconv(const char *name, xmlCharEncFlags flags, |
|
|
xmlCharEncodingHandler **out); |
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlCharEncoding |
|
|
xmlDetectCharEncoding(const unsigned char* in, int len) |
|
|
{ |
|
|
if (in == NULL) |
|
|
return(XML_CHAR_ENCODING_NONE); |
|
|
if (len >= 4) { |
|
|
if ((in[0] == 0x00) && (in[1] == 0x00) && |
|
|
(in[2] == 0x00) && (in[3] == 0x3C)) |
|
|
return(XML_CHAR_ENCODING_UCS4BE); |
|
|
if ((in[0] == 0x3C) && (in[1] == 0x00) && |
|
|
(in[2] == 0x00) && (in[3] == 0x00)) |
|
|
return(XML_CHAR_ENCODING_UCS4LE); |
|
|
if ((in[0] == 0x4C) && (in[1] == 0x6F) && |
|
|
(in[2] == 0xA7) && (in[3] == 0x94)) |
|
|
return(XML_CHAR_ENCODING_EBCDIC); |
|
|
if ((in[0] == 0x3C) && (in[1] == 0x3F) && |
|
|
(in[2] == 0x78) && (in[3] == 0x6D)) |
|
|
return(XML_CHAR_ENCODING_UTF8); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ((in[0] == 0x3C) && (in[1] == 0x00) && |
|
|
(in[2] == 0x3F) && (in[3] == 0x00)) |
|
|
return(XML_CHAR_ENCODING_UTF16LE); |
|
|
if ((in[0] == 0x00) && (in[1] == 0x3C) && |
|
|
(in[2] == 0x00) && (in[3] == 0x3F)) |
|
|
return(XML_CHAR_ENCODING_UTF16BE); |
|
|
} |
|
|
if (len >= 3) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ((in[0] == 0xEF) && (in[1] == 0xBB) && |
|
|
(in[2] == 0xBF)) |
|
|
return(XML_CHAR_ENCODING_UTF8); |
|
|
} |
|
|
|
|
|
if (len >= 2) { |
|
|
if ((in[0] == 0xFE) && (in[1] == 0xFF)) |
|
|
return(XML_CHAR_ENCODING_UTF16BE); |
|
|
if ((in[0] == 0xFF) && (in[1] == 0xFE)) |
|
|
return(XML_CHAR_ENCODING_UTF16LE); |
|
|
} |
|
|
return(XML_CHAR_ENCODING_NONE); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void |
|
|
xmlCleanupEncodingAliases(void) { |
|
|
int i; |
|
|
|
|
|
if (xmlCharEncodingAliases == NULL) |
|
|
return; |
|
|
|
|
|
for (i = 0;i < xmlCharEncodingAliasesNb;i++) { |
|
|
if (xmlCharEncodingAliases[i].name != NULL) |
|
|
xmlFree((char *) xmlCharEncodingAliases[i].name); |
|
|
if (xmlCharEncodingAliases[i].alias != NULL) |
|
|
xmlFree((char *) xmlCharEncodingAliases[i].alias); |
|
|
} |
|
|
xmlCharEncodingAliasesNb = 0; |
|
|
xmlCharEncodingAliasesMax = 0; |
|
|
xmlFree(xmlCharEncodingAliases); |
|
|
xmlCharEncodingAliases = NULL; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const char * |
|
|
xmlGetEncodingAlias(const char *alias) { |
|
|
int i; |
|
|
char upper[100]; |
|
|
|
|
|
if (alias == NULL) |
|
|
return(NULL); |
|
|
|
|
|
if (xmlCharEncodingAliases == NULL) |
|
|
return(NULL); |
|
|
|
|
|
for (i = 0;i < 99;i++) { |
|
|
upper[i] = (char) toupper((unsigned char) alias[i]); |
|
|
if (upper[i] == 0) break; |
|
|
} |
|
|
upper[i] = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (i = 0;i < xmlCharEncodingAliasesNb;i++) { |
|
|
if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { |
|
|
return(xmlCharEncodingAliases[i].name); |
|
|
} |
|
|
} |
|
|
return(NULL); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
xmlAddEncodingAlias(const char *name, const char *alias) { |
|
|
int i; |
|
|
char upper[100]; |
|
|
char *nameCopy, *aliasCopy; |
|
|
|
|
|
if ((name == NULL) || (alias == NULL)) |
|
|
return(-1); |
|
|
|
|
|
for (i = 0;i < 99;i++) { |
|
|
upper[i] = (char) toupper((unsigned char) alias[i]); |
|
|
if (upper[i] == 0) break; |
|
|
} |
|
|
upper[i] = 0; |
|
|
|
|
|
if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { |
|
|
xmlCharEncodingAliasPtr tmp; |
|
|
int newSize; |
|
|
|
|
|
newSize = xmlGrowCapacity(xmlCharEncodingAliasesMax, sizeof(tmp[0]), |
|
|
20, XML_MAX_ITEMS); |
|
|
if (newSize < 0) |
|
|
return(-1); |
|
|
tmp = xmlRealloc(xmlCharEncodingAliases, newSize * sizeof(tmp[0])); |
|
|
if (tmp == NULL) |
|
|
return(-1); |
|
|
xmlCharEncodingAliases = tmp; |
|
|
xmlCharEncodingAliasesMax = newSize; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (i = 0;i < xmlCharEncodingAliasesNb;i++) { |
|
|
if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { |
|
|
|
|
|
|
|
|
|
|
|
nameCopy = xmlMemStrdup(name); |
|
|
if (nameCopy == NULL) |
|
|
return(-1); |
|
|
xmlFree((char *) xmlCharEncodingAliases[i].name); |
|
|
xmlCharEncodingAliases[i].name = nameCopy; |
|
|
return(0); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
nameCopy = xmlMemStrdup(name); |
|
|
if (nameCopy == NULL) |
|
|
return(-1); |
|
|
aliasCopy = xmlMemStrdup(upper); |
|
|
if (aliasCopy == NULL) { |
|
|
xmlFree(nameCopy); |
|
|
return(-1); |
|
|
} |
|
|
xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = nameCopy; |
|
|
xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = aliasCopy; |
|
|
xmlCharEncodingAliasesNb++; |
|
|
return(0); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
xmlDelEncodingAlias(const char *alias) { |
|
|
int i; |
|
|
|
|
|
if (alias == NULL) |
|
|
return(-1); |
|
|
|
|
|
if (xmlCharEncodingAliases == NULL) |
|
|
return(-1); |
|
|
|
|
|
|
|
|
|
|
|
for (i = 0;i < xmlCharEncodingAliasesNb;i++) { |
|
|
if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { |
|
|
xmlFree((char *) xmlCharEncodingAliases[i].name); |
|
|
xmlFree((char *) xmlCharEncodingAliases[i].alias); |
|
|
xmlCharEncodingAliasesNb--; |
|
|
memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], |
|
|
sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); |
|
|
return(0); |
|
|
} |
|
|
} |
|
|
return(-1); |
|
|
} |
|
|
|
|
|
static int |
|
|
xmlCompareEncTableEntries(const void *vkey, const void *ventry) { |
|
|
const char *key = vkey; |
|
|
const xmlEncTableEntry *entry = ventry; |
|
|
|
|
|
return(xmlStrcasecmp(BAD_CAST key, BAD_CAST entry->name)); |
|
|
} |
|
|
|
|
|
static xmlCharEncoding |
|
|
xmlParseCharEncodingInternal(const char *name) |
|
|
{ |
|
|
const xmlEncTableEntry *entry; |
|
|
|
|
|
if (name == NULL) |
|
|
return(XML_CHAR_ENCODING_NONE); |
|
|
|
|
|
entry = bsearch(name, xmlEncTable, |
|
|
sizeof(xmlEncTable) / sizeof(xmlEncTable[0]), |
|
|
sizeof(xmlEncTable[0]), xmlCompareEncTableEntries); |
|
|
if (entry != NULL) |
|
|
return(entry->enc); |
|
|
|
|
|
return(XML_CHAR_ENCODING_ERROR); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlCharEncoding |
|
|
xmlParseCharEncoding(const char *name) |
|
|
{ |
|
|
xmlCharEncoding enc = xmlParseCharEncodingInternal(name); |
|
|
|
|
|
|
|
|
if (enc == XML_CHAR_ENCODING_UTF16) |
|
|
enc = XML_CHAR_ENCODING_UTF16LE; |
|
|
|
|
|
return(enc); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const char* |
|
|
xmlGetCharEncodingName(xmlCharEncoding enc) { |
|
|
switch (enc) { |
|
|
case XML_CHAR_ENCODING_UTF16LE: |
|
|
return("UTF-16"); |
|
|
case XML_CHAR_ENCODING_UTF16BE: |
|
|
return("UTF-16"); |
|
|
case XML_CHAR_ENCODING_UCS4LE: |
|
|
return("UCS-4"); |
|
|
case XML_CHAR_ENCODING_UCS4BE: |
|
|
return("UCS-4"); |
|
|
default: |
|
|
break; |
|
|
} |
|
|
|
|
|
if ((enc <= 0) || ((size_t) enc >= NUM_DEFAULT_HANDLERS)) |
|
|
return(NULL); |
|
|
|
|
|
return(defaultHandlers[enc].name); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlCharEncodingHandler * |
|
|
xmlNewCharEncodingHandler(const char *name, |
|
|
xmlCharEncodingInputFunc input, |
|
|
xmlCharEncodingOutputFunc output) { |
|
|
xmlCharEncodingHandlerPtr handler; |
|
|
const char *alias; |
|
|
char upper[500]; |
|
|
int i; |
|
|
char *up = NULL; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
alias = xmlGetEncodingAlias(name); |
|
|
if (alias != NULL) |
|
|
name = alias; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (name == NULL) |
|
|
return(NULL); |
|
|
for (i = 0;i < 499;i++) { |
|
|
upper[i] = (char) toupper((unsigned char) name[i]); |
|
|
if (upper[i] == 0) break; |
|
|
} |
|
|
upper[i] = 0; |
|
|
up = xmlMemStrdup(upper); |
|
|
if (up == NULL) |
|
|
return(NULL); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
handler = (xmlCharEncodingHandlerPtr) |
|
|
xmlMalloc(sizeof(xmlCharEncodingHandler)); |
|
|
if (handler == NULL) { |
|
|
xmlFree(up); |
|
|
return(NULL); |
|
|
} |
|
|
memset(handler, 0, sizeof(xmlCharEncodingHandler)); |
|
|
handler->input.legacyFunc = input; |
|
|
handler->output.legacyFunc = output; |
|
|
handler->name = up; |
|
|
handler->flags = XML_HANDLER_STATIC | XML_HANDLER_LEGACY; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlRegisterCharEncodingHandler(handler); |
|
|
return(handler); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlParserErrors |
|
|
xmlCharEncNewCustomHandler(const char *name, |
|
|
xmlCharEncConvFunc input, xmlCharEncConvFunc output, |
|
|
xmlCharEncConvCtxtDtor ctxtDtor, |
|
|
void *inputCtxt, void *outputCtxt, |
|
|
xmlCharEncodingHandler **out) { |
|
|
xmlCharEncodingHandler *handler; |
|
|
|
|
|
if (out == NULL) |
|
|
return(XML_ERR_ARGUMENT); |
|
|
|
|
|
handler = xmlMalloc(sizeof(*handler)); |
|
|
if (handler == NULL) |
|
|
goto error; |
|
|
memset(handler, 0, sizeof(*handler)); |
|
|
|
|
|
if (name != NULL) { |
|
|
handler->name = xmlMemStrdup(name); |
|
|
if (handler->name == NULL) |
|
|
goto error; |
|
|
} |
|
|
|
|
|
handler->input.func = input; |
|
|
handler->output.func = output; |
|
|
handler->ctxtDtor = ctxtDtor; |
|
|
handler->inputCtxt = inputCtxt; |
|
|
handler->outputCtxt = outputCtxt; |
|
|
|
|
|
*out = handler; |
|
|
return(XML_ERR_OK); |
|
|
|
|
|
error: |
|
|
xmlFree(handler); |
|
|
|
|
|
if (ctxtDtor != NULL) { |
|
|
if (inputCtxt != NULL) |
|
|
ctxtDtor(inputCtxt); |
|
|
if (outputCtxt != NULL) |
|
|
ctxtDtor(outputCtxt); |
|
|
} |
|
|
|
|
|
return(XML_ERR_NO_MEMORY); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void |
|
|
xmlInitCharEncodingHandlers(void) { |
|
|
xmlInitParser(); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void |
|
|
xmlInitEncodingInternal(void) { |
|
|
unsigned short int tst = 0x1234; |
|
|
unsigned char *ptr = (unsigned char *) &tst; |
|
|
|
|
|
if (*ptr == 0x12) xmlLittleEndian = 0; |
|
|
else xmlLittleEndian = 1; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void |
|
|
xmlCleanupCharEncodingHandlers(void) { |
|
|
xmlCleanupEncodingAliases(); |
|
|
|
|
|
if (globalHandlers == NULL) return; |
|
|
|
|
|
for (;nbCharEncodingHandler > 0;) { |
|
|
xmlCharEncodingHandler *handler; |
|
|
|
|
|
nbCharEncodingHandler--; |
|
|
handler = globalHandlers[nbCharEncodingHandler]; |
|
|
if (handler != NULL) { |
|
|
if (handler->name != NULL) |
|
|
xmlFree(handler->name); |
|
|
xmlFree(handler); |
|
|
} |
|
|
} |
|
|
xmlFree(globalHandlers); |
|
|
globalHandlers = NULL; |
|
|
nbCharEncodingHandler = 0; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void |
|
|
xmlRegisterCharEncodingHandler(xmlCharEncodingHandler *handler) { |
|
|
if (handler == NULL) |
|
|
return; |
|
|
if (globalHandlers == NULL) { |
|
|
globalHandlers = xmlMalloc( |
|
|
MAX_ENCODING_HANDLERS * sizeof(globalHandlers[0])); |
|
|
if (globalHandlers == NULL) |
|
|
goto free_handler; |
|
|
} |
|
|
|
|
|
if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) |
|
|
goto free_handler; |
|
|
globalHandlers[nbCharEncodingHandler++] = handler; |
|
|
return; |
|
|
|
|
|
free_handler: |
|
|
if (handler != NULL) { |
|
|
if (handler->name != NULL) { |
|
|
xmlFree(handler->name); |
|
|
} |
|
|
xmlFree(handler); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static xmlParserErrors |
|
|
xmlFindExtraHandler(const char *norig, const char *name, xmlCharEncFlags flags, |
|
|
xmlCharEncConvImpl impl, void *implCtxt, |
|
|
xmlCharEncodingHandler **out) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (impl != NULL) |
|
|
return(impl(implCtxt, norig, flags, out)); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (globalHandlers != NULL) { |
|
|
int i; |
|
|
|
|
|
for (i = 0; i < nbCharEncodingHandler; i++) { |
|
|
xmlCharEncodingHandler *h = globalHandlers[i]; |
|
|
|
|
|
if (!xmlStrcasecmp((const xmlChar *) name, |
|
|
(const xmlChar *) h->name)) { |
|
|
if ((((flags & XML_ENC_INPUT) == 0) || (h->input.func)) && |
|
|
(((flags & XML_ENC_OUTPUT) == 0) || (h->output.func))) { |
|
|
*out = h; |
|
|
return(XML_ERR_OK); |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
#ifdef LIBXML_ICONV_ENABLED |
|
|
{ |
|
|
int ret = xmlCharEncIconv(name, flags, out); |
|
|
|
|
|
if (ret == XML_ERR_OK) |
|
|
return(XML_ERR_OK); |
|
|
if (ret != XML_ERR_UNSUPPORTED_ENCODING) |
|
|
return(ret); |
|
|
} |
|
|
#endif |
|
|
|
|
|
#ifdef LIBXML_ICU_ENABLED |
|
|
{ |
|
|
int ret = xmlCharEncUconv(name, flags, out); |
|
|
|
|
|
if (ret == XML_ERR_OK) |
|
|
return(XML_ERR_OK); |
|
|
if (ret != XML_ERR_UNSUPPORTED_ENCODING) |
|
|
return(ret); |
|
|
} |
|
|
#endif |
|
|
|
|
|
return(XML_ERR_UNSUPPORTED_ENCODING); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlParserErrors |
|
|
xmlLookupCharEncodingHandler(xmlCharEncoding enc, |
|
|
xmlCharEncodingHandler **out) { |
|
|
const xmlCharEncodingHandler *handler; |
|
|
|
|
|
if (out == NULL) |
|
|
return(XML_ERR_ARGUMENT); |
|
|
*out = NULL; |
|
|
|
|
|
if ((enc <= 0) || ((size_t) enc >= NUM_DEFAULT_HANDLERS)) |
|
|
return(XML_ERR_UNSUPPORTED_ENCODING); |
|
|
|
|
|
|
|
|
if ((enc == XML_CHAR_ENCODING_UTF8) || |
|
|
(enc == XML_CHAR_ENCODING_NONE)) |
|
|
return(XML_ERR_OK); |
|
|
|
|
|
handler = &defaultHandlers[enc]; |
|
|
if ((handler->input.func != NULL) || (handler->output.func != NULL)) { |
|
|
*out = (xmlCharEncodingHandler *) handler; |
|
|
return(XML_ERR_OK); |
|
|
} |
|
|
|
|
|
if (handler->name != NULL) { |
|
|
xmlCharEncFlags flags = XML_ENC_INPUT; |
|
|
|
|
|
#ifdef LIBXML_OUTPUT_ENABLED |
|
|
flags |= XML_ENC_OUTPUT; |
|
|
#endif |
|
|
return(xmlFindExtraHandler(handler->name, handler->name, flags, |
|
|
NULL, NULL, out)); |
|
|
} |
|
|
|
|
|
return(XML_ERR_UNSUPPORTED_ENCODING); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlCharEncodingHandler * |
|
|
xmlGetCharEncodingHandler(xmlCharEncoding enc) { |
|
|
xmlCharEncodingHandler *ret; |
|
|
|
|
|
xmlLookupCharEncodingHandler(enc, &ret); |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlParserErrors |
|
|
xmlCreateCharEncodingHandler(const char *name, xmlCharEncFlags flags, |
|
|
xmlCharEncConvImpl impl, void *implCtxt, |
|
|
xmlCharEncodingHandler **out) { |
|
|
const xmlCharEncodingHandler *handler; |
|
|
const char *norig, *nalias; |
|
|
xmlCharEncoding enc; |
|
|
|
|
|
if (out == NULL) |
|
|
return(XML_ERR_ARGUMENT); |
|
|
*out = NULL; |
|
|
|
|
|
if ((name == NULL) || (flags == 0)) |
|
|
return(XML_ERR_ARGUMENT); |
|
|
|
|
|
norig = name; |
|
|
nalias = xmlGetEncodingAlias(name); |
|
|
if (nalias != NULL) |
|
|
name = nalias; |
|
|
|
|
|
enc = xmlParseCharEncodingInternal(name); |
|
|
|
|
|
|
|
|
if (enc == XML_CHAR_ENCODING_UTF8) |
|
|
return(XML_ERR_OK); |
|
|
|
|
|
if ((enc > 0) && ((size_t) enc < NUM_DEFAULT_HANDLERS)) { |
|
|
if (flags & XML_ENC_HTML) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
switch (enc) { |
|
|
case XML_CHAR_ENCODING_ASCII: |
|
|
case XML_CHAR_ENCODING_8859_1: |
|
|
enc = XML_CHAR_ENCODING_WINDOWS_1252; |
|
|
break; |
|
|
case XML_CHAR_ENCODING_UCS2: |
|
|
case XML_CHAR_ENCODING_UTF16: |
|
|
enc = XML_CHAR_ENCODING_UTF16LE; |
|
|
break; |
|
|
default: |
|
|
break; |
|
|
} |
|
|
} |
|
|
|
|
|
handler = &defaultHandlers[enc]; |
|
|
if ((((flags & XML_ENC_INPUT) == 0) || (handler->input.func)) && |
|
|
(((flags & XML_ENC_OUTPUT) == 0) || (handler->output.func))) { |
|
|
xmlCharEncodingHandler *ret; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ret = xmlMalloc(sizeof(*ret)); |
|
|
if (ret == NULL) |
|
|
return(XML_ERR_NO_MEMORY); |
|
|
memset(ret, 0, sizeof(*ret)); |
|
|
|
|
|
ret->name = xmlMemStrdup(norig); |
|
|
if (ret->name == NULL) { |
|
|
xmlFree(ret); |
|
|
return(XML_ERR_NO_MEMORY); |
|
|
} |
|
|
ret->input = handler->input; |
|
|
ret->output = handler->output; |
|
|
ret->inputCtxt = handler->inputCtxt; |
|
|
ret->outputCtxt = handler->outputCtxt; |
|
|
ret->ctxtDtor = handler->ctxtDtor; |
|
|
|
|
|
*out = ret; |
|
|
return(XML_ERR_OK); |
|
|
} |
|
|
} |
|
|
|
|
|
return(xmlFindExtraHandler(norig, name, flags, impl, implCtxt, out)); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlParserErrors |
|
|
xmlOpenCharEncodingHandler(const char *name, int output, |
|
|
xmlCharEncodingHandler **out) { |
|
|
xmlCharEncFlags flags = output ? XML_ENC_OUTPUT : XML_ENC_INPUT; |
|
|
|
|
|
return(xmlCreateCharEncodingHandler(name, flags, NULL, NULL, out)); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlCharEncodingHandler * |
|
|
xmlFindCharEncodingHandler(const char *name) { |
|
|
xmlCharEncodingHandler *ret; |
|
|
xmlCharEncFlags flags; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ((xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF-8") == 0) || |
|
|
(xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF8") == 0)) |
|
|
return((xmlCharEncodingHandlerPtr) |
|
|
&defaultHandlers[XML_CHAR_ENCODING_UTF8]); |
|
|
|
|
|
flags = XML_ENC_INPUT; |
|
|
#ifdef LIBXML_OUTPUT_ENABLED |
|
|
flags |= XML_ENC_OUTPUT; |
|
|
#endif |
|
|
xmlCreateCharEncodingHandler(name, flags, NULL, NULL, &ret); |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef LIBXML_ICONV_ENABLED |
|
|
typedef struct { |
|
|
iconv_t cd; |
|
|
} xmlIconvCtxt; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static xmlCharEncError |
|
|
xmlIconvConvert(void *vctxt, unsigned char *out, int *outlen, |
|
|
const unsigned char *in, int *inlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
xmlIconvCtxt *ctxt = vctxt; |
|
|
size_t icv_inlen, icv_outlen; |
|
|
const char *icv_in = (const char *) in; |
|
|
char *icv_out = (char *) out; |
|
|
size_t ret; |
|
|
|
|
|
if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { |
|
|
if (outlen != NULL) *outlen = 0; |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
} |
|
|
icv_inlen = *inlen; |
|
|
icv_outlen = *outlen; |
|
|
|
|
|
|
|
|
|
|
|
ret = iconv(ctxt->cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen); |
|
|
*inlen -= icv_inlen; |
|
|
*outlen -= icv_outlen; |
|
|
if (ret == (size_t) -1) { |
|
|
if (errno == EILSEQ) |
|
|
return(XML_ENC_ERR_INPUT); |
|
|
if (errno == E2BIG) |
|
|
return(XML_ENC_ERR_SPACE); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (errno == EINVAL) |
|
|
return(XML_ENC_ERR_SUCCESS); |
|
|
#ifdef __APPLE__ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (errno == EOPNOTSUPP) |
|
|
return(XML_ENC_ERR_INPUT); |
|
|
#endif |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
} |
|
|
return(XML_ENC_ERR_SUCCESS); |
|
|
} |
|
|
|
|
|
static void |
|
|
xmlIconvFree(void *vctxt) { |
|
|
xmlIconvCtxt *ctxt = vctxt; |
|
|
|
|
|
if (ctxt == NULL) |
|
|
return; |
|
|
|
|
|
if (ctxt->cd != (iconv_t) -1) |
|
|
iconv_close(ctxt->cd); |
|
|
|
|
|
xmlFree(ctxt); |
|
|
} |
|
|
|
|
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && \ |
|
|
defined(__GLIBC__) |
|
|
#include <libxml/parserInternals.h> |
|
|
|
|
|
static int |
|
|
xmlEncodingMatch(const char *name1, const char *name2) { |
|
|
|
|
|
|
|
|
|
|
|
while (1) { |
|
|
while ((*name1 != 0) && (!IS_ASCII_LETTER(*name1))) |
|
|
name1 += 1; |
|
|
while ((*name2 != 0) && (!IS_ASCII_LETTER(*name2))) |
|
|
name2 += 1; |
|
|
if ((*name1 == 0) || (*name2 == 0)) |
|
|
break; |
|
|
if ((*name1 | 0x20) != (*name2 | 0x20)) |
|
|
return(0); |
|
|
name1 += 1; |
|
|
name2 += 1; |
|
|
} |
|
|
|
|
|
|
|
|
return(*name2 == 0); |
|
|
} |
|
|
#endif |
|
|
|
|
|
static xmlParserErrors |
|
|
xmlCharEncIconv(const char *name, xmlCharEncFlags flags, |
|
|
xmlCharEncodingHandler **out) { |
|
|
xmlCharEncConvFunc inFunc = NULL, outFunc = NULL; |
|
|
xmlIconvCtxt *inputCtxt = NULL, *outputCtxt = NULL; |
|
|
iconv_t icv_in; |
|
|
iconv_t icv_out; |
|
|
xmlParserErrors ret; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (strstr(name, "//") != NULL) { |
|
|
ret = XML_ERR_UNSUPPORTED_ENCODING; |
|
|
goto error; |
|
|
} |
|
|
|
|
|
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && \ |
|
|
defined(__GLIBC__) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ((xmlEncodingMatch(name, "TSCII")) || |
|
|
(xmlEncodingMatch(name, "BIG5-HKSCS"))) { |
|
|
ret = XML_ERR_UNSUPPORTED_ENCODING; |
|
|
goto error; |
|
|
} |
|
|
#endif |
|
|
|
|
|
if (flags & XML_ENC_INPUT) { |
|
|
inputCtxt = xmlMalloc(sizeof(xmlIconvCtxt)); |
|
|
if (inputCtxt == NULL) { |
|
|
ret = XML_ERR_NO_MEMORY; |
|
|
goto error; |
|
|
} |
|
|
inputCtxt->cd = (iconv_t) -1; |
|
|
|
|
|
icv_in = iconv_open("UTF-8", name); |
|
|
if (icv_in == (iconv_t) -1) { |
|
|
if (errno == EINVAL) |
|
|
ret = XML_ERR_UNSUPPORTED_ENCODING; |
|
|
else if (errno == ENOMEM) |
|
|
ret = XML_ERR_NO_MEMORY; |
|
|
else |
|
|
ret = XML_ERR_SYSTEM; |
|
|
goto error; |
|
|
} |
|
|
inputCtxt->cd = icv_in; |
|
|
|
|
|
inFunc = xmlIconvConvert; |
|
|
} |
|
|
|
|
|
if (flags & XML_ENC_OUTPUT) { |
|
|
outputCtxt = xmlMalloc(sizeof(xmlIconvCtxt)); |
|
|
if (outputCtxt == NULL) { |
|
|
ret = XML_ERR_NO_MEMORY; |
|
|
goto error; |
|
|
} |
|
|
outputCtxt->cd = (iconv_t) -1; |
|
|
|
|
|
icv_out = iconv_open(name, "UTF-8"); |
|
|
if (icv_out == (iconv_t) -1) { |
|
|
if (errno == EINVAL) |
|
|
ret = XML_ERR_UNSUPPORTED_ENCODING; |
|
|
else if (errno == ENOMEM) |
|
|
ret = XML_ERR_NO_MEMORY; |
|
|
else |
|
|
ret = XML_ERR_SYSTEM; |
|
|
goto error; |
|
|
} |
|
|
outputCtxt->cd = icv_out; |
|
|
|
|
|
outFunc = xmlIconvConvert; |
|
|
} |
|
|
|
|
|
return(xmlCharEncNewCustomHandler(name, inFunc, outFunc, xmlIconvFree, |
|
|
inputCtxt, outputCtxt, out)); |
|
|
|
|
|
error: |
|
|
if (inputCtxt != NULL) |
|
|
xmlIconvFree(inputCtxt); |
|
|
if (outputCtxt != NULL) |
|
|
xmlIconvFree(outputCtxt); |
|
|
return(ret); |
|
|
} |
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef LIBXML_ICU_ENABLED |
|
|
|
|
|
#define ICU_PIVOT_BUF_SIZE 1024 |
|
|
|
|
|
typedef struct _uconv_t xmlUconvCtxt; |
|
|
struct _uconv_t { |
|
|
UConverter *uconv; |
|
|
UConverter *utf8; |
|
|
UChar *pivot_source; |
|
|
UChar *pivot_target; |
|
|
int isInput; |
|
|
UChar pivot_buf[ICU_PIVOT_BUF_SIZE]; |
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static xmlCharEncError |
|
|
xmlUconvConvert(void *vctxt, unsigned char *out, int *outlen, |
|
|
const unsigned char *in, int *inlen, int flush) { |
|
|
xmlUconvCtxt *cd = vctxt; |
|
|
const char *ucv_in = (const char *) in; |
|
|
char *ucv_out = (char *) out; |
|
|
UConverter *target, *source; |
|
|
UErrorCode err = U_ZERO_ERROR; |
|
|
int ret; |
|
|
|
|
|
if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { |
|
|
if (outlen != NULL) |
|
|
*outlen = 0; |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (cd->isInput) { |
|
|
source = cd->uconv; |
|
|
target = cd->utf8; |
|
|
} else { |
|
|
source = cd->utf8; |
|
|
target = cd->uconv; |
|
|
} |
|
|
|
|
|
ucnv_convertEx(target, source, &ucv_out, ucv_out + *outlen, |
|
|
&ucv_in, ucv_in + *inlen, cd->pivot_buf, |
|
|
&cd->pivot_source, &cd->pivot_target, |
|
|
cd->pivot_buf + ICU_PIVOT_BUF_SIZE, |
|
|
0, flush, &err); |
|
|
|
|
|
*inlen = ucv_in - (const char*) in; |
|
|
*outlen = ucv_out - (char *) out; |
|
|
|
|
|
if (U_SUCCESS(err)) { |
|
|
ret = XML_ENC_ERR_SUCCESS; |
|
|
} else { |
|
|
switch (err) { |
|
|
case U_TRUNCATED_CHAR_FOUND: |
|
|
|
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
break; |
|
|
|
|
|
case U_BUFFER_OVERFLOW_ERROR: |
|
|
ret = XML_ENC_ERR_SPACE; |
|
|
break; |
|
|
|
|
|
case U_INVALID_CHAR_FOUND: |
|
|
case U_ILLEGAL_CHAR_FOUND: |
|
|
case U_ILLEGAL_ESCAPE_SEQUENCE: |
|
|
case U_UNSUPPORTED_ESCAPE_SEQUENCE: |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
break; |
|
|
|
|
|
case U_MEMORY_ALLOCATION_ERROR: |
|
|
ret = XML_ENC_ERR_MEMORY; |
|
|
break; |
|
|
|
|
|
default: |
|
|
ret = XML_ENC_ERR_INTERNAL; |
|
|
break; |
|
|
} |
|
|
} |
|
|
|
|
|
return(ret); |
|
|
} |
|
|
|
|
|
static xmlParserErrors |
|
|
openIcuConverter(const char* name, int isInput, xmlUconvCtxt **out) |
|
|
{ |
|
|
UErrorCode status; |
|
|
xmlUconvCtxt *conv; |
|
|
|
|
|
*out = NULL; |
|
|
|
|
|
conv = (xmlUconvCtxt *) xmlMalloc(sizeof(xmlUconvCtxt)); |
|
|
if (conv == NULL) |
|
|
return(XML_ERR_NO_MEMORY); |
|
|
|
|
|
conv->isInput = isInput; |
|
|
conv->pivot_source = conv->pivot_buf; |
|
|
conv->pivot_target = conv->pivot_buf; |
|
|
|
|
|
status = U_ZERO_ERROR; |
|
|
conv->uconv = ucnv_open(name, &status); |
|
|
if (U_FAILURE(status)) |
|
|
goto error; |
|
|
|
|
|
status = U_ZERO_ERROR; |
|
|
if (isInput) { |
|
|
ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, |
|
|
NULL, NULL, NULL, &status); |
|
|
} |
|
|
else { |
|
|
ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, |
|
|
NULL, NULL, NULL, &status); |
|
|
} |
|
|
if (U_FAILURE(status)) |
|
|
goto error; |
|
|
|
|
|
status = U_ZERO_ERROR; |
|
|
conv->utf8 = ucnv_open("UTF-8", &status); |
|
|
if (U_FAILURE(status)) |
|
|
goto error; |
|
|
|
|
|
*out = conv; |
|
|
return(XML_ERR_OK); |
|
|
|
|
|
error: |
|
|
if (conv->uconv) |
|
|
ucnv_close(conv->uconv); |
|
|
xmlFree(conv); |
|
|
|
|
|
if (status == U_FILE_ACCESS_ERROR) |
|
|
return(XML_ERR_UNSUPPORTED_ENCODING); |
|
|
if (status == U_MEMORY_ALLOCATION_ERROR) |
|
|
return(XML_ERR_NO_MEMORY); |
|
|
return(XML_ERR_SYSTEM); |
|
|
} |
|
|
|
|
|
static void |
|
|
closeIcuConverter(xmlUconvCtxt *conv) |
|
|
{ |
|
|
if (conv == NULL) |
|
|
return; |
|
|
ucnv_close(conv->uconv); |
|
|
ucnv_close(conv->utf8); |
|
|
xmlFree(conv); |
|
|
} |
|
|
|
|
|
static void |
|
|
xmlUconvFree(void *vctxt) { |
|
|
closeIcuConverter(vctxt); |
|
|
} |
|
|
|
|
|
static xmlParserErrors |
|
|
xmlCharEncUconv(const char *name, xmlCharEncFlags flags, |
|
|
xmlCharEncodingHandler **out) { |
|
|
xmlCharEncConvFunc inFunc = NULL, outFunc = NULL; |
|
|
xmlUconvCtxt *ucv_in = NULL; |
|
|
xmlUconvCtxt *ucv_out = NULL; |
|
|
int ret; |
|
|
|
|
|
if (flags & XML_ENC_INPUT) { |
|
|
ret = openIcuConverter(name, 1, &ucv_in); |
|
|
if (ret != 0) |
|
|
goto error; |
|
|
inFunc = xmlUconvConvert; |
|
|
} |
|
|
|
|
|
if (flags & XML_ENC_OUTPUT) { |
|
|
ret = openIcuConverter(name, 0, &ucv_out); |
|
|
if (ret != 0) |
|
|
goto error; |
|
|
outFunc = xmlUconvConvert; |
|
|
} |
|
|
|
|
|
return(xmlCharEncNewCustomHandler(name, inFunc, outFunc, xmlUconvFree, |
|
|
ucv_in, ucv_out, out)); |
|
|
|
|
|
error: |
|
|
if (ucv_in != NULL) |
|
|
closeIcuConverter(ucv_in); |
|
|
if (ucv_out != NULL) |
|
|
closeIcuConverter(ucv_out); |
|
|
return(ret); |
|
|
} |
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static xmlParserErrors |
|
|
xmlEncConvertError(xmlCharEncError code) { |
|
|
xmlParserErrors ret; |
|
|
|
|
|
switch (code) { |
|
|
case XML_ENC_ERR_SUCCESS: |
|
|
ret = XML_ERR_OK; |
|
|
break; |
|
|
case XML_ENC_ERR_INPUT: |
|
|
ret = XML_ERR_INVALID_ENCODING; |
|
|
break; |
|
|
case XML_ENC_ERR_MEMORY: |
|
|
ret = XML_ERR_NO_MEMORY; |
|
|
break; |
|
|
default: |
|
|
ret = XML_ERR_INTERNAL_ERROR; |
|
|
break; |
|
|
} |
|
|
|
|
|
return(ret); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlCharEncError |
|
|
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, |
|
|
int *outlen, const unsigned char *in, int *inlen, |
|
|
int flush) { |
|
|
xmlCharEncError ret; |
|
|
|
|
|
if (handler->flags & XML_HANDLER_LEGACY) { |
|
|
xmlCharEncodingInputFunc func = handler->input.legacyFunc; |
|
|
|
|
|
if (func == NULL) { |
|
|
*outlen = 0; |
|
|
*inlen = 0; |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
} |
|
|
|
|
|
ret = func(out, outlen, in, inlen); |
|
|
} else { |
|
|
xmlCharEncConvFunc func = handler->input.func; |
|
|
int oldInlen; |
|
|
|
|
|
if (func == NULL) { |
|
|
*outlen = 0; |
|
|
*inlen = 0; |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
} |
|
|
|
|
|
oldInlen = *inlen; |
|
|
ret = func(handler->inputCtxt, out, outlen, in, inlen, flush); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ((flush) && (ret == XML_ENC_ERR_SUCCESS) && (*inlen != oldInlen)) |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
} |
|
|
|
|
|
if (ret > 0) |
|
|
ret = XML_ENC_ERR_SUCCESS; |
|
|
|
|
|
return(ret); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static xmlCharEncError |
|
|
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, |
|
|
int *outlen, const unsigned char *in, int *inlen) { |
|
|
xmlCharEncError ret; |
|
|
|
|
|
if (handler->flags & XML_HANDLER_LEGACY) { |
|
|
xmlCharEncodingOutputFunc func = handler->output.legacyFunc; |
|
|
|
|
|
if (func == NULL) { |
|
|
*outlen = 0; |
|
|
*inlen = 0; |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
} |
|
|
|
|
|
ret = func(out, outlen, in, inlen); |
|
|
} else { |
|
|
xmlCharEncConvFunc func = handler->output.func; |
|
|
|
|
|
if (func == NULL) { |
|
|
*outlen = 0; |
|
|
*inlen = 0; |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
} |
|
|
|
|
|
ret = func(handler->outputCtxt, out, outlen, in, inlen, 0); |
|
|
} |
|
|
|
|
|
if (ret > 0) |
|
|
ret = XML_ENC_ERR_SUCCESS; |
|
|
|
|
|
return(ret); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, struct _xmlBuffer *out, |
|
|
struct _xmlBuffer *in) { |
|
|
return(xmlCharEncInFunc(handler, out, in)); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xmlCharEncError |
|
|
xmlCharEncInput(xmlParserInputBuffer *input, size_t *sizeOut, int flush) |
|
|
{ |
|
|
xmlBufPtr out, in; |
|
|
const xmlChar *dataIn; |
|
|
size_t availIn; |
|
|
size_t maxOut; |
|
|
size_t totalIn, totalOut; |
|
|
xmlCharEncError ret; |
|
|
|
|
|
out = input->buffer; |
|
|
in = input->raw; |
|
|
|
|
|
maxOut = *sizeOut; |
|
|
totalOut = 0; |
|
|
|
|
|
*sizeOut = 0; |
|
|
|
|
|
availIn = xmlBufUse(in); |
|
|
if ((availIn == 0) && (!flush)) |
|
|
return(0); |
|
|
dataIn = xmlBufContent(in); |
|
|
totalIn = 0; |
|
|
|
|
|
while (1) { |
|
|
size_t availOut; |
|
|
int completeOut, completeIn; |
|
|
int c_out, c_in; |
|
|
|
|
|
availOut = xmlBufAvail(out); |
|
|
if (availOut > INT_MAX / 2) |
|
|
availOut = INT_MAX / 2; |
|
|
|
|
|
if (availOut < maxOut) { |
|
|
c_out = availOut; |
|
|
completeOut = 0; |
|
|
} else { |
|
|
c_out = maxOut; |
|
|
completeOut = 1; |
|
|
} |
|
|
|
|
|
if (availIn > INT_MAX / 2) { |
|
|
c_in = INT_MAX / 2; |
|
|
completeIn = 0; |
|
|
} else { |
|
|
c_in = availIn; |
|
|
completeIn = 1; |
|
|
} |
|
|
|
|
|
ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out, |
|
|
dataIn, &c_in, flush && completeIn); |
|
|
|
|
|
totalIn += c_in; |
|
|
dataIn += c_in; |
|
|
availIn -= c_in; |
|
|
|
|
|
totalOut += c_out; |
|
|
maxOut -= c_out; |
|
|
xmlBufAddLen(out, c_out); |
|
|
|
|
|
if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE)) { |
|
|
input->error = xmlEncConvertError(ret); |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
if ((completeOut) && (completeIn)) |
|
|
break; |
|
|
if ((completeOut) && (ret == XML_ENC_ERR_SPACE)) |
|
|
break; |
|
|
if ((completeIn) && (ret == XML_ENC_ERR_SUCCESS)) |
|
|
break; |
|
|
|
|
|
if (ret == XML_ENC_ERR_SPACE) { |
|
|
if (xmlBufGrow(out, 4096) < 0) { |
|
|
input->error = XML_ERR_NO_MEMORY; |
|
|
return(XML_ENC_ERR_MEMORY); |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
xmlBufShrink(in, totalIn); |
|
|
|
|
|
if (input->rawconsumed > ULONG_MAX - (unsigned long) totalIn) |
|
|
input->rawconsumed = ULONG_MAX; |
|
|
else |
|
|
input->rawconsumed += totalIn; |
|
|
|
|
|
*sizeOut = totalOut; |
|
|
return(XML_ENC_ERR_SUCCESS); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
xmlCharEncInFunc(xmlCharEncodingHandler * handler, struct _xmlBuffer *out, |
|
|
struct _xmlBuffer *in) |
|
|
{ |
|
|
int ret; |
|
|
int written; |
|
|
int toconv; |
|
|
|
|
|
if (handler == NULL) |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
if (out == NULL) |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
if (in == NULL) |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
|
|
|
toconv = in->use; |
|
|
if (toconv == 0) |
|
|
return (0); |
|
|
written = out->size - out->use -1; |
|
|
if (toconv * 2 >= written) { |
|
|
xmlBufferGrow(out, out->size + toconv * 2); |
|
|
written = out->size - out->use - 1; |
|
|
} |
|
|
ret = xmlEncInputChunk(handler, &out->content[out->use], &written, |
|
|
in->content, &toconv, 0); |
|
|
xmlBufferShrink(in, toconv); |
|
|
out->use += written; |
|
|
out->content[out->use] = 0; |
|
|
|
|
|
return (written? written : ret); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int |
|
|
xmlSerializeDecCharRef(char *buf, int val) { |
|
|
char *out = buf; |
|
|
int len, i; |
|
|
|
|
|
*out++ = '&'; |
|
|
*out++ = '#'; |
|
|
|
|
|
if (val < 100) { |
|
|
len = (val < 10) ? 1 : 2; |
|
|
} else if (val < 10000) { |
|
|
len = (val < 1000) ? 3 : 4; |
|
|
} else if (val < 1000000) { |
|
|
len = (val < 100000) ? 5 : 6; |
|
|
} else { |
|
|
len = 7; |
|
|
} |
|
|
|
|
|
for (i = len - 1; i >= 0; i--) { |
|
|
out[i] = '0' + val % 10; |
|
|
val /= 10; |
|
|
} |
|
|
|
|
|
out[len] = ';'; |
|
|
|
|
|
return(len + 3); |
|
|
} |
|
|
|
|
|
#ifdef LIBXML_OUTPUT_ENABLED |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
xmlCharEncOutput(xmlOutputBuffer *output, int init) |
|
|
{ |
|
|
int ret; |
|
|
size_t written; |
|
|
int writtentot = 0; |
|
|
size_t toconv; |
|
|
int c_in; |
|
|
int c_out; |
|
|
xmlBufPtr in; |
|
|
xmlBufPtr out; |
|
|
|
|
|
if ((output == NULL) || (output->encoder == NULL) || |
|
|
(output->buffer == NULL) || (output->conv == NULL)) |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
out = output->conv; |
|
|
in = output->buffer; |
|
|
|
|
|
retry: |
|
|
|
|
|
written = xmlBufAvail(out); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (init) { |
|
|
c_in = 0; |
|
|
c_out = written; |
|
|
|
|
|
xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, |
|
|
NULL, &c_in); |
|
|
xmlBufAddLen(out, c_out); |
|
|
return(c_out); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
toconv = xmlBufUse(in); |
|
|
if (toconv > 64 * 1024) |
|
|
toconv = 64 * 1024; |
|
|
if (toconv * 4 >= written) { |
|
|
if (xmlBufGrow(out, toconv * 4) < 0) { |
|
|
ret = XML_ENC_ERR_MEMORY; |
|
|
goto error; |
|
|
} |
|
|
written = xmlBufAvail(out); |
|
|
} |
|
|
if (written > 256 * 1024) |
|
|
written = 256 * 1024; |
|
|
|
|
|
c_in = toconv; |
|
|
c_out = written; |
|
|
ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, |
|
|
xmlBufContent(in), &c_in); |
|
|
xmlBufShrink(in, c_in); |
|
|
xmlBufAddLen(out, c_out); |
|
|
writtentot += c_out; |
|
|
|
|
|
if (ret == XML_ENC_ERR_SPACE) |
|
|
goto retry; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (ret == XML_ENC_ERR_INPUT) { |
|
|
xmlChar charref[20]; |
|
|
int len = xmlBufUse(in); |
|
|
xmlChar *content = xmlBufContent(in); |
|
|
int cur, charrefLen; |
|
|
|
|
|
cur = xmlGetUTF8Char(content, &len); |
|
|
if (cur <= 0) |
|
|
goto error; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
charrefLen = xmlSerializeDecCharRef((char *) charref, cur); |
|
|
xmlBufGrow(out, charrefLen * 4); |
|
|
c_out = xmlBufAvail(out); |
|
|
c_in = charrefLen; |
|
|
ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, |
|
|
charref, &c_in); |
|
|
if ((ret < 0) || (c_in != charrefLen)) { |
|
|
ret = XML_ENC_ERR_INTERNAL; |
|
|
goto error; |
|
|
} |
|
|
|
|
|
xmlBufShrink(in, len); |
|
|
xmlBufAddLen(out, c_out); |
|
|
writtentot += c_out; |
|
|
goto retry; |
|
|
} |
|
|
|
|
|
error: |
|
|
if (((writtentot <= 0) && (ret != 0)) || |
|
|
(ret == XML_ENC_ERR_MEMORY)) { |
|
|
if (output->error == 0) |
|
|
output->error = xmlEncConvertError(ret); |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
return(writtentot); |
|
|
} |
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, struct _xmlBuffer *out, |
|
|
struct _xmlBuffer *in) { |
|
|
int ret; |
|
|
int written; |
|
|
int writtentot = 0; |
|
|
int toconv; |
|
|
|
|
|
if (handler == NULL) return(XML_ENC_ERR_INTERNAL); |
|
|
if (out == NULL) return(XML_ENC_ERR_INTERNAL); |
|
|
|
|
|
retry: |
|
|
|
|
|
written = out->size - out->use; |
|
|
|
|
|
if (written > 0) |
|
|
written--; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (in == NULL) { |
|
|
toconv = 0; |
|
|
|
|
|
xmlEncOutputChunk(handler, &out->content[out->use], &written, |
|
|
NULL, &toconv); |
|
|
out->use += written; |
|
|
out->content[out->use] = 0; |
|
|
return(0); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
toconv = in->use; |
|
|
if (toconv * 4 >= written) { |
|
|
xmlBufferGrow(out, toconv * 4); |
|
|
written = out->size - out->use - 1; |
|
|
} |
|
|
ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, |
|
|
in->content, &toconv); |
|
|
xmlBufferShrink(in, toconv); |
|
|
out->use += written; |
|
|
writtentot += written; |
|
|
out->content[out->use] = 0; |
|
|
|
|
|
if (ret == XML_ENC_ERR_SPACE) |
|
|
goto retry; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (ret == XML_ENC_ERR_INPUT) { |
|
|
xmlChar charref[20]; |
|
|
int len = in->use; |
|
|
const xmlChar *utf = (const xmlChar *) in->content; |
|
|
int cur, charrefLen; |
|
|
|
|
|
cur = xmlGetUTF8Char(utf, &len); |
|
|
if (cur <= 0) |
|
|
return(ret); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
charrefLen = xmlSerializeDecCharRef((char *) charref, cur); |
|
|
xmlBufferShrink(in, len); |
|
|
xmlBufferGrow(out, charrefLen * 4); |
|
|
written = out->size - out->use - 1; |
|
|
toconv = charrefLen; |
|
|
ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, |
|
|
charref, &toconv); |
|
|
if ((ret < 0) || (toconv != charrefLen)) |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
|
|
|
out->use += written; |
|
|
writtentot += written; |
|
|
out->content[out->use] = 0; |
|
|
goto retry; |
|
|
} |
|
|
return(writtentot ? writtentot : ret); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { |
|
|
if (handler == NULL) |
|
|
return(0); |
|
|
|
|
|
if (handler->flags & XML_HANDLER_STATIC) |
|
|
return(0); |
|
|
|
|
|
xmlFree(handler->name); |
|
|
if (handler->ctxtDtor != NULL) { |
|
|
handler->ctxtDtor(handler->inputCtxt); |
|
|
handler->ctxtDtor(handler->outputCtxt); |
|
|
} |
|
|
xmlFree(handler); |
|
|
return(0); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
long |
|
|
xmlByteConsumed(xmlParserCtxt *ctxt) { |
|
|
xmlParserInputPtr in; |
|
|
|
|
|
if (ctxt == NULL) |
|
|
return(-1); |
|
|
in = ctxt->input; |
|
|
if (in == NULL) |
|
|
return(-1); |
|
|
|
|
|
if ((in->buf != NULL) && (in->buf->encoder != NULL)) { |
|
|
int unused = 0; |
|
|
xmlCharEncodingHandler * handler = in->buf->encoder; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (in->end - in->cur > 0) { |
|
|
unsigned char *convbuf; |
|
|
const unsigned char *cur = (const unsigned char *)in->cur; |
|
|
int toconv, ret; |
|
|
|
|
|
convbuf = xmlMalloc(32000); |
|
|
if (convbuf == NULL) |
|
|
return(-1); |
|
|
|
|
|
toconv = in->end - cur; |
|
|
unused = 32000; |
|
|
ret = xmlEncOutputChunk(handler, convbuf, &unused, cur, &toconv); |
|
|
|
|
|
xmlFree(convbuf); |
|
|
|
|
|
if (ret != XML_ENC_ERR_SUCCESS) |
|
|
return(-1); |
|
|
} |
|
|
|
|
|
if (in->buf->rawconsumed < (unsigned long) unused) |
|
|
return(-1); |
|
|
return(in->buf->rawconsumed - unused); |
|
|
} |
|
|
|
|
|
return(in->consumed + (in->cur - in->base)); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static xmlCharEncError |
|
|
asciiToAscii(void *vctxt ATTRIBUTE_UNUSED, |
|
|
unsigned char* out, int *poutlen, |
|
|
const unsigned char* in, int *pinlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
const unsigned char *inend; |
|
|
const unsigned char *instart = in; |
|
|
int inlen, outlen, ret; |
|
|
|
|
|
if (in == NULL) { |
|
|
*pinlen = 0; |
|
|
*poutlen = 0; |
|
|
return(XML_ENC_ERR_SUCCESS); |
|
|
} |
|
|
|
|
|
inlen = *pinlen; |
|
|
outlen = *poutlen; |
|
|
|
|
|
if (outlen < inlen) { |
|
|
inlen = outlen; |
|
|
ret = XML_ENC_ERR_SPACE; |
|
|
} else { |
|
|
ret = inlen; |
|
|
} |
|
|
|
|
|
inend = in + inlen; |
|
|
*poutlen = inlen; |
|
|
*pinlen = inlen; |
|
|
|
|
|
while (in < inend) { |
|
|
unsigned c = *in; |
|
|
|
|
|
if (c >= 0x80) { |
|
|
*poutlen = in - instart; |
|
|
*pinlen = in - instart; |
|
|
return(XML_ENC_ERR_INPUT); |
|
|
} |
|
|
|
|
|
in++; |
|
|
*out++ = c; |
|
|
} |
|
|
|
|
|
return(ret); |
|
|
} |
|
|
|
|
|
static xmlCharEncError |
|
|
latin1ToUTF8(void *vctxt ATTRIBUTE_UNUSED, |
|
|
unsigned char* out, int *outlen, |
|
|
const unsigned char* in, int *inlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
unsigned char* outstart = out; |
|
|
const unsigned char* instart = in; |
|
|
unsigned char* outend; |
|
|
const unsigned char* inend; |
|
|
int ret = XML_ENC_ERR_SPACE; |
|
|
|
|
|
if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL)) |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
|
|
|
outend = out + *outlen; |
|
|
inend = in + *inlen; |
|
|
|
|
|
while (in < inend) { |
|
|
unsigned c = *in; |
|
|
|
|
|
if (c < 0x80) { |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
*out++ = c; |
|
|
} else { |
|
|
if (outend - out < 2) |
|
|
goto done; |
|
|
*out++ = (c >> 6) | 0xC0; |
|
|
*out++ = (c & 0x3F) | 0x80; |
|
|
} |
|
|
|
|
|
in++; |
|
|
} |
|
|
|
|
|
ret = out - outstart; |
|
|
|
|
|
done: |
|
|
*outlen = out - outstart; |
|
|
*inlen = in - instart; |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
xmlIsolat1ToUTF8(unsigned char* out, int *outlen, |
|
|
const unsigned char* in, int *inlen) { |
|
|
return(latin1ToUTF8( NULL, out, outlen, in, inlen, |
|
|
0)); |
|
|
} |
|
|
|
|
|
static xmlCharEncError |
|
|
UTF8ToUTF8(void *vctxt ATTRIBUTE_UNUSED, |
|
|
unsigned char* out, int *outlen, |
|
|
const unsigned char* in, int *inlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
int len; |
|
|
int ret; |
|
|
|
|
|
if (in == NULL) { |
|
|
*inlen = 0; |
|
|
*outlen = 0; |
|
|
return(XML_ENC_ERR_SUCCESS); |
|
|
} |
|
|
|
|
|
if (*outlen < *inlen) { |
|
|
len = *outlen; |
|
|
ret = XML_ENC_ERR_SPACE; |
|
|
} else { |
|
|
len = *inlen; |
|
|
ret = len; |
|
|
} |
|
|
|
|
|
memcpy(out, in, len); |
|
|
|
|
|
*outlen = len; |
|
|
*inlen = len; |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
|
|
|
#ifdef LIBXML_OUTPUT_ENABLED |
|
|
static xmlCharEncError |
|
|
UTF8ToLatin1(void *vctxt ATTRIBUTE_UNUSED, |
|
|
unsigned char* out, int *outlen, |
|
|
const unsigned char* in, int *inlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
const unsigned char* outend; |
|
|
const unsigned char* outstart = out; |
|
|
const unsigned char* instart = in; |
|
|
const unsigned char* inend; |
|
|
unsigned c; |
|
|
int ret = XML_ENC_ERR_SPACE; |
|
|
|
|
|
if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
|
|
|
if (in == NULL) { |
|
|
*inlen = 0; |
|
|
*outlen = 0; |
|
|
return(XML_ENC_ERR_SUCCESS); |
|
|
} |
|
|
|
|
|
inend = in + *inlen; |
|
|
outend = out + *outlen; |
|
|
while (in < inend) { |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
|
|
|
c = *in; |
|
|
|
|
|
if (c < 0x80) { |
|
|
*out++ = c; |
|
|
} else if ((c >= 0xC2) && (c <= 0xC3)) { |
|
|
if (inend - in < 2) |
|
|
break; |
|
|
in++; |
|
|
*out++ = (unsigned char) ((c << 6) | (*in & 0x3F)); |
|
|
} else { |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
|
|
|
in++; |
|
|
} |
|
|
|
|
|
ret = out - outstart; |
|
|
|
|
|
done: |
|
|
*outlen = out - outstart; |
|
|
*inlen = in - instart; |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int |
|
|
xmlUTF8ToIsolat1(unsigned char* out, int *outlen, |
|
|
const unsigned char* in, int *inlen) { |
|
|
if ((out == NULL) || (outlen == NULL) || (in == NULL) || (inlen == NULL)) |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
|
|
|
return(UTF8ToLatin1( NULL, out, outlen, in, inlen, |
|
|
0)); |
|
|
} |
|
|
#endif |
|
|
|
|
|
static xmlCharEncError |
|
|
UTF16LEToUTF8(void *vctxt ATTRIBUTE_UNUSED, |
|
|
unsigned char *out, int *outlen, |
|
|
const unsigned char *in, int *inlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
const unsigned char *instart = in; |
|
|
const unsigned char *inend = in + (*inlen & ~1); |
|
|
unsigned char *outstart = out; |
|
|
unsigned char *outend = out + *outlen; |
|
|
unsigned c, d; |
|
|
int ret = XML_ENC_ERR_SPACE; |
|
|
|
|
|
while (in < inend) { |
|
|
c = in[0] | (in[1] << 8); |
|
|
|
|
|
if (c < 0x80) { |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
out[0] = c; |
|
|
in += 2; |
|
|
out += 1; |
|
|
} else if (c < 0x800) { |
|
|
if (outend - out < 2) |
|
|
goto done; |
|
|
out[0] = (c >> 6) | 0xC0; |
|
|
out[1] = (c & 0x3F) | 0x80; |
|
|
in += 2; |
|
|
out += 2; |
|
|
} else if ((c & 0xF800) != 0xD800) { |
|
|
if (outend - out < 3) |
|
|
goto done; |
|
|
out[0] = (c >> 12) | 0xE0; |
|
|
out[1] = ((c >> 6) & 0x3F) | 0x80; |
|
|
out[2] = (c & 0x3F) | 0x80; |
|
|
in += 2; |
|
|
out += 3; |
|
|
} else { |
|
|
|
|
|
if ((c & 0xFC00) != 0xD800) { |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
if (inend - in < 4) |
|
|
break; |
|
|
d = in[2] | (in[3] << 8); |
|
|
if ((d & 0xFC00) != 0xDC00) { |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
if (outend - out < 4) |
|
|
goto done; |
|
|
c = (c << 10) + d - ((0xD800 << 10) + 0xDC00 - 0x10000); |
|
|
out[0] = (c >> 18) | 0xF0; |
|
|
out[1] = ((c >> 12) & 0x3F) | 0x80; |
|
|
out[2] = ((c >> 6) & 0x3F) | 0x80; |
|
|
out[3] = (c & 0x3F) | 0x80; |
|
|
in += 4; |
|
|
out += 4; |
|
|
} |
|
|
} |
|
|
|
|
|
ret = out - outstart; |
|
|
|
|
|
done: |
|
|
*outlen = out - outstart; |
|
|
*inlen = in - instart; |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
#ifdef LIBXML_OUTPUT_ENABLED |
|
|
static xmlCharEncError |
|
|
UTF8ToUTF16LE(void *vctxt ATTRIBUTE_UNUSED, |
|
|
unsigned char *out, int *outlen, |
|
|
const unsigned char *in, int *inlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
const unsigned char *instart = in; |
|
|
const unsigned char *inend; |
|
|
unsigned char *outstart = out; |
|
|
unsigned char *outend; |
|
|
unsigned c, d; |
|
|
int ret = XML_ENC_ERR_SPACE; |
|
|
|
|
|
|
|
|
if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) |
|
|
return(XML_ENC_ERR_INTERNAL); |
|
|
if (in == NULL) { |
|
|
*outlen = 0; |
|
|
*inlen = 0; |
|
|
return(0); |
|
|
} |
|
|
inend = in + *inlen; |
|
|
outend = out + (*outlen & ~1); |
|
|
while (in < inend) { |
|
|
c = in[0]; |
|
|
|
|
|
if (c < 0x80) { |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
out[0] = c; |
|
|
out[1] = 0; |
|
|
in += 1; |
|
|
out += 2; |
|
|
} else { |
|
|
int i, len; |
|
|
unsigned min; |
|
|
|
|
|
if (c < 0xE0) { |
|
|
if (c < 0xC2) { |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
c &= 0x1F; |
|
|
len = 2; |
|
|
min = 0x80; |
|
|
} else if (c < 0xF0) { |
|
|
c &= 0x0F; |
|
|
len = 3; |
|
|
min = 0x800; |
|
|
} else { |
|
|
c &= 0x0F; |
|
|
len = 4; |
|
|
min = 0x10000; |
|
|
} |
|
|
|
|
|
if (inend - in < len) |
|
|
break; |
|
|
|
|
|
for (i = 1; i < len; i++) { |
|
|
if ((in[i] & 0xC0) != 0x80) { |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
c = (c << 6) | (in[i] & 0x3F); |
|
|
} |
|
|
|
|
|
if ((c < min) || |
|
|
((c >= 0xD800) && (c <= 0xDFFF)) || |
|
|
(c > 0x10FFFF)) { |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
|
|
|
if (c < 0x10000) { |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
out[0] = c & 0xFF; |
|
|
out[1] = c >> 8; |
|
|
out += 2; |
|
|
} else { |
|
|
if (outend - out < 4) |
|
|
goto done; |
|
|
c -= 0x10000; |
|
|
d = (c & 0x03FF) | 0xDC00; |
|
|
c = (c >> 10) | 0xD800; |
|
|
out[0] = c & 0xFF; |
|
|
out[1] = c >> 8; |
|
|
out[2] = d & 0xFF; |
|
|
out[3] = d >> 8; |
|
|
out += 4; |
|
|
} |
|
|
|
|
|
in += len; |
|
|
} |
|
|
} |
|
|
|
|
|
ret = out - outstart; |
|
|
|
|
|
done: |
|
|
*outlen = out - outstart; |
|
|
*inlen = in - instart; |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
static xmlCharEncError |
|
|
UTF8ToUTF16(void *vctxt, |
|
|
unsigned char* outb, int *outlen, |
|
|
const unsigned char* in, int *inlen, |
|
|
int flush) { |
|
|
if (in == NULL) { |
|
|
|
|
|
|
|
|
|
|
|
if (*outlen >= 2) { |
|
|
outb[0] = 0xFF; |
|
|
outb[1] = 0xFE; |
|
|
*outlen = 2; |
|
|
*inlen = 0; |
|
|
return(2); |
|
|
} |
|
|
*outlen = 0; |
|
|
*inlen = 0; |
|
|
return(0); |
|
|
} |
|
|
return (UTF8ToUTF16LE(vctxt, outb, outlen, in, inlen, flush)); |
|
|
} |
|
|
#endif |
|
|
|
|
|
static xmlCharEncError |
|
|
UTF16BEToUTF8(void *vctxt ATTRIBUTE_UNUSED, |
|
|
unsigned char *out, int *outlen, |
|
|
const unsigned char *in, int *inlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
const unsigned char *instart = in; |
|
|
const unsigned char *inend = in + (*inlen & ~1); |
|
|
unsigned char *outstart = out; |
|
|
unsigned char *outend = out + *outlen; |
|
|
unsigned c, d; |
|
|
int ret = XML_ENC_ERR_SPACE; |
|
|
|
|
|
while (in < inend) { |
|
|
c = (in[0] << 8) | in[1]; |
|
|
|
|
|
if (c < 0x80) { |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
out[0] = c; |
|
|
in += 2; |
|
|
out += 1; |
|
|
} else if (c < 0x800) { |
|
|
if (outend - out < 2) |
|
|
goto done; |
|
|
out[0] = (c >> 6) | 0xC0; |
|
|
out[1] = (c & 0x3F) | 0x80; |
|
|
in += 2; |
|
|
out += 2; |
|
|
} else if ((c & 0xF800) != 0xD800) { |
|
|
if (outend - out < 3) |
|
|
goto done; |
|
|
out[0] = (c >> 12) | 0xE0; |
|
|
out[1] = ((c >> 6) & 0x3F) | 0x80; |
|
|
out[2] = (c & 0x3F) | 0x80; |
|
|
in += 2; |
|
|
out += 3; |
|
|
} else { |
|
|
|
|
|
if ((c & 0xFC00) != 0xD800) { |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
if (inend - in < 4) |
|
|
break; |
|
|
d = (in[2] << 8) | in[3]; |
|
|
if ((d & 0xFC00) != 0xDC00) { |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
if (outend - out < 4) |
|
|
goto done; |
|
|
c = (c << 10) + d - ((0xD800 << 10) + 0xDC00 - 0x10000); |
|
|
out[0] = (c >> 18) | 0xF0; |
|
|
out[1] = ((c >> 12) & 0x3F) | 0x80; |
|
|
out[2] = ((c >> 6) & 0x3F) | 0x80; |
|
|
out[3] = (c & 0x3F) | 0x80; |
|
|
in += 4; |
|
|
out += 4; |
|
|
} |
|
|
} |
|
|
|
|
|
ret = out - outstart; |
|
|
|
|
|
done: |
|
|
*outlen = out - outstart; |
|
|
*inlen = in - instart; |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
#ifdef LIBXML_OUTPUT_ENABLED |
|
|
static xmlCharEncError |
|
|
UTF8ToUTF16BE(void *vctxt ATTRIBUTE_UNUSED, |
|
|
unsigned char *out, int *outlen, |
|
|
const unsigned char *in, int *inlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
const unsigned char *instart = in; |
|
|
const unsigned char *inend; |
|
|
unsigned char *outstart = out; |
|
|
unsigned char *outend; |
|
|
unsigned c, d; |
|
|
int ret = XML_ENC_ERR_SPACE; |
|
|
|
|
|
|
|
|
if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); |
|
|
if (in == NULL) { |
|
|
*outlen = 0; |
|
|
*inlen = 0; |
|
|
return(0); |
|
|
} |
|
|
inend = in + *inlen; |
|
|
outend = out + (*outlen & ~1); |
|
|
while (in < inend) { |
|
|
c = in[0]; |
|
|
|
|
|
if (c < 0x80) { |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
out[0] = 0; |
|
|
out[1] = c; |
|
|
in += 1; |
|
|
out += 2; |
|
|
} else { |
|
|
int i, len; |
|
|
unsigned min; |
|
|
|
|
|
if (c < 0xE0) { |
|
|
if (c < 0xC2) { |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
c &= 0x1F; |
|
|
len = 2; |
|
|
min = 0x80; |
|
|
} else if (c < 0xF0) { |
|
|
c &= 0x0F; |
|
|
len = 3; |
|
|
min = 0x800; |
|
|
} else { |
|
|
c &= 0x0F; |
|
|
len = 4; |
|
|
min = 0x10000; |
|
|
} |
|
|
|
|
|
if (inend - in < len) |
|
|
break; |
|
|
|
|
|
for (i = 1; i < len; i++) { |
|
|
if ((in[i] & 0xC0) != 0x80) { |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
c = (c << 6) | (in[i] & 0x3F); |
|
|
} |
|
|
|
|
|
if ((c < min) || |
|
|
((c >= 0xD800) && (c <= 0xDFFF)) || |
|
|
(c > 0x10FFFF)) { |
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
|
|
|
if (c < 0x10000) { |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
out[0] = c >> 8; |
|
|
out[1] = c & 0xFF; |
|
|
out += 2; |
|
|
} else { |
|
|
if (outend - out < 4) |
|
|
goto done; |
|
|
c -= 0x10000; |
|
|
d = (c & 0x03FF) | 0xDC00; |
|
|
c = (c >> 10) | 0xD800; |
|
|
out[0] = c >> 8; |
|
|
out[1] = c & 0xFF; |
|
|
out[2] = d >> 8; |
|
|
out[3] = d & 0xFF; |
|
|
out += 4; |
|
|
} |
|
|
|
|
|
in += len; |
|
|
} |
|
|
} |
|
|
|
|
|
ret = out - outstart; |
|
|
|
|
|
done: |
|
|
*outlen = out - outstart; |
|
|
*inlen = in - instart; |
|
|
return(ret); |
|
|
} |
|
|
#endif |
|
|
|
|
|
#if defined(LIBXML_OUTPUT_ENABLED) && defined(LIBXML_HTML_ENABLED) |
|
|
static xmlCharEncError |
|
|
UTF8ToHtmlWrapper(void *vctxt ATTRIBUTE_UNUSED, |
|
|
unsigned char *out, int *outlen, |
|
|
const unsigned char *in, int *inlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
return(htmlUTF8ToHtml(out, outlen, in, inlen)); |
|
|
} |
|
|
#endif |
|
|
|
|
|
static xmlCharEncError |
|
|
Utf8ToEightBit(void *vctxt, |
|
|
unsigned char *out, int *outlen, |
|
|
const unsigned char *in, int *inlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
const unsigned char *xlattable = vctxt; |
|
|
const unsigned char *instart = in; |
|
|
const unsigned char *inend; |
|
|
unsigned char *outstart = out; |
|
|
unsigned char *outend; |
|
|
int ret = XML_ENC_ERR_SPACE; |
|
|
|
|
|
if (in == NULL) { |
|
|
|
|
|
|
|
|
|
|
|
*outlen = 0; |
|
|
*inlen = 0; |
|
|
return(XML_ENC_ERR_SUCCESS); |
|
|
} |
|
|
|
|
|
inend = in + *inlen; |
|
|
outend = out + *outlen; |
|
|
while (in < inend) { |
|
|
unsigned d = *in; |
|
|
|
|
|
if (d < 0x80) { |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
in += 1; |
|
|
} else if (d < 0xE0) { |
|
|
unsigned c; |
|
|
|
|
|
if (inend - in < 2) |
|
|
break; |
|
|
c = in[1] & 0x3F; |
|
|
d = d & 0x1F; |
|
|
d = xlattable [48 + c + xlattable [d] * 64]; |
|
|
if (d == 0) { |
|
|
|
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
in += 2; |
|
|
} else if (d < 0xF0) { |
|
|
unsigned c1; |
|
|
unsigned c2; |
|
|
|
|
|
if (inend - in < 3) |
|
|
break; |
|
|
c1 = in[1] & 0x3F; |
|
|
c2 = in[2] & 0x3F; |
|
|
d = d & 0x0F; |
|
|
d = xlattable [48 + c2 + xlattable [48 + c1 + |
|
|
xlattable [32 + d] * 64] * 64]; |
|
|
if (d == 0) { |
|
|
|
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
in += 3; |
|
|
} else { |
|
|
|
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
|
|
|
*out++ = d; |
|
|
} |
|
|
|
|
|
ret = out - outstart; |
|
|
|
|
|
done: |
|
|
*outlen = out - outstart; |
|
|
*inlen = in - instart; |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
static xmlCharEncError |
|
|
EightBitToUtf8(void *vctxt, |
|
|
unsigned char* out, int *outlen, |
|
|
const unsigned char* in, int *inlen, |
|
|
int flush ATTRIBUTE_UNUSED) { |
|
|
unsigned short const *unicodetable = vctxt; |
|
|
const unsigned char* instart = in; |
|
|
const unsigned char* inend; |
|
|
unsigned char* outstart = out; |
|
|
unsigned char* outend; |
|
|
int ret = XML_ENC_ERR_SPACE; |
|
|
|
|
|
outend = out + *outlen; |
|
|
inend = in + *inlen; |
|
|
|
|
|
while (in < inend) { |
|
|
unsigned c = *in; |
|
|
|
|
|
if (c < 0x80) { |
|
|
if (out >= outend) |
|
|
goto done; |
|
|
*out++ = c; |
|
|
} else { |
|
|
c = unicodetable[c - 0x80]; |
|
|
if (c == 0) { |
|
|
|
|
|
ret = XML_ENC_ERR_INPUT; |
|
|
goto done; |
|
|
} |
|
|
if (c < 0x800) { |
|
|
if (outend - out < 2) |
|
|
goto done; |
|
|
*out++ = ((c >> 6) & 0x1F) | 0xC0; |
|
|
*out++ = (c & 0x3F) | 0x80; |
|
|
} else { |
|
|
if (outend - out < 3) |
|
|
goto done; |
|
|
*out++ = ((c >> 12) & 0x0F) | 0xE0; |
|
|
*out++ = ((c >> 6) & 0x3F) | 0x80; |
|
|
*out++ = (c & 0x3F) | 0x80; |
|
|
} |
|
|
} |
|
|
|
|
|
in += 1; |
|
|
} |
|
|
|
|
|
ret = out - outstart; |
|
|
|
|
|
done: |
|
|
*outlen = out - outstart; |
|
|
*inlen = in - instart; |
|
|
return(ret); |
|
|
} |
|
|
|
|
|
|