diff options
-rw-r--r-- | lib/libexpat/Changes | 40 | ||||
-rw-r--r-- | lib/libexpat/README | 139 | ||||
-rw-r--r-- | lib/libexpat/README.md | 126 | ||||
-rw-r--r-- | lib/libexpat/expat_config.h | 3 | ||||
-rw-r--r-- | lib/libexpat/lib/expat.h | 2 | ||||
-rw-r--r-- | lib/libexpat/lib/siphash.h | 5 | ||||
-rw-r--r-- | lib/libexpat/lib/xmlparse.c | 326 | ||||
-rw-r--r-- | lib/libexpat/lib/xmlrole.c | 30 | ||||
-rw-r--r-- | lib/libexpat/lib/xmltok.c | 13 | ||||
-rw-r--r-- | lib/libexpat/lib/xmltok_impl.c | 39 | ||||
-rw-r--r-- | lib/libexpat/tests/chardata.c | 2 | ||||
-rw-r--r-- | lib/libexpat/tests/chardata.h | 2 | ||||
-rw-r--r-- | lib/libexpat/tests/minicheck.c | 32 | ||||
-rw-r--r-- | lib/libexpat/tests/runtests.c | 9257 |
14 files changed, 9387 insertions, 629 deletions
diff --git a/lib/libexpat/Changes b/lib/libexpat/Changes index 9fb36dda37c..147f48288aa 100644 --- a/lib/libexpat/Changes +++ b/lib/libexpat/Changes @@ -2,6 +2,46 @@ NOTE: We are looking for help with a few things: https://github.com/libexpat/libexpat/labels/help%20wanted If you can help, please get in touch. Thanks! +Release 2.2.3 Wed August 2 2017 + Security fixes: + #82 CVE-2017-11742 -- Windows: Fix DLL hijacking vulnerability + using Steve Holme's LoadLibrary wrapper for/of cURL + + Bug fixes: + #85 Fix a dangling pointer issue related to realloc + + Other changes: + Increase code coverage + #91 Linux: Allow getrandom to fail if nonblocking pool has not + yet been initialized and read /dev/urandom then, instead. + This is in line with what recent Python does. + #81 Pre-10.7/Lion macOS: Support entropy from arc4random + #86 Check that a UTF-16 encoding in an XML declaration has the + right endianness + #4 #5 #7 Recover correctly when some reallocations fail + Repair "./configure && make" for systems without any + provider of high quality entropy + and try reading /dev/urandom on those + Ensure that user-defined character encodings have converter + functions when they are needed + Fix mis-leading description of argument -c in xmlwf.1 + Rely on macro HAVE_ARC4RANDOM_BUF (rather than __CloudABI__) + for CloudABI + #100 Fix use of SIPHASH_MAIN in siphash.h + #23 Test suite: Fix memory leaks + Version info bumped from 7:4:6 to 7:5:6 + + Special thanks to: + Chanho Park + Joe Orton + Pascal Cuoq + Rhodri James + Simon McVittie + Vadim Zeitlin + Viktor Szakats + and + Core Infrastructure Initiative + Release 2.2.2 Wed July 12 2017 Security fixes: #43 Protect against compilation without any source of high diff --git a/lib/libexpat/README b/lib/libexpat/README deleted file mode 100644 index c86018f4aaf..00000000000 --- a/lib/libexpat/README +++ /dev/null @@ -1,139 +0,0 @@ - - Expat, Release 2.2.2 - -This is Expat, a C library for parsing XML, written by James Clark. -Expat is a stream-oriented XML parser. This means that you register -handlers with the parser before starting the parse. These handlers -are called when the parser discovers the associated structures in the -document being parsed. A start tag is an example of the kind of -structures for which you may register handlers. - -Windows users should use the expat_win32bin package, which includes -both precompiled libraries and executables, and source code for -developers. - -Expat is free software. You may copy, distribute, and modify it under -the terms of the License contained in the file COPYING distributed -with this package. This license is the same as the MIT/X Consortium -license. - -Versions of Expat that have an odd minor version (the middle number in -the release above), are development releases and should be considered -as beta software. Releases with even minor version numbers are -intended to be production grade software. - -If you are building Expat from a check-out from the CVS repository, -you need to run a script that generates the configure script using the -GNU autoconf and libtool tools. To do this, you need to have -autoconf 2.58 or newer. Run the script like this: - - ./buildconf.sh - -Once this has been done, follow the same instructions as for building -from a source distribution. - -To build Expat from a source distribution, you first run the -configuration shell script in the top level distribution directory: - - ./configure - -There are many options which you may provide to configure (which you -can discover by running configure with the --help option). But the -one of most interest is the one that sets the installation directory. -By default, the configure script will set things up to install -libexpat into /usr/local/lib, expat.h into /usr/local/include, and -xmlwf into /usr/local/bin. If, for example, you'd prefer to install -into /home/me/mystuff/lib, /home/me/mystuff/include, and -/home/me/mystuff/bin, you can tell configure about that with: - - ./configure --prefix=/home/me/mystuff - -Another interesting option is to enable 64-bit integer support for -line and column numbers and the over-all byte index: - - ./configure CPPFLAGS=-DXML_LARGE_SIZE - -However, such a modification would be a breaking change to the ABI -and is therefore not recommended for general use - e.g. as part of -a Linux distribution - but rather for builds with special requirements. - -After running the configure script, the "make" command will build -things and "make install" will install things into their proper -location. Have a look at the "Makefile" to learn about additional -"make" options. Note that you need to have write permission into -the directories into which things will be installed. - -If you are interested in building Expat to provide document -information in UTF-16 encoding rather than the default UTF-8, follow -these instructions (after having run "make distclean"): - - 1. For UTF-16 output as unsigned short (and version/error - strings as char), run: - - ./configure CPPFLAGS=-DXML_UNICODE - - For UTF-16 output as wchar_t (incl. version/error strings), - run: - - ./configure CFLAGS="-g -O2 -fshort-wchar" \ - CPPFLAGS=-DXML_UNICODE_WCHAR_T - - 2. Edit the MakeFile, changing: - - LIBRARY = libexpat.la - - to: - - LIBRARY = libexpatw.la - - (Note the additional "w" in the library name.) - - 3. Run "make buildlib" (which builds the library only). - Or, to save step 2, run "make buildlib LIBRARY=libexpatw.la". - - 4. Run "make installlib" (which installs the library only). - Or, if step 2 was omitted, run "make installlib LIBRARY=libexpatw.la". - -Using DESTDIR or INSTALL_ROOT is enabled, with INSTALL_ROOT being the default -value for DESTDIR, and the rest of the make file using only DESTDIR. -It works as follows: - $ make install DESTDIR=/path/to/image -overrides the in-makefile set DESTDIR, while both - $ INSTALL_ROOT=/path/to/image make install - $ make install INSTALL_ROOT=/path/to/image -use DESTDIR=$(INSTALL_ROOT), even if DESTDIR eventually is defined in the -environment, because variable-setting priority is -1) commandline -2) in-makefile -3) environment - -Note: This only applies to the Expat library itself, building UTF-16 versions -of xmlwf and the tests is currently not supported. - -Note for Solaris users: The "ar" command is usually located in -"/usr/ccs/bin", which is not in the default PATH. You will need to -add this to your path for the "make" command, and probably also switch -to GNU make (the "make" found in /usr/ccs/bin does not seem to work -properly -- apparently it does not understand .PHONY directives). If -you're using ksh or bash, use this command to build: - - PATH=/usr/ccs/bin:$PATH make - -When using Expat with a project using autoconf for configuration, you -can use the probing macro in conftools/expat.m4 to determine how to -include Expat. See the comments at the top of that file for more -information. - -A reference manual is available in the file doc/reference.html in this -distribution. - -The homepage for this project is http://www.libexpat.org/. There -are links there to connect you to the bug reports page. If you need -to report a bug when you don't have access to a browser, you may also -send a bug report by email to expat-bugs@mail.libexpat.org. - -Discussion related to the direction of future expat development takes -place on expat-discuss@mail.libexpat.org. Archives of this list and -other Expat-related lists may be found at: - - http://mail.libexpat.org/mailman/listinfo/ diff --git a/lib/libexpat/README.md b/lib/libexpat/README.md new file mode 100644 index 00000000000..0a1777e7eac --- /dev/null +++ b/lib/libexpat/README.md @@ -0,0 +1,126 @@ +# Expat, Release 2.2.3 + +This is Expat, a C library for parsing XML, started by +[James Clark](https://en.wikipedia.org/wiki/James_Clark_(programmer)) in 1997. +Expat is a stream-oriented XML parser. This means that you register +handlers with the parser before starting the parse. These handlers +are called when the parser discovers the associated structures in the +document being parsed. A start tag is an example of the kind of +structures for which you may register handlers. + +Windows users should use the +[`expat_win32` package](https://sourceforge.net/projects/expat/files/expat_win32/), +which includes both precompiled libraries and executables, and source code for +developers. + +Expat is [free software](https://www.gnu.org/philosophy/free-sw.en.html). +You may copy, distribute, and modify it under the terms of the License +contained in the file +[`COPYING`](https://github.com/libexpat/libexpat/blob/master/expat/COPYING) +distributed with this package. +This license is the same as the MIT/X Consortium license. + +If you are building Expat from a check-out from the +[Git repository](https://github.com/libexpat/libexpat/), +you need to run a script that generates the configure script using the +GNU autoconf and libtool tools. To do this, you need to have +autoconf 2.58 or newer. Run the script like this: + +```console +./buildconf.sh +``` + +Once this has been done, follow the same instructions as for building +from a source distribution. + +To build Expat from a source distribution, you first run the +configuration shell script in the top level distribution directory: + +```console +./configure +``` + +There are many options which you may provide to configure (which you +can discover by running configure with the `--help` option). But the +one of most interest is the one that sets the installation directory. +By default, the configure script will set things up to install +libexpat into `/usr/local/lib`, `expat.h` into `/usr/local/include`, and +`xmlwf` into `/usr/local/bin`. If, for example, you'd prefer to install +into `/home/me/mystuff/lib`, `/home/me/mystuff/include`, and +`/home/me/mystuff/bin`, you can tell `configure` about that with: + +```console +./configure --prefix=/home/me/mystuff +``` + +Another interesting option is to enable 64-bit integer support for +line and column numbers and the over-all byte index: + +```console +./configure CPPFLAGS=-DXML_LARGE_SIZE +``` + +However, such a modification would be a breaking change to the ABI +and is therefore not recommended for general use — e.g. as part of +a Linux distribution — but rather for builds with special requirements. + +After running the configure script, the `make` command will build +things and `make install` will install things into their proper +location. Have a look at the `Makefile` to learn about additional +`make` options. Note that you need to have write permission into +the directories into which things will be installed. + +If you are interested in building Expat to provide document +information in UTF-16 encoding rather than the default UTF-8, follow +these instructions (after having run `make distclean`): + +1. For UTF-16 output as unsigned short (and version/error strings as char), + run:<br/> + `./configure CPPFLAGS=-DXML_UNICODE`<br/> + For UTF-16 output as `wchar_t` (incl. version/error strings), run:<br/> + `./configure CFLAGS="-g -O2 -fshort-wchar" CPPFLAGS=-DXML_UNICODE_WCHAR_T` + <br/>Note: The latter requires libc compiled with `-fshort-wchar`, as well. + +1. Edit `Makefile`, changing:<br/> + `LIBRARY = libexpat.la`<br/> + to:<br/> + `LIBRARY = libexpatw.la`<br/> + (Note the additional "w" in the library name.) + +1. Run `make buildlib` (which builds the library only). + Or, to save step 2, run `make buildlib LIBRARY=libexpatw.la`. + +1. Run `make installlib` (which installs the library only). + Or, if step 2 was omitted, run `make installlib LIBRARY=libexpatw.la`. + +Using `DESTDIR` or `INSTALL_ROOT` is enabled, with `INSTALL_ROOT` being the +default value for `DESTDIR`, and the rest of the make file using only +`DESTDIR`. It works as follows: + +```console +make install DESTDIR=/path/to/image +``` + +overrides the in-makefile set `DESTDIR`, while both + +```console +INSTALL_ROOT=/path/to/image make install +make install INSTALL_ROOT=/path/to/image +``` + +use `DESTDIR=$(INSTALL_ROOT)`, even if `DESTDIR` eventually is defined in the +environment, because variable-setting priority is +1. commandline +2. in-makefile +3. environment + +Note: This only applies to the Expat library itself, building UTF-16 versions +of xmlwf and the tests is currently not supported. + +When using Expat with a project using autoconf for configuration, you +can use the probing macro in `conftools/expat.m4` to determine how to +include Expat. See the comments at the top of that file for more +information. + +A reference manual is available in the file `doc/reference.html` in this +distribution. diff --git a/lib/libexpat/expat_config.h b/lib/libexpat/expat_config.h index 623b6dc935f..30b865c212f 100644 --- a/lib/libexpat/expat_config.h +++ b/lib/libexpat/expat_config.h @@ -1,7 +1,8 @@ -/* $OpenBSD: expat_config.h,v 1.3 2017/06/30 11:25:29 bluhm Exp $ */ +/* $OpenBSD: expat_config.h,v 1.4 2017/08/04 16:24:03 bluhm Exp $ */ /* quick and dirty conf for OpenBSD */ +#define HAVE_ARC4RANDOM 1 #define HAVE_ARC4RANDOM_BUF 1 #define HAVE_MEMMOVE 1 #define XML_CONTEXT_BYTES 1024 diff --git a/lib/libexpat/lib/expat.h b/lib/libexpat/lib/expat.h index b6b02f4955f..7e5bbb7e393 100644 --- a/lib/libexpat/lib/expat.h +++ b/lib/libexpat/lib/expat.h @@ -1048,7 +1048,7 @@ XML_GetFeatureList(void); */ #define XML_MAJOR_VERSION 2 #define XML_MINOR_VERSION 2 -#define XML_MICRO_VERSION 2 +#define XML_MICRO_VERSION 3 #ifdef __cplusplus } diff --git a/lib/libexpat/lib/siphash.h b/lib/libexpat/lib/siphash.h index 30a6f11d93c..581872df7b4 100644 --- a/lib/libexpat/lib/siphash.h +++ b/lib/libexpat/lib/siphash.h @@ -11,6 +11,9 @@ * -------------------------------------------------------------------------- * HISTORY: * + * 2017-07-25 (Vadim Zeitlin) + * - Fix use of SIPHASH_MAIN macro + * * 2017-07-05 (Sebastian Pipping) * - Use _SIP_ULL macro to not require a C++11 compiler if compiled as C++ * - Add const qualifiers at two places @@ -350,7 +353,7 @@ static int sip24_valid(void) { } /* sip24_valid() */ -#if SIPHASH_MAIN +#ifdef SIPHASH_MAIN #include <stdio.h> diff --git a/lib/libexpat/lib/xmlparse.c b/lib/libexpat/lib/xmlparse.c index f97e7a869b1..1abce6014ec 100644 --- a/lib/libexpat/lib/xmlparse.c +++ b/lib/libexpat/lib/xmlparse.c @@ -1,7 +1,7 @@ /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd See the file COPYING for copying permission. - cd4063469a95eab9a93001afb109e3dee122cdda4635bbec36257fc01c327348 (2.2.2+) + 101bfd65d1ff3d1511cf6671e6aae65f82cd97df6f4da137d46d510731830ad9 (2.2.3+) */ #if !defined(_GNU_SOURCE) @@ -21,6 +21,8 @@ #include <sys/time.h> /* gettimeofday() */ #include <sys/types.h> /* getpid() */ #include <unistd.h> /* getpid() */ +#include <fcntl.h> /* O_RDONLY */ +#include <errno.h> #endif #define XML_BUILDING_EXPAT 1 @@ -1665,9 +1667,22 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) if (errorCode == XML_ERROR_NONE) { switch (ps_parsing) { case XML_SUSPENDED: + /* It is hard to be certain, but it seems that this case + * cannot occur. This code is cleaning up a previous parse + * with no new data (since len == 0). Changing the parsing + * state requires getting to execute a handler function, and + * there doesn't seem to be an opportunity for that while in + * this circumstance. + * + * Given the uncertainty, we retain the code but exclude it + * from coverage tests. + * + * LCOV_EXCL_START + */ XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); positionPtr = bufferPtr; return XML_STATUS_SUSPENDED; + /* LCOV_EXCL_STOP */ case XML_INITIALIZED: case XML_PARSING: ps_parsing = XML_FINISHED; @@ -2856,9 +2871,17 @@ doContent(XML_Parser parser, return XML_ERROR_NO_MEMORY; break; default: + /* All of the tokens produced by XmlContentTok() have their own + * explicit cases, so this default is not strictly necessary. + * However it is a useful safety net, so we retain the code and + * simply exclude it from the coverage tests. + * + * LCOV_EXCL_START + */ if (defaultHandler) reportDefault(parser, enc, s, next); break; + /* LCOV_EXCL_STOP */ } *eventPP = s = next; switch (ps_parsing) { @@ -2949,13 +2972,17 @@ storeAtts(XML_Parser parser, const ENCODING *enc, #endif attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); - if (temp == NULL) + if (temp == NULL) { + attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; + } atts = temp; #ifdef XML_ATTR_INFO temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo)); - if (temp2 == NULL) + if (temp2 == NULL) { + attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; + } attInfo = temp2; #endif if (n > oldAttsSize) @@ -3092,6 +3119,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, int j; /* hash table index */ unsigned long version = nsAttsVersion; int nsAttsSize = (int)1 << nsAttsPower; + unsigned char oldNsAttsPower = nsAttsPower; /* size of hash table must be at least 2 * (# of prefixed attributes) */ if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */ NS_ATT *temp; @@ -3101,8 +3129,11 @@ storeAtts(XML_Parser parser, const ENCODING *enc, nsAttsPower = 3; nsAttsSize = (int)1 << nsAttsPower; temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT)); - if (!temp) + if (!temp) { + /* Restore actual size of memory in nsAtts */ + nsAttsPower = oldNsAttsPower; return XML_ERROR_NO_MEMORY; + } nsAtts = temp; version = 0; /* force re-initialization of nsAtts hash table */ } @@ -3129,8 +3160,23 @@ storeAtts(XML_Parser parser, const ENCODING *enc, ((XML_Char *)s)[-1] = 0; /* clear flag */ id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); - if (!id || !id->prefix) - return XML_ERROR_NO_MEMORY; + if (!id || !id->prefix) { + /* This code is walking through the appAtts array, dealing + * with (in this case) a prefixed attribute name. To be in + * the array, the attribute must have already been bound, so + * has to have passed through the hash table lookup once + * already. That implies that an entry for it already + * exists, so the lookup above will return a pointer to + * already allocated memory. There is no opportunaity for + * the allocator to fail, so the condition above cannot be + * fulfilled. + * + * Since it is difficult to be certain that the above + * analysis is complete, we retain the test and merely + * remove the code from coverage tests. + */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } b = id->prefix->binding; if (!b) return XML_ERROR_UNBOUND_PREFIX; @@ -3507,8 +3553,16 @@ doCdataSection(XML_Parser parser, } return XML_ERROR_UNCLOSED_CDATA_SECTION; default: + /* Every token returned by XmlCdataSectionTok() has its own + * explicit case, so this default case will never be executed. + * We retain it as a safety net and exclude it from the coverage + * statistics. + * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } *eventPP = s = next; @@ -3568,8 +3622,20 @@ doIgnoreSection(XML_Parser parser, eventEndPP = &eventEndPtr; } else { + /* It's not entirely clear, but it seems the following two lines + * of code cannot be executed. The only occasions on which 'enc' + * is not 'parser->m_encoding' are when this function is called + * from the internal entity processing, and IGNORE sections are an + * error in internal entities. + * + * Since it really isn't clear that this is true, we keep the code + * and just remove it from our coverage tests. + * + * LCOV_EXCL_START + */ eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } *eventPP = s; *startPtr = NULL; @@ -3602,8 +3668,16 @@ doIgnoreSection(XML_Parser parser, } return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ default: + /* All of the tokens that XmlIgnoreSectionTok() returns have + * explicit cases to handle them, so this default case is never + * executed. We keep it as a safety net anyway, and remove it + * from our test coverage statistics. + * + * LCOV_EXCL_START + */ *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } /* not reached */ } @@ -3700,7 +3774,14 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, reportDefault(parser, encoding, s, next); if (protocolEncodingName == NULL) { if (newEncoding) { - if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { + /* Check that the specified encoding does not conflict with what + * the parser has already deduced. Do we have the same number + * of bytes in the smallest representation of a character? If + * this is UTF-16, is it the same endianness? + */ + if (newEncoding->minBytesPerChar != encoding->minBytesPerChar + || (newEncoding->minBytesPerChar == 2 && + newEncoding != encoding)) { eventPtr = encodingName; return XML_ERROR_INCORRECT_ENCODING; } @@ -3845,15 +3926,14 @@ entityValueInitProcessor(XML_Parser parser, result = processXmlDecl(parser, 0, start, next); if (result != XML_ERROR_NONE) return result; - switch (ps_parsing) { - case XML_SUSPENDED: - *nextPtr = next; - return XML_ERROR_NONE; - case XML_FINISHED: + /* At this point, ps_parsing cannot be XML_SUSPENDED. For that + * to happen, a parameter entity parsing handler must have + * attempted to suspend the parser, which fails and raises an + * error. The parser can be aborted, but can't be suspended. + */ + if (ps_parsing == XML_FINISHED) return XML_ERROR_ABORTED; - default: - *nextPtr = next; - } + *nextPtr = next; /* stop scanning for text declaration - we found one */ processor = entityValueProcessor; return entityValueProcessor(parser, next, end, nextPtr); @@ -4176,8 +4256,14 @@ doProlog(XML_Parser parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); - if (!entity) - return XML_ERROR_NO_MEMORY; + if (!entity) { + /* The external subset name "#" will have already been + * inserted into the hash table at the start of the + * external entity parsing, so no allocation will happen + * and lookup() cannot fail. + */ + return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */ + } if (useForeignDTD) entity->base = curBase; dtd->paramEntityRead = XML_FALSE; @@ -4656,8 +4742,10 @@ doProlog(XML_Parser parser, if (prologState.level >= groupSize) { if (groupSize) { char *temp = (char *)REALLOC(groupConnector, groupSize *= 2); - if (temp == NULL) + if (temp == NULL) { + groupSize /= 2; return XML_ERROR_NO_MEMORY; + } groupConnector = temp; if (dtd->scaffIndex) { int *temp = (int *)REALLOC(dtd->scaffIndex, @@ -4669,8 +4757,10 @@ doProlog(XML_Parser parser, } else { groupConnector = (char *)MALLOC(groupSize = 32); - if (!groupConnector) + if (!groupConnector) { + groupSize = 0; return XML_ERROR_NO_MEMORY; + } } } groupConnector[prologState.level] = 0; @@ -4733,8 +4823,29 @@ doProlog(XML_Parser parser, : !dtd->hasParamEntityRefs)) { if (!entity) return XML_ERROR_UNDEFINED_ENTITY; - else if (!entity->is_internal) - return XML_ERROR_ENTITY_DECLARED_IN_PE; + else if (!entity->is_internal) { + /* It's hard to exhaustively search the code to be sure, + * but there doesn't seem to be a way of executing the + * following line. There are two cases: + * + * If 'standalone' is false, the DTD must have no + * parameter entities or we wouldn't have passed the outer + * 'if' statement. That measn the only entity in the hash + * table is the external subset name "#" which cannot be + * given as a parameter entity name in XML syntax, so the + * lookup must have returned NULL and we don't even reach + * the test for an internal entity. + * + * If 'standalone' is true, it does not seem to be + * possible to create entities taking this code path that + * are not internal entities, so fail the test above. + * + * Because this analysis is very uncertain, the code is + * being left in place and merely removed from the + * coverage test statistics. + */ + return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */ + } } else if (!entity) { dtd->keepProcessing = dtd->standalone; @@ -5206,11 +5317,15 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) break; n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = ptr; - return XML_ERROR_BAD_CHAR_REF; - } + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ for (i = 0; i < n; i++) { if (!poolAppendChar(pool, buf[i])) return XML_ERROR_NO_MEMORY; @@ -5284,8 +5399,26 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, break; } if (entity->open) { - if (enc == encoding) - eventPtr = ptr; + if (enc == encoding) { + /* It does not appear that this line can be executed. + * + * The "if (entity->open)" check catches recursive entity + * definitions. In order to be called with an open + * entity, it must have gone through this code before and + * been through the recursive call to + * appendAttributeValue() some lines below. That call + * sets the local encoding ("enc") to the parser's + * internal encoding (internal_utf8 or internal_utf16), + * which can never be the same as the principle encoding. + * It doesn't appear there is another code path that gets + * here with entity->open being TRUE. + * + * Since it is not certain that this logic is watertight, + * we keep the line and merely exclude it from coverage + * tests. + */ + eventPtr = ptr; /* LCOV_EXCL_LINE */ + } return XML_ERROR_RECURSIVE_ENTITY_REF; } if (entity->notation) { @@ -5312,9 +5445,21 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, } break; default: + /* The only token returned by XmlAttributeValueTok() that does + * not have an explicit case here is XML_TOK_PARTIAL_CHAR. + * Getting that would require an entity name to contain an + * incomplete XML character (e.g. \xE2\x82); however previous + * tokenisers will have already recognised and rejected such + * names before XmlAttributeValueTok() gets a look-in. This + * default case should be retained as a safety net, but the code + * excluded from coverage tests. + * + * LCOV_EXCL_START + */ if (enc == encoding) eventPtr = ptr; return XML_ERROR_UNEXPECTED_STATE; + /* LCOV_EXCL_STOP */ } ptr = next; } @@ -5447,12 +5592,15 @@ storeEntityValue(XML_Parser parser, goto endEntityValue; } n = XmlEncode(n, (ICHAR *)buf); - if (!n) { - if (enc == encoding) - eventPtr = entityTextPtr; - result = XML_ERROR_BAD_CHAR_REF; - goto endEntityValue; - } + /* The XmlEncode() functions can never return 0 here. That + * error return happens if the code point passed in is either + * negative or greater than or equal to 0x110000. The + * XmlCharRefNumber() functions will all return a number + * strictly less than 0x110000 or a negative value if an error + * occurred. The negative value is intercepted above, so + * XmlEncode() is never passed a value it might return an + * error for. + */ for (i = 0; i < n; i++) { if (pool->end == pool->ptr && !poolGrow(pool)) { result = XML_ERROR_NO_MEMORY; @@ -5473,10 +5621,18 @@ storeEntityValue(XML_Parser parser, result = XML_ERROR_INVALID_TOKEN; goto endEntityValue; default: + /* This default case should be unnecessary -- all the tokens + * that XmlEntityValueTok() can return have their own explicit + * cases -- but should be retained for safety. We do however + * exclude it from the coverage statistics. + * + * LCOV_EXCL_START + */ if (enc == encoding) eventPtr = entityTextPtr; result = XML_ERROR_UNEXPECTED_STATE; goto endEntityValue; + /* LCOV_EXCL_STOP */ } entityTextPtr = next; } @@ -5574,8 +5730,25 @@ reportDefault(XML_Parser parser, const ENCODING *enc, eventEndPP = &eventEndPtr; } else { + /* To get here, two things must be true; the parser must be + * using a character encoding that is not the same as the + * encoding passed in, and the encoding passed in must need + * conversion to the internal format (UTF-8 unless XML_UNICODE + * is defined). The only occasions on which the encoding passed + * in is not the same as the parser's encoding are when it is + * the internal encoding (e.g. a previously defined parameter + * entity, already converted to internal format). This by + * definition doesn't need conversion, so the whole branch never + * gets executed. + * + * For safety's sake we don't delete these lines and merely + * exclude them from coverage statistics. + * + * LCOV_EXCL_START + */ eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); + /* LCOV_EXCL_STOP */ } do { ICHAR *dataPtr = (ICHAR *)dataBuf; @@ -5744,9 +5917,30 @@ getContext(XML_Parser parser) len = dtd->defaultPrefix.binding->uriLen; if (namespaceSeparator) len--; - for (i = 0; i < len; i++) - if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) - return NULL; + for (i = 0; i < len; i++) { + if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) { + /* Because of memory caching, I don't believe this line can be + * executed. + * + * This is part of a loop copying the default prefix binding + * URI into the parser's temporary string pool. Previously, + * that URI was copied into the same string pool, with a + * terminating NUL character, as part of setContext(). When + * the pool was cleared, that leaves a block definitely big + * enough to hold the URI on the free block list of the pool. + * The URI copy in getContext() therefore cannot run out of + * memory. + * + * If the pool is used between the setContext() and + * getContext() calls, the worst it can do is leave a bigger + * block on the front of the free list. Given that this is + * all somewhat inobvious and program logic can be changed, we + * don't delete the line but we do exclude it from the test + * coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ + } + } needSep = XML_TRUE; } @@ -5758,8 +5952,15 @@ getContext(XML_Parser parser) PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); if (!prefix) break; - if (!prefix->binding) - continue; + if (!prefix->binding) { + /* This test appears to be (justifiable) paranoia. There does + * not seem to be a way of injecting a prefix without a binding + * that doesn't get errored long before this function is called. + * The test should remain for safety's sake, so we instead + * exclude the following line from the coverage statistics. + */ + continue; /* LCOV_EXCL_LINE */ + } if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) return NULL; for (s = prefix->name; *s; s++) @@ -6430,8 +6631,20 @@ poolCopyString(STRING_POOL *pool, const XML_Char *s) static const XML_Char * poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { - if (!pool->ptr && !poolGrow(pool)) - return NULL; + if (!pool->ptr && !poolGrow(pool)) { + /* The following line is unreachable given the current usage of + * poolCopyStringN(). Currently it is called from exactly one + * place to copy the text of a simple general entity. By that + * point, the name of the entity is already stored in the pool, so + * pool->ptr cannot be NULL. + * + * If poolCopyStringN() is used elsewhere as it well might be, + * this line may well become executable again. Regardless, this + * sort of check shouldn't be removed lightly, so we just exclude + * it from the coverage statistics. + */ + return NULL; /* LCOV_EXCL_LINE */ + } for (; n > 0; --n, s++) { if (!poolAppendChar(pool, *s)) return NULL; @@ -6524,8 +6737,19 @@ poolGrow(STRING_POOL *pool) int blockSize = (int)((unsigned)(pool->end - pool->start)*2U); size_t bytesToAllocate; - if (blockSize < 0) - return XML_FALSE; + // NOTE: Needs to be calculated prior to calling `realloc` + // to avoid dangling pointers: + const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start; + + if (blockSize < 0) { + /* This condition traps a situation where either more than + * INT_MAX/2 bytes have already been allocated. This isn't + * readily testable, since it is unlikely that an average + * machine will have that much memory, so we exclude it from the + * coverage statistics. + */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } bytesToAllocate = poolBytesToAllocateFor(blockSize); if (bytesToAllocate == 0) @@ -6537,7 +6761,7 @@ poolGrow(STRING_POOL *pool) return XML_FALSE; pool->blocks = temp; pool->blocks->size = blockSize; - pool->ptr = pool->blocks->s + (pool->ptr - pool->start); + pool->ptr = pool->blocks->s + offsetInsideBlock; pool->start = pool->blocks->s; pool->end = pool->start + blockSize; } @@ -6546,8 +6770,18 @@ poolGrow(STRING_POOL *pool) int blockSize = (int)(pool->end - pool->start); size_t bytesToAllocate; - if (blockSize < 0) - return XML_FALSE; + if (blockSize < 0) { + /* This condition traps a situation where either more than + * INT_MAX bytes have already been allocated (which is prevented + * by various pieces of program logic, not least this one, never + * mind the unlikelihood of actually having that much memory) or + * the pool control fields have been corrupted (which could + * conceivably happen in an extremely buggy user handler + * function). Either way it isn't readily testable, so we + * exclude it from the coverage statistics. + */ + return XML_FALSE; /* LCOV_EXCL_LINE */ + } if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; diff --git a/lib/libexpat/lib/xmlrole.c b/lib/libexpat/lib/xmlrole.c index a7c56302796..c809ee51482 100644 --- a/lib/libexpat/lib/xmlrole.c +++ b/lib/libexpat/lib/xmlrole.c @@ -170,7 +170,14 @@ prolog1(PROLOG_STATE *state, case XML_TOK_COMMENT: return XML_ROLE_COMMENT; case XML_TOK_BOM: - return XML_ROLE_NONE; + /* This case can never arise. To reach this role function, the + * parse must have passed through prolog0 and therefore have had + * some form of input, even if only a space. At that point, a + * byte order mark is no longer a valid character (though + * technically it should be interpreted as a non-breaking space), + * so will be rejected by the tokenizing stages. + */ + return XML_ROLE_NONE; /* LCOV_EXCL_LINE */ case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), @@ -1285,6 +1292,26 @@ declClose(PROLOG_STATE *state, return common(state, tok); } +/* This function will only be invoked if the internal logic of the + * parser has broken down. It is used in two cases: + * + * 1: When the XML prolog has been finished. At this point the + * processor (the parser level above these role handlers) should + * switch from prologProcessor to contentProcessor and reinitialise + * the handler function. + * + * 2: When an error has been detected (via common() below). At this + * point again the processor should be switched to errorProcessor, + * which will never call a handler. + * + * The result of this is that error() can only be called if the + * processor switch failed to happen, which is an internal error and + * therefore we shouldn't be able to provoke it simply by using the + * library. It is a necessary backstop, however, so we merely exclude + * it from the coverage statistics. + * + * LCOV_EXCL_START + */ static int PTRCALL error(PROLOG_STATE *UNUSED_P(state), int UNUSED_P(tok), @@ -1294,6 +1321,7 @@ error(PROLOG_STATE *UNUSED_P(state), { return XML_ROLE_NONE; } +/* LCOV_EXCL_STOP */ static int FASTCALL common(PROLOG_STATE *state, int tok) diff --git a/lib/libexpat/lib/xmltok.c b/lib/libexpat/lib/xmltok.c index cdf0720dd89..db4a5c8ca3e 100644 --- a/lib/libexpat/lib/xmltok.c +++ b/lib/libexpat/lib/xmltok.c @@ -1019,7 +1019,11 @@ streqci(const char *s1, const char *s2) if (ASCII_a <= c1 && c1 <= ASCII_z) c1 += ASCII_A - ASCII_a; if (ASCII_a <= c2 && c2 <= ASCII_z) - c2 += ASCII_A - ASCII_a; + /* The following line will never get executed. streqci() is + * only called from two places, both of which guarantee to put + * upper-case strings into s2. + */ + c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */ if (c1 != c2) return 0; if (!c1) @@ -1291,7 +1295,7 @@ XmlUtf8Encode(int c, char *buf) }; if (c < 0) - return 0; + return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */ if (c < min2) { buf[0] = (char)(c | UTF8_cval1); return 1; @@ -1314,7 +1318,7 @@ XmlUtf8Encode(int c, char *buf) buf[3] = (char)((c & 0x3f) | 0x80); return 4; } - return 0; + return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */ } int FASTCALL @@ -1465,6 +1469,9 @@ XmlInitUnknownEncoding(void *mem, else if (c < 0) { if (c < -4) return 0; + /* Multi-byte sequences need a converter function */ + if (!convert) + return 0; e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); e->utf8[i][0] = 0; e->utf16[i] = 0; diff --git a/lib/libexpat/lib/xmltok_impl.c b/lib/libexpat/lib/xmltok_impl.c index 5f779c0571b..4fa1ff679ce 100644 --- a/lib/libexpat/lib/xmltok_impl.c +++ b/lib/libexpat/lib/xmltok_impl.c @@ -1198,8 +1198,14 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *start; if (ptr >= end) return XML_TOK_NONE; - else if (! HAS_CHAR(enc, ptr, end)) - return XML_TOK_PARTIAL; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { @@ -1258,8 +1264,14 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *start; if (ptr >= end) return XML_TOK_NONE; - else if (! HAS_CHAR(enc, ptr, end)) - return XML_TOK_PARTIAL; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { @@ -1614,6 +1626,14 @@ PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr, return 0; } +/* This function does not appear to be called from anywhere within the + * library code. It is used via the macro XmlSameName(), which is + * defined but never used. Since it appears in the encoding function + * table, removing it is not a thing to be undertaken lightly. For + * the moment, we simply exclude it from coverage tests. + * + * LCOV_EXCL_START + */ static int PTRCALL PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) { @@ -1677,14 +1697,21 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) } /* not reached */ } +/* LCOV_EXCL_STOP */ static int PTRCALL PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { - if (end1 - ptr1 < MINBPC(enc)) - return 0; + if (end1 - ptr1 < MINBPC(enc)) { + /* This line cannot be executed. THe incoming data has already + * been tokenized once, so imcomplete characters like this have + * already been eliminated from the input. Retaining the + * paranoia check is still valuable, however. + */ + return 0; /* LCOV_EXCL_LINE */ + } if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } diff --git a/lib/libexpat/tests/chardata.c b/lib/libexpat/tests/chardata.c index 012499bb881..8e4e97a5f39 100644 --- a/lib/libexpat/tests/chardata.c +++ b/lib/libexpat/tests/chardata.c @@ -78,7 +78,7 @@ CharData_AppendXMLChars(CharData *storage, const XML_Char *s, int len) int CharData_CheckString(CharData *storage, const char *expected) { - char buffer[1280]; + char buffer[4096]; int len; int count; diff --git a/lib/libexpat/tests/chardata.h b/lib/libexpat/tests/chardata.h index e8dc4ce22c4..0db4b999b58 100644 --- a/lib/libexpat/tests/chardata.h +++ b/lib/libexpat/tests/chardata.h @@ -18,7 +18,7 @@ extern "C" { typedef struct { int count; /* # of chars, < 0 if not set */ - XML_Char data[1024]; + XML_Char data[2048]; } CharData; diff --git a/lib/libexpat/tests/minicheck.c b/lib/libexpat/tests/minicheck.c index 9003d3b7fd7..a6cae231b75 100644 --- a/lib/libexpat/tests/minicheck.c +++ b/lib/libexpat/tests/minicheck.c @@ -70,6 +70,32 @@ tcase_add_test(TCase *tc, tcase_test_function test) tc->ntests++; } +static void +tcase_free(TCase *tc) +{ + if (! tc) { + return; + } + + free(tc->tests); + free(tc); +} + +static void +suite_free(Suite *suite) +{ + if (! suite) { + return; + } + + while (suite->tests != NULL) { + TCase *next = suite->tests->next_tcase; + tcase_free(suite->tests); + suite->tests = next; + } + free(suite); +} + SRunner * srunner_create(Suite *suite) { @@ -175,6 +201,10 @@ srunner_ntests_failed(SRunner *runner) void srunner_free(SRunner *runner) { - free(runner->suite); + if (! runner) { + return; + } + + suite_free(runner->suite); free(runner); } diff --git a/lib/libexpat/tests/runtests.c b/lib/libexpat/tests/runtests.c index 5f5fde429fe..aa95fd377b8 100644 --- a/lib/libexpat/tests/runtests.c +++ b/lib/libexpat/tests/runtests.c @@ -26,6 +26,7 @@ #include "minicheck.h" #include "memcheck.h" #include "siphash.h" +#include "ascii.h" /* for ASCII_xxx */ #ifdef XML_LARGE_SIZE #define XML_FMT_INT_MOD "ll" @@ -127,6 +128,34 @@ _expect_failure(const char *text, enum XML_Error errorCode, const char *errorMes /* Dummy handlers for when we need to set a handler to tickle a bug, but it doesn't need to do anything. */ +static unsigned long dummy_handler_flags = 0; + +#define DUMMY_START_DOCTYPE_HANDLER_FLAG (1UL << 0) +#define DUMMY_END_DOCTYPE_HANDLER_FLAG (1UL << 1) +#define DUMMY_ENTITY_DECL_HANDLER_FLAG (1UL << 2) +#define DUMMY_NOTATION_DECL_HANDLER_FLAG (1UL << 3) +#define DUMMY_ELEMENT_DECL_HANDLER_FLAG (1UL << 4) +#define DUMMY_ATTLIST_DECL_HANDLER_FLAG (1UL << 5) +#define DUMMY_COMMENT_HANDLER_FLAG (1UL << 6) +#define DUMMY_PI_HANDLER_FLAG (1UL << 7) +#define DUMMY_START_ELEMENT_HANDLER_FLAG (1UL << 8) +#define DUMMY_START_CDATA_HANDLER_FLAG (1UL << 9) +#define DUMMY_END_CDATA_HANDLER_FLAG (1UL << 10) +#define DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG (1UL << 11) +#define DUMMY_START_NS_DECL_HANDLER_FLAG (1UL << 12) +#define DUMMY_END_NS_DECL_HANDLER_FLAG (1UL << 13) +#define DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG (1UL << 14) +#define DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG (1UL << 15) +#define DUMMY_SKIP_HANDLER_FLAG (1UL << 16) +#define DUMMY_DEFAULT_HANDLER_FLAG (1UL << 17) + + +static void XMLCALL +dummy_xdecl_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(version), + const XML_Char *UNUSED_P(encoding), + int UNUSED_P(standalone)) +{} static void XMLCALL dummy_start_doctype_handler(void *UNUSED_P(userData), @@ -134,11 +163,15 @@ dummy_start_doctype_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(sysid), const XML_Char *UNUSED_P(pubid), int UNUSED_P(has_internal_subset)) -{} +{ + dummy_handler_flags |= DUMMY_START_DOCTYPE_HANDLER_FLAG; +} static void XMLCALL dummy_end_doctype_handler(void *UNUSED_P(userData)) -{} +{ + dummy_handler_flags |= DUMMY_END_DOCTYPE_HANDLER_FLAG; +} static void XMLCALL dummy_entity_decl_handler(void *UNUSED_P(userData), @@ -150,7 +183,9 @@ dummy_entity_decl_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(systemId), const XML_Char *UNUSED_P(publicId), const XML_Char *UNUSED_P(notationName)) -{} +{ + dummy_handler_flags |= DUMMY_ENTITY_DECL_HANDLER_FLAG; +} static void XMLCALL dummy_notation_decl_handler(void *UNUSED_P(userData), @@ -158,13 +193,22 @@ dummy_notation_decl_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(base), const XML_Char *UNUSED_P(systemId), const XML_Char *UNUSED_P(publicId)) -{} +{ + dummy_handler_flags |= DUMMY_NOTATION_DECL_HANDLER_FLAG; +} static void XMLCALL dummy_element_decl_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name), - XML_Content *UNUSED_P(model)) -{} + XML_Content *model) +{ + /* The content model must be freed by the handler. Unfortunately + * we cannot pass the parser as the userData because this is used + * with other handlers that require other userData. + */ + XML_FreeContentModel(parser, model); + dummy_handler_flags |= DUMMY_ELEMENT_DECL_HANDLER_FLAG; +} static void XMLCALL dummy_attlist_decl_handler(void *UNUSED_P(userData), @@ -173,39 +217,65 @@ dummy_attlist_decl_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(att_type), const XML_Char *UNUSED_P(dflt), int UNUSED_P(isrequired)) -{} +{ + dummy_handler_flags |= DUMMY_ATTLIST_DECL_HANDLER_FLAG; +} static void XMLCALL dummy_comment_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(data)) -{} +{ + dummy_handler_flags |= DUMMY_COMMENT_HANDLER_FLAG; +} static void XMLCALL dummy_pi_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target), const XML_Char *UNUSED_P(data)) -{} +{ + dummy_handler_flags |= DUMMY_PI_HANDLER_FLAG; +} static void XMLCALL dummy_start_element(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts)) +{ + dummy_handler_flags |= DUMMY_START_ELEMENT_HANDLER_FLAG; +} + +static void XMLCALL +dummy_end_element(void *UNUSED_P(userData), const XML_Char *UNUSED_P(name)) {} static void XMLCALL dummy_start_cdata_handler(void *UNUSED_P(userData)) -{} +{ + dummy_handler_flags |= DUMMY_START_CDATA_HANDLER_FLAG; +} static void XMLCALL dummy_end_cdata_handler(void *UNUSED_P(userData)) +{ + dummy_handler_flags |= DUMMY_END_CDATA_HANDLER_FLAG; +} + +static void XMLCALL +dummy_cdata_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(s), + int UNUSED_P(len)) {} static void XMLCALL dummy_start_namespace_decl_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(prefix), const XML_Char *UNUSED_P(uri)) -{} +{ + dummy_handler_flags |= DUMMY_START_NS_DECL_HANDLER_FLAG; +} static void XMLCALL dummy_end_namespace_decl_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(prefix)) -{} +{ + dummy_handler_flags |= DUMMY_END_NS_DECL_HANDLER_FLAG; +} /* This handler is obsolete, but while the code exists we should * ensure that dealing with the handler is covered by tests. @@ -217,8 +287,115 @@ dummy_unparsed_entity_decl_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(systemId), const XML_Char *UNUSED_P(publicId), const XML_Char *UNUSED_P(notationName)) +{ + dummy_handler_flags |= DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG; +} + +static void XMLCALL +dummy_default_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(s), + int UNUSED_P(len)) {} +static void XMLCALL +dummy_start_doctype_decl_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(doctypeName), + const XML_Char *UNUSED_P(sysid), + const XML_Char *UNUSED_P(pubid), + int UNUSED_P(has_internal_subset)) +{ + dummy_handler_flags |= DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG; +} + +static void XMLCALL +dummy_end_doctype_decl_handler(void *UNUSED_P(userData)) +{ + dummy_handler_flags |= DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG; +} + +static void XMLCALL +dummy_skip_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(entityName), + int UNUSED_P(is_parameter_entity)) +{ + dummy_handler_flags |= DUMMY_SKIP_HANDLER_FLAG; +} + +/* Useful external entity handler */ +typedef struct ExtOption { + const char *system_id; + const char *parse_text; +} ExtOption; + +static int XMLCALL +external_entity_optioner(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *UNUSED_P(publicId)) +{ + ExtOption *options = (ExtOption *)XML_GetUserData(parser); + XML_Parser ext_parser; + + while (options->parse_text != NULL) { + if (!strcmp(systemId, options->system_id)) { + enum XML_Status rc; + ext_parser = + XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + return XML_STATUS_ERROR; + rc = _XML_Parse_SINGLE_BYTES(ext_parser, options->parse_text, + strlen(options->parse_text), + XML_TRUE); + XML_ParserFree(ext_parser); + return rc; + } + options++; + } + fail("No suitable option found"); + return XML_STATUS_ERROR; +} + +/* + * Parameter entity evaluation support. + */ +#define ENTITY_MATCH_FAIL (-1) +#define ENTITY_MATCH_NOT_FOUND (0) +#define ENTITY_MATCH_SUCCESS (1) +static const XML_Char *entity_name_to_match = NULL; +static const XML_Char *entity_value_to_match = NULL; +static int entity_match_flag = ENTITY_MATCH_NOT_FOUND; + +static void XMLCALL +param_entity_match_handler(void *UNUSED_P(userData), + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId), + const XML_Char *UNUSED_P(notationName)) +{ + if (!is_parameter_entity || + entity_name_to_match == NULL || + entity_value_to_match == NULL) { + return; + } + if (!strcmp(entityName, entity_name_to_match)) { + /* The cast here is safe because we control the horizontal and + * the vertical, and we therefore know our strings are never + * going to overflow an int. + */ + if (value_length != (int)strlen(entity_value_to_match) || + strncmp(value, entity_value_to_match, value_length)) { + entity_match_flag = ENTITY_MATCH_FAIL; + } else { + entity_match_flag = ENTITY_MATCH_SUCCESS; + } + } + /* Else leave the match flag alone */ +} /* * Character & encoding tests. @@ -313,6 +490,16 @@ START_TEST(test_bom_utf16_le) } END_TEST +/* Parse whole buffer at once to exercise a different code path */ +START_TEST(test_nobom_utf16_le) +{ + char text[] = " \0<\0e\0/\0>\0"; + + if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + static void XMLCALL accumulate_characters(void *userData, const XML_Char *s, int len) { @@ -324,9 +511,15 @@ accumulate_attribute(void *userData, const XML_Char *UNUSED_P(name), const XML_Char **atts) { CharData *storage = (CharData *)userData; - if (storage->count < 0 && atts != NULL && atts[0] != NULL) { + + /* Check there are attributes to deal with */ + if (atts == NULL) + return; + + while (storage->count < 0 && atts[0] != NULL) { /* "accumulate" the value of the first attribute we see */ CharData_AppendXMLChars(storage, atts[1], -1); + atts += 2; } } @@ -365,6 +558,40 @@ _run_attribute_check(const XML_Char *text, const XML_Char *expected, #define run_attribute_check(text, expected) \ _run_attribute_check(text, expected, __FILE__, __LINE__) +typedef struct ExtTest { + const char *parse_text; + const char *encoding; + CharData *storage; +} ExtTest; + +static void XMLCALL +ext_accumulate_characters(void *userData, const XML_Char *s, int len) +{ + ExtTest *test_data = (ExtTest *)userData; + accumulate_characters(test_data->storage, s, len); +} + +static void +_run_ext_character_check(const XML_Char *text, + ExtTest *test_data, + const XML_Char *expected, + const char *file, int line) +{ + CharData storage; + + CharData_Init(&storage); + test_data->storage = &storage; + XML_SetUserData(parser, test_data); + XML_SetCharacterDataHandler(parser, ext_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + _xml_failure(parser, file, line); + CharData_CheckXMLChars(&storage, expected); +} + +#define run_ext_character_check(text, test_data, expected) \ + _run_ext_character_check(text, test_data, expected, __FILE__, __LINE__) + /* Regression test for SF bug #491986. */ START_TEST(test_danish_latin1) { @@ -522,18 +749,27 @@ END_TEST START_TEST(test_utf16) { /* <?xml version="1.0" encoding="UTF-16"?> - <doc a='123'>some text</doc> - */ + * <doc a='123'>some {A} text</doc> + * + * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A + */ char text[] = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o" "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o" "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066" "\000'\000?\000>\000\n" - "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'" - "\000>\000s\000o\000m\000e\000 \000t\000e\000x\000t\000<\000/" - "\000d\000o\000c\000>"; + "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>" + "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000" + "<\000/\000d\000o\000c\000>"; + char expected[] = "some \357\274\241 text"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); } END_TEST @@ -560,6 +796,34 @@ START_TEST(test_utf16_le_epilog_newline) } END_TEST +/* Test that an outright lie in the encoding is faulted */ +START_TEST(test_not_utf16) +{ + const char *text = + "<?xml version='1.0' encoding='utf-16'?>" + "<doc>Hi</doc>"; + + /* Use a handler to provoke the appropriate code paths */ + XML_SetXmlDeclHandler(parser, dummy_xdecl_handler); + expect_failure(text, + XML_ERROR_INCORRECT_ENCODING, + "UTF-16 declared in UTF-8 not faulted"); +} +END_TEST + +/* Test that an unknown encoding is rejected */ +START_TEST(test_bad_encoding) +{ + const char *text = "<doc>Hi</doc>"; + + if (!XML_SetEncoding(parser, "unknown-encoding")) + fail("XML_SetEncoding failed"); + expect_failure(text, + XML_ERROR_UNKNOWN_ENCODING, + "Unknown encoding not faulted"); +} +END_TEST + /* Regression test for SF bug #481609, #774028. */ START_TEST(test_latin1_umlauts) { @@ -574,6 +838,128 @@ START_TEST(test_latin1_umlauts) run_character_check(text, utf8); XML_ParserReset(parser, NULL); run_attribute_check(text, utf8); + /* Repeat with a default handler */ + XML_ParserReset(parser, NULL); + XML_SetDefaultHandler(parser, dummy_default_handler); + run_character_check(text, utf8); + XML_ParserReset(parser, NULL); + XML_SetDefaultHandler(parser, dummy_default_handler); + run_attribute_check(text, utf8); +} +END_TEST + +/* Test that an element name with a 4-byte UTF-8 character is rejected */ +START_TEST(test_long_utf8_character) +{ + const char *text = + "<?xml version='1.0' encoding='utf-8'?>\n" + /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */ + "<do\xf0\x90\x80\x80/>"; + expect_failure(text, + XML_ERROR_INVALID_TOKEN, + "4-byte UTF-8 character in element name not faulted"); +} +END_TEST + +/* Test that a long latin-1 attribute (too long to convert in one go) + * is correctly converted + */ +START_TEST(test_long_latin1_attribute) +{ + const char *text = + "<?xml version='1.0' encoding='iso-8859-1'?>\n" + "<doc att='" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + /* Last character splits across a buffer boundary */ + "\xe4'>\n</doc>"; + const char *expected = + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "\xc3\xa4"; + + run_attribute_check(text, expected); +} +END_TEST + + +/* Test that a long ASCII attribute (too long to convert in one go) + * is correctly converted + */ +START_TEST(test_long_ascii_attribute) +{ + const char *text = + "<?xml version='1.0' encoding='us-ascii'?>\n" + "<doc att='" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "01234'>\n</doc>"; + const char *expected = + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "01234"; + + run_attribute_check(text, expected); } END_TEST @@ -757,12 +1143,64 @@ START_TEST(test_really_long_lines) } END_TEST +/* Test cdata processing across a buffer boundary */ +START_TEST(test_really_long_encoded_lines) +{ + /* As above, except that we want to provoke an output buffer + * overflow with a non-trivial encoding. For this we need to pass + * the whole cdata in one go, not byte-by-byte. + */ + void *buffer; + const char *text = + "<?xml version='1.0' encoding='iso-8859-1'?>" + "<e>" + /* 64 chars */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + /* until we have at least 1024 characters on the line: */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" + "</e>"; + int parse_len = strlen(text); + + /* Need a cdata handler to provoke the code path we want to test */ + XML_SetCharacterDataHandler(parser, dummy_cdata_handler); + buffer = XML_GetBuffer(parser, parse_len); + if (buffer == NULL) + fail("Could not allocate parse buffer"); + memcpy(buffer, text, parse_len); + if (XML_ParseBuffer(parser, parse_len, XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + /* * Element event tests. */ static void XMLCALL +start_element_event_handler(void *userData, + const XML_Char *name, + const XML_Char **UNUSED_P(atts)) +{ + CharData_AppendXMLChars((CharData *)userData, name, -1); +} + +static void XMLCALL end_element_event_handler(void *userData, const XML_Char *name) { CharData *storage = (CharData *) userData; @@ -912,6 +1350,31 @@ START_TEST(test_xmldecl_misplaced) } END_TEST +START_TEST(test_xmldecl_invalid) +{ + expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", + XML_ERROR_XML_DECL, + "Failed to report invalid XML declaration"); +} +END_TEST + +START_TEST(test_xmldecl_missing_attr) +{ + expect_failure("<?xml ='1.0'?>\n<doc/>\n", + XML_ERROR_XML_DECL, + "Failed to report missing XML declaration attribute"); +} +END_TEST + +START_TEST(test_xmldecl_missing_value) +{ + expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n" + "<doc/>", + XML_ERROR_XML_DECL, + "Failed to report missing attribute value"); +} +END_TEST + /* Regression test for SF bug #584832. */ static int XMLCALL UnknownEncodingHandler(void *UNUSED_P(data),const XML_Char *encoding,XML_Encoding *info) @@ -974,44 +1437,170 @@ END_TEST /* Regression test for SF bug #620106. */ static int XMLCALL -external_entity_loader_set_encoding(XML_Parser parser, - const XML_Char *context, - const XML_Char *UNUSED_P(base), - const XML_Char *UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) -{ - /* This text says it's an unsupported encoding, but it's really - UTF-8, which we tell Expat using XML_SetEncoding(). - */ - const char *text = - "<?xml encoding='iso-8859-3'?>" - "\xC3\xA9"; +external_entity_loader(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + ExtTest *test_data = (ExtTest *)XML_GetUserData(parser); XML_Parser extparser; extparser = XML_ExternalEntityParserCreate(parser, context, NULL); if (extparser == NULL) fail("Could not create external entity parser."); - if (!XML_SetEncoding(extparser, "utf-8")) - fail("XML_SetEncoding() ignored for external entity"); - if ( _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE) + if (test_data->encoding != NULL) { + if (!XML_SetEncoding(extparser, test_data->encoding)) + fail("XML_SetEncoding() ignored for external entity"); + } + if ( _XML_Parse_SINGLE_BYTES(extparser, + test_data->parse_text, + strlen(test_data->parse_text), + XML_TRUE) == XML_STATUS_ERROR) { - xml_failure(parser); - return 0; + xml_failure(extparser); + return XML_STATUS_ERROR; } - return 1; + XML_ParserFree(extparser); + return XML_STATUS_OK; } START_TEST(test_ext_entity_set_encoding) { const char *text = "<!DOCTYPE doc [\n" - " <!ENTITY en SYSTEM 'http://xml.libexpat.org/dummy.ent'>\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" "]>\n" "<doc>&en;</doc>"; + ExtTest test_data = { + /* This text says it's an unsupported encoding, but it's really + UTF-8, which we tell Expat using XML_SetEncoding(). + */ + "<?xml encoding='iso-8859-3'?>\xC3\xA9", + "utf-8", + NULL + }; - XML_SetExternalEntityRefHandler(parser, - external_entity_loader_set_encoding); - run_character_check(text, "\xC3\xA9"); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + run_ext_character_check(text, &test_data, "\xC3\xA9"); +} +END_TEST + +/* Test external entities with no handler */ +START_TEST(test_ext_entity_no_handler) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + + XML_SetDefaultHandler(parser, dummy_default_handler); + run_character_check(text, ""); +} +END_TEST + +/* Test UTF-8 BOM is accepted */ +START_TEST(test_ext_entity_set_bom) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtTest test_data = { + "\xEF\xBB\xBF" /* BOM */ + "<?xml encoding='iso-8859-3'?>" + "\xC3\xA9", + "utf-8", + NULL + }; + + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + run_ext_character_check(text, &test_data, "\xC3\xA9"); +} +END_TEST + + +/* Test that bad encodings are faulted */ +typedef struct ext_faults +{ + const char *parse_text; + const char *fail_text; + const char *encoding; + enum XML_Error error; +} ExtFaults; + +static int XMLCALL +external_entity_faulter(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + XML_Parser ext_parser; + ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser); + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (fault->encoding != NULL) { + if (!XML_SetEncoding(ext_parser, fault->encoding)) + fail("XML_SetEncoding failed"); + } + if (_XML_Parse_SINGLE_BYTES(ext_parser, + fault->parse_text, + strlen(fault->parse_text), + XML_TRUE) != XML_STATUS_ERROR) + fail(fault->fail_text); + if (XML_GetErrorCode(ext_parser) != fault->error) + xml_failure(ext_parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_ERROR; +} + +START_TEST(test_ext_entity_bad_encoding) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtFaults fault = { + "<?xml encoding='iso-8859-3'?>u", + "Unsupported encoding not faulted", + "unknown", + XML_ERROR_UNKNOWN_ENCODING + }; + + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, &fault); + expect_failure(text, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Bad encoding should not have been accepted"); +} +END_TEST + +/* Try handing an invalid encoding to an external entity parser */ +START_TEST(test_ext_entity_bad_encoding_2) +{ + const char *text = + "<?xml version='1.0' encoding='us-ascii'?>\n" + "<!DOCTYPE doc SYSTEM 'foo'>\n" + "<doc>&entity;</doc>"; + ExtFaults fault = { + "<!ELEMENT doc (#PCDATA)*>", + "Unknown encoding not faulted", + "unknown-encoding", + XML_ERROR_UNKNOWN_ENCODING + }; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, &fault); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Bad encoding not faulted in external entity handler"); } END_TEST @@ -1053,27 +1642,6 @@ START_TEST(test_wfc_undeclared_entity_standalone) { } END_TEST -static int XMLCALL -external_entity_loader(XML_Parser parser, - const XML_Char *context, - const XML_Char *UNUSED_P(base), - const XML_Char *UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) -{ - char *text = (char *)XML_GetUserData(parser); - XML_Parser extparser; - - extparser = XML_ExternalEntityParserCreate(parser, context, NULL); - if (extparser == NULL) - fail("Could not create external entity parser."); - if ( _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE) - == XML_STATUS_ERROR) { - xml_failure(parser); - return XML_STATUS_ERROR; - } - return XML_STATUS_OK; -} - /* Test that an error is reported for unknown entities if we have read an external subset, and standalone is true. */ @@ -1082,11 +1650,14 @@ START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) { "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" "<!DOCTYPE doc SYSTEM 'foo'>\n" "<doc>&entity;</doc>"; - char foo_text[] = - "<!ELEMENT doc (#PCDATA)*>"; + ExtTest test_data = { + "<!ELEMENT doc (#PCDATA)*>", + NULL, + NULL + }; XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, foo_text); + XML_SetUserData(parser, &test_data); XML_SetExternalEntityRefHandler(parser, external_entity_loader); expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, @@ -1094,6 +1665,26 @@ START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) { } END_TEST +/* Test that external entity handling is not done if the parsing flag + * is set to UNLESS_STANDALONE + */ +START_TEST(test_entity_with_external_subset_unless_standalone) { + const char *text = + "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" + "<!DOCTYPE doc SYSTEM 'foo'>\n" + "<doc>&entity;</doc>"; + ExtTest test_data = { "<!ENTITY entity 'bar'>", NULL, NULL }; + + XML_SetParamEntityParsing(parser, + XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); + XML_SetUserData(parser, &test_data); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + expect_failure(text, + XML_ERROR_UNDEFINED_ENTITY, + "Parser did not report undefined entity"); +} +END_TEST + /* Test that no error is reported for unknown entities if we have read an external subset, and standalone is false. */ @@ -1102,14 +1693,15 @@ START_TEST(test_wfc_undeclared_entity_with_external_subset) { "<?xml version='1.0' encoding='us-ascii'?>\n" "<!DOCTYPE doc SYSTEM 'foo'>\n" "<doc>&entity;</doc>"; - char foo_text[] = - "<!ELEMENT doc (#PCDATA)*>"; + ExtTest test_data = { + "<!ELEMENT doc (#PCDATA)*>", + NULL, + NULL + }; XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, foo_text); XML_SetExternalEntityRefHandler(parser, external_entity_loader); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); + run_ext_character_check(text, &test_data, ""); } END_TEST @@ -1126,15 +1718,24 @@ START_TEST(test_not_standalone_handler_reject) "<?xml version='1.0' encoding='us-ascii'?>\n" "<!DOCTYPE doc SYSTEM 'foo'>\n" "<doc>&entity;</doc>"; - char foo_text[] = - "<!ELEMENT doc (#PCDATA)*>"; + ExtTest test_data = { + "<!ELEMENT doc (#PCDATA)*>", + NULL, + NULL + }; XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, foo_text); + XML_SetUserData(parser, &test_data); XML_SetExternalEntityRefHandler(parser, external_entity_loader); XML_SetNotStandaloneHandler(parser, reject_not_standalone_handler); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) - fail("NotStandalone handler failed to reject"); + expect_failure(text, XML_ERROR_NOT_STANDALONE, + "NotStandalone handler failed to reject"); + + /* Try again but without external entity handling */ + XML_ParserReset(parser, NULL); + XML_SetNotStandaloneHandler(parser, reject_not_standalone_handler); + expect_failure(text, XML_ERROR_NOT_STANDALONE, + "NotStandalone handler failed to reject"); } END_TEST @@ -1151,15 +1752,21 @@ START_TEST(test_not_standalone_handler_accept) "<?xml version='1.0' encoding='us-ascii'?>\n" "<!DOCTYPE doc SYSTEM 'foo'>\n" "<doc>&entity;</doc>"; - char foo_text[] = - "<!ELEMENT doc (#PCDATA)*>"; + ExtTest test_data = { + "<!ELEMENT doc (#PCDATA)*>", + NULL, + NULL + }; XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, foo_text); XML_SetExternalEntityRefHandler(parser, external_entity_loader); XML_SetNotStandaloneHandler(parser, accept_not_standalone_handler); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); + run_ext_character_check(text, &test_data, ""); + + /* Repeat wtihout the external entity handler */ + XML_ParserReset(parser, NULL); + XML_SetNotStandaloneHandler(parser, accept_not_standalone_handler); + run_character_check(text, ""); } END_TEST @@ -1177,13 +1784,57 @@ START_TEST(test_wfc_no_recursive_entity_refs) } END_TEST +/* Test incomplete external entities are faulted */ +START_TEST(test_ext_entity_invalid_parse) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + const ExtFaults faults[] = { + { + "<", + "Incomplete element declaration not faulted", + NULL, + XML_ERROR_UNCLOSED_TOKEN + }, + { + "<\xe2\x82", /* First two bytes of a three-byte char */ + "Incomplete character not faulted", + NULL, + XML_ERROR_PARTIAL_CHAR + }, + { + "<tag>\xe2\x82", + "Incomplete character in CDATA not faulted", + NULL, + XML_ERROR_PARTIAL_CHAR + }, + { NULL, NULL, NULL, XML_ERROR_NONE } + }; + const ExtFaults *fault = faults; + + for (; fault->parse_text != NULL; fault++) { + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, (void *)fault); + expect_failure(text, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Parser did not report external entity error"); + XML_ParserReset(parser, NULL); + } +} +END_TEST + + /* Regression test for SF bug #483514. */ START_TEST(test_dtd_default_handling) { const char *text = "<!DOCTYPE doc [\n" - "<!ENTITY e SYSTEM 'http://xml.libexpat.org/e'>\n" - "<!NOTATION n SYSTEM 'http://xml.libexpat.org/n'>\n" + "<!ENTITY e SYSTEM 'http://example.org/e'>\n" + "<!NOTATION n SYSTEM 'http://example.org/n'>\n" "<!ELEMENT doc EMPTY>\n" "<!ATTLIST doc a CDATA #IMPLIED>\n" "<?pi in dtd?>\n" @@ -1205,6 +1856,107 @@ START_TEST(test_dtd_default_handling) } END_TEST +/* Test handling of attribute declarations */ +typedef struct AttTest { + const XML_Char *definition; + const XML_Char *element_name; + const XML_Char *attr_name; + const XML_Char *attr_type; + const XML_Char *default_value; + int is_required; +} AttTest; + +static void XMLCALL +verify_attlist_decl_handler(void *userData, + const XML_Char *element_name, + const XML_Char *attr_name, + const XML_Char *attr_type, + const XML_Char *default_value, + int is_required) +{ + AttTest *at = (AttTest *)userData; + + if (strcmp(element_name, at->element_name)) + fail("Unexpected element name in attribute declaration"); + if (strcmp(attr_name, at->attr_name)) + fail("Unexpected attribute name in attribute declaration"); + if (strcmp(attr_type, at->attr_type)) + fail("Unexpected attribute type in attribute declaration"); + if ((default_value == NULL && at->default_value != NULL) || + (default_value != NULL && at->default_value == NULL) || + (default_value != NULL && strcmp(default_value, at->default_value))) + fail("Unexpected default value in attribute declaration"); + if (is_required != at->is_required) + fail("Requirement mismatch in attribute declaration"); +} + +START_TEST(test_dtd_attr_handling) +{ + const char *prolog = + "<!DOCTYPE doc [\n" + "<!ELEMENT doc EMPTY>\n"; + AttTest attr_data[] = { + { + "<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n" + "]>" + "<doc a='two'/>", + "doc", + "a", + "(one|two|three)", /* Extraneous spaces will be removed */ + NULL, + XML_TRUE + }, + { + "<!NOTATION foo SYSTEM 'http://example.org/foo'>\n" + "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n" + "]>" + "<doc/>", + "doc", + "a", + "NOTATION(foo)", + NULL, + XML_FALSE + }, + { + "<!ATTLIST doc a NOTATION (foo) 'bar'>\n" + "]>" + "<doc/>", + "doc", + "a", + "NOTATION(foo)", + "bar", + XML_FALSE + }, + { + "<!ATTLIST doc a CDATA '\xdb\xb2'>\n" + "]>" + "<doc/>", + "doc", + "a", + "CDATA", + "\xdb\xb2", + XML_FALSE + }, + { NULL, NULL, NULL, NULL, NULL, XML_FALSE } + }; + AttTest *test; + + for (test = attr_data; test->definition != NULL; test++) { + XML_SetAttlistDeclHandler(parser, verify_attlist_decl_handler); + XML_SetUserData(parser, test); + if (_XML_Parse_SINGLE_BYTES(parser, prolog, strlen(prolog), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + if (_XML_Parse_SINGLE_BYTES(parser, + test->definition, + strlen(test->definition), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + XML_ParserReset(parser, NULL); + } +} +END_TEST + /* See related SF bug #673791. When namespace processing is enabled, setting the namespace URI for a prefix is not allowed; this test ensures that it *is* allowed @@ -1214,7 +1966,7 @@ END_TEST START_TEST(test_empty_ns_without_namespaces) { const char *text = - "<doc xmlns:prefix='http://www.example.com/'>\n" + "<doc xmlns:prefix='http://example.org/'>\n" " <e xmlns:prefix=''/>\n" "</doc>"; @@ -1232,7 +1984,7 @@ START_TEST(test_ns_in_attribute_default_without_namespaces) const char *text = "<!DOCTYPE e:element [\n" " <!ATTLIST e:element\n" - " xmlns:e CDATA 'http://example.com/'>\n" + " xmlns:e CDATA 'http://example.org/'>\n" " ]>\n" "<e:element/>"; @@ -1391,6 +2143,20 @@ START_TEST(test_good_cdata_ascii) CharData_Init(&storage); XML_SetUserData(parser, &storage); XML_SetCharacterDataHandler(parser, accumulate_characters); + /* Add start and end handlers for coverage */ + XML_SetStartCdataSectionHandler(parser, dummy_start_cdata_handler); + XML_SetEndCdataSectionHandler(parser, dummy_end_cdata_handler); + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); + + /* Try again, this time with a default handler */ + XML_ParserReset(parser, NULL); + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); + XML_SetDefaultHandler(parser, dummy_default_handler); if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); @@ -1423,6 +2189,168 @@ START_TEST(test_good_cdata_utf16) } END_TEST +START_TEST(test_good_cdata_utf16_le) +{ + /* Test data is: + * <?xml version='1.0' encoding='utf-16'?> + * <a><![CDATA[hello]]></a> + */ + const char text[] = + "<\0?\0x\0m\0l\0" + " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'" + "\0?\0>\0\n" + "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0"; + const char *expected = "hello"; + + CharData storage; + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test UTF16 conversion of a long cdata string */ + +/* 16 characters: handy macro to reduce visual clutter */ +#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P" + +START_TEST(test_long_cdata_utf16) +{ + /* Test data is: + * <?xlm version='1.0' encoding='utf-16'?> + * <a><![CDATA[ + * ABCDEFGHIJKLMNOP + * ]]></a> + */ + const char text[] = + "\0<\0?\0x\0m\0l\0 " + "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 " + "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>" + "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" + /* 64 characters per line */ + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 + A_TO_P_IN_UTF16 + "\0]\0]\0>\0<\0/\0a\0>"; + const char *expected = + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOP"; + CharData storage; + void *buffer; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); + buffer = XML_GetBuffer(parser, sizeof(text) - 1); + if (buffer == NULL) + fail("Could not allocate parse buffer"); + memcpy(buffer, text, sizeof(text) - 1); + if (XML_ParseBuffer(parser, + sizeof(text) - 1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test handling of multiple unit UTF-16 characters */ +START_TEST(test_multichar_cdata_utf16) +{ + /* Test data is: + * <?xml version='1.0' encoding='utf-16'?> + * <a><![CDATA[{MINIM}{CROTCHET}]]></a> + * + * where {MINIM} is U+1d15e (a minim or half-note) + * UTF-16: 0xd834 0xdd5e + * UTF-8: 0xf0 0x9d 0x85 0x9e + * and {CROTCHET} is U+1d15f (a crotchet or quarter-note) + * UTF-16: 0xd834 0xdd5e + * UTF-8: 0xf0 0x9d 0x85 0x9e + */ + const char text[] = + "\0<\0?\0x\0m\0l\0" + " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'" + "\0?\0>\0\n" + "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" + "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f" + "\0]\0]\0>\0<\0/\0a\0>"; + const char *expected = "\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that an element name with a UTF-16 surrogate pair is rejected */ +START_TEST(test_utf16_bad_surrogate_pair) +{ + /* Test data is: + * <?xml version='1.0' encoding='utf-16'?> + * <a><![CDATA[{BADLINB}]]></a> + * + * where {BADLINB} is U+10000 (the first Linear B character) + * with the UTF-16 surrogate pair in the wrong order, i.e. + * 0xdc00 0xd800 + */ + const char text[] = + "\0<\0?\0x\0m\0l\0" + " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'" + "\0?\0>\0\n" + "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" + "\xdc\x00\xd8\x00" + "\0]\0]\0>\0<\0/\0a\0>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, + XML_TRUE) != XML_STATUS_ERROR) + fail("Reversed UTF-16 surrogate pair not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN) + xml_failure(parser); +} +END_TEST + + START_TEST(test_bad_cdata) { struct CaseData { @@ -1477,6 +2405,144 @@ START_TEST(test_bad_cdata) } END_TEST +/* Test failures in UTF-16 CDATA */ +START_TEST(test_bad_cdata_utf16) +{ + struct CaseData { + size_t text_bytes; + const char *text; + enum XML_Error expected_error; + }; + + const char prolog[] = + "\0<\0?\0x\0m\0l\0" + " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'" + "\0?\0>\0\n" + "\0<\0a\0>"; + struct CaseData cases[] = { + {1, "\0", XML_ERROR_UNCLOSED_TOKEN}, + {2, "\0<", XML_ERROR_UNCLOSED_TOKEN}, + {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN}, + {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN}, + {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN}, + {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN}, + {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN}, + {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN}, + {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN}, + {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN}, + {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN}, + {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN}, + {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN}, + {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN}, + {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN}, + {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN}, + {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN}, + {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", + XML_ERROR_UNCLOSED_CDATA_SECTION}, + {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", + XML_ERROR_UNCLOSED_CDATA_SECTION}, + {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", + XML_ERROR_UNCLOSED_CDATA_SECTION}, + /* Now add a four-byte UTF-16 character */ + {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8", + XML_ERROR_UNCLOSED_CDATA_SECTION}, + {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", + XML_ERROR_PARTIAL_CHAR}, + {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd", + XML_ERROR_PARTIAL_CHAR}, + {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e", + XML_ERROR_UNCLOSED_CDATA_SECTION} + }; + size_t i; + + for (i = 0; i < sizeof(cases)/sizeof(struct CaseData); i++) { + enum XML_Status actual_status; + enum XML_Error actual_error; + + if (_XML_Parse_SINGLE_BYTES(parser, prolog, sizeof(prolog)-1, + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + actual_status = _XML_Parse_SINGLE_BYTES(parser, + cases[i].text, + cases[i].text_bytes, + XML_TRUE); + assert(actual_status == XML_STATUS_ERROR); + actual_error = XML_GetErrorCode(parser); + if (actual_error != cases[i].expected_error) { + char message[1024]; + + sprintf(message, + "Expected error %d (%s), got %d (%s) for case %lu\n", + cases[i].expected_error, + XML_ErrorString(cases[i].expected_error), + actual_error, + XML_ErrorString(actual_error), + (long unsigned)(i+1)); + fail(message); + } + XML_ParserReset(parser, NULL); + } +} +END_TEST + +static const char *long_cdata_text = + "<s><![CDATA[" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "012345678901234567890123456789012345678901234567890123456789" + "]]></s>"; + +/* Test stopping the parser in cdata handler */ +START_TEST(test_stop_parser_between_cdata_calls) +{ + const char *text = long_cdata_text; + + XML_SetCharacterDataHandler(parser, + clearing_aborting_character_handler); + resumable = XML_FALSE; + expect_failure(text, XML_ERROR_ABORTED, + "Parse not aborted in CDATA handler"); +} +END_TEST + +/* Test suspending the parser in cdata handler */ +START_TEST(test_suspend_parser_between_cdata_calls) +{ + const char *text = long_cdata_text; + enum XML_Status result; + + XML_SetCharacterDataHandler(parser, + clearing_aborting_character_handler); + resumable = XML_TRUE; + result = _XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE); + if (result != XML_STATUS_SUSPENDED) { + if (result == XML_STATUS_ERROR) + xml_failure(parser); + fail("Parse not suspended in CDATA handler"); + } + if (XML_GetErrorCode(parser) != XML_ERROR_NONE) + xml_failure(parser); +} +END_TEST + /* Test memory allocation functions */ START_TEST(test_memory_allocation) { @@ -1537,10 +2603,19 @@ record_cdata_nodefault_handler(void *userData, CharData_AppendString((CharData *)userData, "c"); } +static void XMLCALL +record_skip_handler(void *userData, + const XML_Char *UNUSED_P(entityName), + int is_parameter_entity) +{ + CharData_AppendString((CharData *)userData, + is_parameter_entity ? "E" : "e"); +} + /* Test XML_DefaultCurrent() passes handling on correctly */ START_TEST(test_default_current) { - const char *text = "<doc>hello</doc>"; + const char *text = "<doc>hell]</doc>"; const char *entity_text = "<!DOCTYPE doc [\n" "<!ENTITY entity '%'>\n" @@ -1580,6 +2655,19 @@ START_TEST(test_default_current) /* The default handler suppresses the entity */ CharData_CheckString(&storage, "DDDDDDDDDDDDDDDDDDD"); + /* Again, with a skip handler */ + XML_ParserReset(parser, NULL); + XML_SetDefaultHandler(parser, record_default_handler); + XML_SetCharacterDataHandler(parser, record_cdata_handler); + XML_SetSkippedEntityHandler(parser, record_skip_handler); + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, entity_text, strlen(entity_text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + /* The default handler suppresses the entity */ + CharData_CheckString(&storage, "DDDDDDDDDDDDDDDDDeD"); + /* This time, allow the entity through */ XML_ParserReset(parser, NULL); XML_SetDefaultHandlerExpand(parser, record_default_handler); @@ -1628,13 +2716,109 @@ START_TEST(test_set_foreign_dtd) "<?xml version='1.0' encoding='us-ascii'?>\n"; const char *text2 = "<doc>&entity;</doc>"; - char dtd_text[] = "<!ELEMENT doc (#PCDATA)*>"; + ExtTest test_data = { + "<!ELEMENT doc (#PCDATA)*>", + NULL, + NULL + }; + + /* Check hash salt is passed through too */ + XML_SetHashSalt(parser, 0x12345678); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &test_data); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + /* Add a default handler to exercise more code paths */ + XML_SetDefaultHandler(parser, dummy_default_handler); + if (XML_UseForeignDTD(parser, XML_TRUE) != XML_ERROR_NONE) + fail("Could not set foreign DTD"); + if (_XML_Parse_SINGLE_BYTES(parser, text1, strlen(text1), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + + /* Ensure that trying to set the DTD after parsing has started + * is faulted, even if it's the same setting. + */ + if (XML_UseForeignDTD(parser, XML_TRUE) != + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) + fail("Failed to reject late foreign DTD setting"); + /* Ditto for the hash salt */ + if (XML_SetHashSalt(parser, 0x23456789)) + fail("Failed to reject late hash salt change"); + + /* Now finish the parse */ + if (_XML_Parse_SINGLE_BYTES(parser, text2, strlen(text2), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test foreign DTD handling with a failing NotStandalone handler */ +START_TEST(test_foreign_dtd_not_standalone) +{ + const char *text = + "<?xml version='1.0' encoding='us-ascii'?>\n" + "<doc>&entity;</doc>"; + ExtTest test_data = { + "<!ELEMENT doc (#PCDATA)*>", + NULL, + NULL + }; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &test_data); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + XML_SetNotStandaloneHandler(parser, reject_not_standalone_handler); + if (XML_UseForeignDTD(parser, XML_TRUE) != XML_ERROR_NONE) + fail("Could not set foreign DTD"); + expect_failure(text, XML_ERROR_NOT_STANDALONE, + "NotStandalonehandler failed to reject"); +} +END_TEST + +/* Test invalid character in a foreign DTD is faulted */ +START_TEST(test_invalid_foreign_dtd) +{ + const char *text = + "<?xml version='1.0' encoding='us-ascii'?>\n" + "<doc>&entity;</doc>"; + ExtFaults test_data = { + "$", + "Dollar not faulted", + NULL, + XML_ERROR_INVALID_TOKEN + }; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &test_data); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_UseForeignDTD(parser, XML_TRUE); + expect_failure(text, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Bad DTD should not have been accepted"); +} +END_TEST + +/* Test foreign DTD use with a doctype */ +START_TEST(test_foreign_dtd_with_doctype) +{ + const char *text1 = + "<?xml version='1.0' encoding='us-ascii'?>\n" + "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n"; + const char *text2 = + "<doc>&entity;</doc>"; + ExtTest test_data = { + "<!ELEMENT doc (#PCDATA)*>", + NULL, + NULL + }; /* Check hash salt is passed through too */ XML_SetHashSalt(parser, 0x12345678); XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetUserData(parser, dtd_text); + XML_SetUserData(parser, &test_data); XML_SetExternalEntityRefHandler(parser, external_entity_loader); + /* Add a default handler to exercise more code paths */ + XML_SetDefaultHandler(parser, dummy_default_handler); if (XML_UseForeignDTD(parser, XML_TRUE) != XML_ERROR_NONE) fail("Could not set foreign DTD"); if (_XML_Parse_SINGLE_BYTES(parser, text1, strlen(text1), @@ -1644,7 +2828,8 @@ START_TEST(test_set_foreign_dtd) /* Ensure that trying to set the DTD after parsing has started * is faulted, even if it's the same setting. */ - if (XML_UseForeignDTD(parser, XML_TRUE) == XML_ERROR_NONE) + if (XML_UseForeignDTD(parser, XML_TRUE) != + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) fail("Failed to reject late foreign DTD setting"); /* Ditto for the hash salt */ if (XML_SetHashSalt(parser, 0x23456789)) @@ -1657,6 +2842,47 @@ START_TEST(test_set_foreign_dtd) } END_TEST +/* Test XML_UseForeignDTD with no external subset present */ +static int XMLCALL +external_entity_null_loader(XML_Parser UNUSED_P(parser), + const XML_Char *UNUSED_P(context), + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + return XML_STATUS_OK; +} + +START_TEST(test_foreign_dtd_without_external_subset) +{ + const char *text = + "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n" + "<doc>&foo;</doc>"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, NULL); + XML_SetExternalEntityRefHandler(parser, external_entity_null_loader); + XML_UseForeignDTD(parser, XML_TRUE); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +START_TEST(test_empty_foreign_dtd) +{ + const char *text = + "<?xml version='1.0' encoding='us-ascii'?>\n" + "<doc>&entity;</doc>"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_null_loader); + XML_UseForeignDTD(parser, XML_TRUE); + expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, + "Undefined entity not faulted"); +} +END_TEST + /* Test XML Base is set and unset appropriately */ START_TEST(test_set_base) { @@ -1705,7 +2931,12 @@ counting_start_element_handler(void *userData, } if (info->name == NULL) fail("Element not recognised"); - /* Note attribute count is doubled */ + /* The attribute count is twice what you might expect. It is a + * count of items in atts, an array which contains alternating + * attribute names and attribute values. For the naive user this + * is possibly a little unexpected, but it is what the + * documentation in expat.h tells us to expect. + */ count = XML_GetSpecifiedAttributeCount(parser); if (info->attr_count * 2 != count) { fail("Not got expected attribute count"); @@ -1735,6 +2966,7 @@ counting_start_element_handler(void *userData, fail("Attribute has wrong value"); return; } + /* Remember, two entries in atts per attribute (see above) */ atts += 2; } } @@ -1839,6 +3071,23 @@ START_TEST(test_resume_resuspended) } END_TEST +/* Test that CDATA shows up correctly through a default handler */ +START_TEST(test_cdata_default) +{ + const char *text = "<doc><![CDATA[Hello\nworld]]></doc>"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetDefaultHandler(parser, accumulate_characters); + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, text); +} +END_TEST + /* Test resetting a subordinate parser does exactly nothing */ static int XMLCALL external_entity_resetter(XML_Parser parser, @@ -1881,12 +3130,13 @@ external_entity_resetter(XML_Parser parser, fail("Parsing status not still FINISHED"); return XML_STATUS_ERROR; } + XML_ParserFree(ext_parser); return XML_STATUS_OK; } START_TEST(test_subordinate_reset) { - const char *text = + const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" "<!DOCTYPE doc SYSTEM 'foo'>\n" "<doc>&entity;</doc>"; @@ -1936,6 +3186,7 @@ external_entity_suspender(XML_Parser parser, xml_failure(ext_parser); return XML_STATUS_ERROR; } + XML_ParserFree(ext_parser); return XML_STATUS_OK; } @@ -1953,6 +3204,169 @@ START_TEST(test_subordinate_suspend) } END_TEST +/* Test suspending a subordinate parser from an XML declaration */ +/* Increases code coverage of the tests */ +static void XMLCALL +entity_suspending_xdecl_handler(void *userData, + const XML_Char *UNUSED_P(version), + const XML_Char *UNUSED_P(encoding), + int UNUSED_P(standalone)) +{ + XML_Parser ext_parser = (XML_Parser)userData; + + XML_StopParser(ext_parser, resumable); + XML_SetXmlDeclHandler(ext_parser, NULL); +} + +static int XMLCALL +external_entity_suspend_xmldecl(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = "<?xml version='1.0' encoding='us-ascii'?>"; + XML_Parser ext_parser; + XML_ParsingStatus status; + enum XML_Status rc; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + XML_SetXmlDeclHandler(ext_parser, entity_suspending_xdecl_handler); + XML_SetUserData(ext_parser, ext_parser); + rc = _XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), XML_TRUE); + XML_GetParsingStatus(ext_parser, &status); + if (resumable) { + if (rc == XML_STATUS_ERROR) + xml_failure(ext_parser); + if (status.parsing != XML_SUSPENDED) + fail("Ext Parsing status not SUSPENDED"); + } else { + if (rc != XML_STATUS_ERROR) + fail("Ext parsing not aborted"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_ABORTED) + xml_failure(ext_parser); + if (status.parsing != XML_FINISHED) + fail("Ext Parsing status not FINISHED"); + } + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_subordinate_xdecl_suspend) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&entity;</doc>"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_suspend_xmldecl); + resumable = XML_TRUE; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +START_TEST(test_subordinate_xdecl_abort) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&entity;</doc>"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_suspend_xmldecl); + resumable = XML_FALSE; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test external entity fault handling with suspension */ +static int XMLCALL +external_entity_suspending_faulter(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + XML_Parser ext_parser; + ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser); + void *buffer; + int parse_len = strlen(fault->parse_text); + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + XML_SetXmlDeclHandler(ext_parser, entity_suspending_xdecl_handler); + XML_SetUserData(ext_parser, ext_parser); + resumable = XML_TRUE; + buffer = XML_GetBuffer(ext_parser, parse_len); + if (buffer == NULL) + fail("Could not allocate parse buffer"); + memcpy(buffer, fault->parse_text, parse_len); + if (XML_ParseBuffer(ext_parser, parse_len, + XML_FALSE) != XML_STATUS_SUSPENDED) + fail("XML declaration did not suspend"); + if (XML_ResumeParser(ext_parser) != XML_STATUS_OK) + xml_failure(ext_parser); + if (XML_ParseBuffer(ext_parser, 0, XML_TRUE) != XML_STATUS_ERROR) + fail(fault->fail_text); + if (XML_GetErrorCode(ext_parser) != fault->error) + xml_failure(ext_parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_ERROR; +} + +START_TEST(test_ext_entity_invalid_suspended_parse) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtFaults faults[] = { + { + "<?xml version='1.0' encoding='us-ascii'?><", + "Incomplete element declaration not faulted", + NULL, + XML_ERROR_UNCLOSED_TOKEN + }, + { + /* First two bytes of a three-byte char */ + "<?xml version='1.0' encoding='utf-8'?>\xe2\x82", + "Incomplete character not faulted", + NULL, + XML_ERROR_PARTIAL_CHAR + }, + { NULL, NULL, NULL, XML_ERROR_NONE } + }; + ExtFaults *fault; + + for (fault = &faults[0]; fault->parse_text != NULL; fault++) { + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_suspending_faulter); + XML_SetUserData(parser, fault); + expect_failure(text, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Parser did not report external entity error"); + XML_ParserReset(parser, NULL); + } +} +END_TEST + + /* Test setting an explicit encoding */ START_TEST(test_explicit_encoding) @@ -1960,7 +3374,10 @@ START_TEST(test_explicit_encoding) const char *text1 = "<doc>Hello "; const char *text2 = " World</doc>"; - /* First say we are UTF-8 */ + /* Just check that we can set the encoding to NULL before starting */ + if (XML_SetEncoding(parser, NULL) != XML_STATUS_OK) + fail("Failed to initialise encoding to NULL"); + /* Say we are UTF-8 */ if (XML_SetEncoding(parser, "utf-8") != XML_STATUS_OK) fail("Failed to set explicit encoding"); if (_XML_Parse_SINGLE_BYTES(parser, text1, strlen(text1), @@ -1978,6 +3395,268 @@ START_TEST(test_explicit_encoding) } END_TEST + +/* Test handling of trailing CR (rather than newline) */ +static void XMLCALL +cr_cdata_handler(void *userData, const XML_Char *s, int len) +{ + int *pfound = (int *)userData; + + /* Internal processing turns the CR into a newline for the + * character data handler, but not for the default handler + */ + if (len == 1 && (*s == '\n' || *s == '\r')) + *pfound = 1; +} + +START_TEST(test_trailing_cr) +{ + const char *text = "<doc>\r"; + int found_cr; + + /* Try with a character handler, for code coverage */ + XML_SetCharacterDataHandler(parser, cr_cdata_handler); + XML_SetUserData(parser, &found_cr); + found_cr = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_OK) + fail("Failed to fault unclosed doc"); + if (found_cr == 0) + fail("Did not catch the carriage return"); + XML_ParserReset(parser, NULL); + + /* Now with a default handler instead */ + XML_SetDefaultHandler(parser, cr_cdata_handler); + XML_SetUserData(parser, &found_cr); + found_cr = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_OK) + fail("Failed to fault unclosed doc"); + if (found_cr == 0) + fail("Did not catch default carriage return"); +} +END_TEST + +/* Test trailing CR in an external entity parse */ +static int XMLCALL +external_entity_cr_catcher(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = "\r"; + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + XML_SetCharacterDataHandler(ext_parser, cr_cdata_handler); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +static int XMLCALL +external_entity_bad_cr_catcher(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = "<tag>\r"; + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + XML_SetCharacterDataHandler(ext_parser, cr_cdata_handler); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_OK) + fail("Async entity error not caught"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_ASYNC_ENTITY) + xml_failure(ext_parser); + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ext_entity_trailing_cr) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + int found_cr; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_cr_catcher); + XML_SetUserData(parser, &found_cr); + found_cr = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_OK) + xml_failure(parser); + if (found_cr == 0) + fail("No carriage return found"); + XML_ParserReset(parser, NULL); + + /* Try again with a different trailing CR */ + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_bad_cr_catcher); + XML_SetUserData(parser, &found_cr); + found_cr = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_OK) + xml_failure(parser); + if (found_cr == 0) + fail("No carriage return found"); +} +END_TEST + +/* Test handling of trailing square bracket */ +static void XMLCALL +rsqb_handler(void *userData, const XML_Char *s, int len) +{ + int *pfound = (int *)userData; + + if (len == 1 && *s == ']') + *pfound = 1; +} + +START_TEST(test_trailing_rsqb) +{ + const char *text8 = "<doc>]"; + const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000"; + int found_rsqb; + int text8_len = strlen(text8); + + XML_SetCharacterDataHandler(parser, rsqb_handler); + XML_SetUserData(parser, &found_rsqb); + found_rsqb = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text8, text8_len, + XML_TRUE) == XML_STATUS_OK) + fail("Failed to fault unclosed doc"); + if (found_rsqb == 0) + fail("Did not catch the right square bracket"); + + /* Try again with a different encoding */ + XML_ParserReset(parser, NULL); + XML_SetCharacterDataHandler(parser, rsqb_handler); + XML_SetUserData(parser, &found_rsqb); + found_rsqb = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text16, sizeof(text16)-1, + XML_TRUE) == XML_STATUS_OK) + fail("Failed to fault unclosed doc"); + if (found_rsqb == 0) + fail("Did not catch the right square bracket"); + + /* And finally with a default handler */ + XML_ParserReset(parser, NULL); + XML_SetDefaultHandler(parser, rsqb_handler); + XML_SetUserData(parser, &found_rsqb); + found_rsqb = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text16, sizeof(text16)-1, + XML_TRUE) == XML_STATUS_OK) + fail("Failed to fault unclosed doc"); + if (found_rsqb == 0) + fail("Did not catch the right square bracket"); +} +END_TEST + +/* Test trailing right square bracket in an external entity parse */ +static int XMLCALL +external_entity_rsqb_catcher(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = "<tag>]"; + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + XML_SetCharacterDataHandler(ext_parser, rsqb_handler); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + fail("Async entity error not caught"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_ASYNC_ENTITY) + xml_failure(ext_parser); + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ext_entity_trailing_rsqb) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + int found_rsqb; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_rsqb_catcher); + XML_SetUserData(parser, &found_rsqb); + found_rsqb = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_OK) + xml_failure(parser); + if (found_rsqb == 0) + fail("No right square bracket found"); +} +END_TEST + +/* Test CDATA handling in an external entity */ +static int XMLCALL +external_entity_good_cdata_ascii(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = + "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>"; + const char *expected = "<greeting>Hello, world!</greeting>"; + CharData storage; + XML_Parser ext_parser; + + CharData_Init(&storage); + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + XML_SetUserData(ext_parser, &storage); + XML_SetCharacterDataHandler(ext_parser, accumulate_characters); + + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + CharData_CheckXMLChars(&storage, expected); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ext_entity_good_cdata) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_good_cdata_ascii); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_OK) + xml_failure(parser); +} +END_TEST + /* Test user parameter settings */ /* Variable holding the expected handler userData */ static void *handler_data = NULL; @@ -1997,7 +3676,7 @@ xml_decl_handler(void *userData, if (userData != handler_data) fail("User data (xml decl) not correctly set"); if (standalone != -1) - fail("Standalone not show as not present"); + fail("Standalone not flagged as not present in XML decl"); xdecl_count++; } @@ -2045,6 +3724,7 @@ external_entity_param_checker(XML_Parser parser, return XML_STATUS_ERROR; } handler_data = parser; + XML_ParserFree(ext_parser); return XML_STATUS_OK; } @@ -2092,16 +3772,35 @@ END_TEST /* Test that an explicit external entity handler argument replaces * the parser as the first argument. + * + * We do not call the first parameter to the external entity handler + * 'parser' for once, since the first time the handler is called it + * will actually be a text string. We need to be able to access the + * global 'parser' variable to create our external entity parser from, + * since there are code paths we need to ensure get executed. */ static int XMLCALL -external_entity_ref_param_checker(XML_Parser parser, - const XML_Char *UNUSED_P(context), +external_entity_ref_param_checker(XML_Parser parameter, + const XML_Char *context, const XML_Char *UNUSED_P(base), const XML_Char *UNUSED_P(systemId), const XML_Char *UNUSED_P(publicId)) { - if ((void *)parser != handler_data) + const char *text = "<!ELEMENT doc (#PCDATA)*>"; + XML_Parser ext_parser; + + if ((void *)parameter != handler_data) fail("External entity ref handler parameter not correct"); + + /* Here we use the global 'parser' variable */ + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + + XML_ParserFree(ext_parser); return XML_STATUS_OK; } @@ -2169,9 +3868,28 @@ START_TEST(test_empty_parse) END_TEST /* Test odd corners of the XML_GetBuffer interface */ -START_TEST(test_get_buffer_1) +static enum XML_Status +get_feature(enum XML_FeatureEnum feature_id, long *presult) { - const char *text = + const XML_Feature *feature = XML_GetFeatureList(); + + if (feature == NULL) + return XML_STATUS_ERROR; + for (; feature->feature != XML_FEATURE_END; feature++) { + if (feature->feature == feature_id) { + *presult = feature->value; + return XML_STATUS_OK; + } + } + return XML_STATUS_ERROR; +} + +/* Having an element name longer than 1024 characters exercises some + * of the pool allocation code in the parser that otherwise does not + * get executed. The count at the end of the line is the number of + * characters (bytes) in the element name by that point.x + */ +static const char *get_buffer_test_text = "<documentwitharidiculouslylongelementnametotease" /* 0x030 */ "aparticularcorneroftheallocationinXML_GetBuffers" /* 0x060 */ "othatwecanimprovethecoverageyetagain012345678901" /* 0x090 */ @@ -2194,7 +3912,13 @@ START_TEST(test_get_buffer_1) "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x3c0 */ "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x3f0 */ "123456789abcdef0123456789abcdef0123456789>\n<ef0"; /* 0x420 */ + +/* Test odd corners of the XML_GetBuffer interface */ +START_TEST(test_get_buffer_1) +{ + const char *text = get_buffer_test_text; void *buffer; + long context_bytes; /* Attempt to allocate a negative length buffer */ if (XML_GetBuffer(parser, -12) != NULL) @@ -2210,8 +3934,20 @@ START_TEST(test_get_buffer_1) if (XML_GetBuffer(parser, INT_MAX) != NULL) fail("INT_MAX buffer not failed"); - /* Now try extending it a more reasonable but still too large amount */ - if (XML_GetBuffer(parser, INT_MAX - 2049) != NULL) + /* Now try extending it a more reasonable but still too large + * amount. The allocator in XML_GetBuffer() doubles the buffer + * size until it exceeds the requested amount or INT_MAX. If it + * exceeds INT_MAX, it rejects the request, so we want a request + * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable, + * with an extra byte just to ensure that the request is off any + * boundary. The request will be inflated internally by + * XML_CONTEXT_BYTES (if defined), so we subtract that from our + * request. + */ + if (get_feature(XML_FEATURE_CONTEXT_BYTES, + &context_bytes) != XML_STATUS_OK) + context_bytes = 0; + if (XML_GetBuffer(parser, INT_MAX - (context_bytes + 1025)) != NULL) fail("INT_MAX- buffer not failed"); /* Now try extending it a carefully crafted amount */ @@ -2223,29 +3959,7 @@ END_TEST /* Test more corners of the XML_GetBuffer interface */ START_TEST(test_get_buffer_2) { - const char *text = - "<documentwitharidiculouslylongelementnametotease" /* 0x030 */ - "aparticularcorneroftheallocationinXML_GetBuffers" /* 0x060 */ - "othatwecanimprovethecoverageyetagain012345678901" /* 0x090 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x0c0 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x0f0 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x120 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x150 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x180 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x1b0 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x1e0 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x210 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x240 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x270 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x2a0 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x2d0 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x300 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x330 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x360 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x390 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x3c0 */ - "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x3f0 */ - "123456789abcdef0123456789abcdef0123456789>\n<ef0"; /* 0x420 */ + const char *text = get_buffer_test_text; void *buffer; /* Now get a decent buffer */ @@ -2282,21 +3996,31 @@ START_TEST(test_byte_info_at_end) END_TEST /* Test position information from errors */ +#define PRE_ERROR_STR "<doc></" +#define POST_ERROR_STR "wombat></doc>" START_TEST(test_byte_info_at_error) { - const char *text = "<doc></wombat></doc>"; + const char *text = PRE_ERROR_STR POST_ERROR_STR; if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) fail("Syntax error not faulted"); if (XML_GetCurrentByteCount(parser) != 0) fail("Error byte count incorrect"); - if (XML_GetCurrentByteIndex(parser) != 7) + if (XML_GetCurrentByteIndex(parser) != strlen(PRE_ERROR_STR)) fail("Error byte index incorrect"); } END_TEST +#undef PRE_ERROR_STR +#undef POST_ERROR_STR /* Test position information in handler */ +typedef struct ByteTestData { + int start_element_len; + int cdata_len; + int total_string_len; +} ByteTestData; + static void byte_character_handler(void *userData, const XML_Char *s, @@ -2305,17 +4029,21 @@ byte_character_handler(void *userData, #ifdef XML_CONTEXT_BYTES int offset, size; const char *buffer; - intptr_t buflen = (intptr_t)userData; + ByteTestData *data = (ByteTestData *)userData; buffer = XML_GetInputContext(parser, &offset, &size); if (buffer == NULL) fail("Failed to get context buffer"); + if (offset != data->start_element_len) + fail("Context offset in unexpected position"); + if (len != data->cdata_len) + fail("CDATA length reported incorrectly"); + if (size != data->total_string_len) + fail("Context size is not full buffer"); if (XML_GetCurrentByteIndex(parser) != offset) fail("Character byte index incorrect"); if (XML_GetCurrentByteCount(parser) != len) fail("Character byte count incorrect"); - if (buflen != size) - fail("Buffer length incorrect"); if (s != buffer + offset) fail("Buffer position incorrect"); #else @@ -2325,21 +4053,56 @@ byte_character_handler(void *userData, #endif } +#define START_ELEMENT "<e>" +#define CDATA_TEXT "Hello" +#define END_ELEMENT "</e>" START_TEST(test_byte_info_at_cdata) { - const char *text = "<doc>Hello</doc>"; + const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT; int offset, size; + ByteTestData data; /* Check initial context is empty */ if (XML_GetInputContext(parser, &offset, &size) != NULL) fail("Unexpected context at start of parse"); + data.start_element_len = strlen(START_ELEMENT); + data.cdata_len = strlen(CDATA_TEXT); + data.total_string_len = strlen(text); XML_SetCharacterDataHandler(parser, byte_character_handler); - XML_SetUserData(parser, (void *)strlen(text)); + XML_SetUserData(parser, &data); if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_OK) xml_failure(parser); } END_TEST +#undef START_ELEMENT +#undef CDATA_TEXT +#undef END_ELEMENT + +/* Test predefined entities are correctly recognised */ +START_TEST(test_predefined_entities) +{ + const char *text = "<doc><>&"'</doc>"; + const char *result = "<>&\"'"; + CharData storage; + + XML_SetDefaultHandler(parser, accumulate_characters); + /* run_character_check uses XML_SetCharacterDataHandler(), which + * unfortunately heads off a code path that we need to exercise. + */ + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + /* The default handler doesn't translate the entities */ + CharData_CheckXMLChars(&storage, text); + + /* Now try again and check the translation */ + XML_ParserReset(parser, NULL); + run_character_check(text, result); +} +END_TEST /* Regression test that an invalid tag in an external parameter * reference in an external DTD is correctly faulted. @@ -2390,6 +4153,7 @@ external_entity_param(XML_Parser parser, fail("Unknown system ID"); } + XML_ParserFree(ext_parser); return XML_STATUS_ERROR; } @@ -2406,6 +4170,2640 @@ START_TEST(test_invalid_tag_in_dtd) } END_TEST +/* Test entities not quite the predefined ones are not mis-recognised */ +START_TEST(test_not_predefined_entities) +{ + const char *text[] = { + "<doc>&pt;</doc>", + "<doc>&amo;</doc>", + "<doc>&quid;</doc>", + "<doc>&apod;</doc>", + NULL + }; + int i = 0; + + while (text[i] != NULL) { + expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY, + "Undefined entity not rejected"); + XML_ParserReset(parser, NULL); + i++; + } +} +END_TEST + +/* Test conditional inclusion (IGNORE) */ +static int XMLCALL +external_entity_load_ignore(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = "<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>"; + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ignore_section) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo'>\n" + "<doc><e>&entity;</e></doc>"; + const char *expected = + "<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;"; + CharData storage; + + CharData_Init(&storage); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &storage); + XML_SetExternalEntityRefHandler(parser, external_entity_load_ignore); + XML_SetDefaultHandler(parser, accumulate_characters); + XML_SetStartDoctypeDeclHandler(parser, dummy_start_doctype_handler); + XML_SetEndDoctypeDeclHandler(parser, dummy_end_doctype_handler); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, dummy_start_element); + XML_SetEndElementHandler(parser, dummy_end_element); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +static int XMLCALL +external_entity_load_ignore_utf16(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char text[] = + /* <![IGNORE[<!ELEMENT e (#PCDATA)*>]]> */ + "<\0!\0[\0I\0G\0N\0O\0R\0E\0[\0" + "<\0!\0E\0L\0E\0M\0E\0N\0T\0 \0e\0 \0" + "(\0#\0P\0C\0D\0A\0T\0A\0)\0*\0>\0]\0]\0>\0"; + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ignore_section_utf16) +{ + const char text[] = + /* <!DOCTYPE d SYSTEM 's'> */ + "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " + "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0" + /* <d><e>&en;</e></d> */ + "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0"; + const XML_Char *expected = + "<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"; + CharData storage; + + CharData_Init(&storage); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &storage); + XML_SetExternalEntityRefHandler(parser, + external_entity_load_ignore_utf16); + XML_SetDefaultHandler(parser, accumulate_characters); + XML_SetStartDoctypeDeclHandler(parser, dummy_start_doctype_handler); + XML_SetEndDoctypeDeclHandler(parser, dummy_end_doctype_handler); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, dummy_start_element); + XML_SetEndElementHandler(parser, dummy_end_element); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +static int XMLCALL +external_entity_load_ignore_utf16_be(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char text[] = + /* <![IGNORE[<!ELEMENT e (#PCDATA)*>]]> */ + "\0<\0!\0[\0I\0G\0N\0O\0R\0E\0[" + "\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 \0e\0 " + "\0(\0#\0P\0C\0D\0A\0T\0A\0)\0*\0>\0]\0]\0>"; + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ignore_section_utf16_be) +{ + const char text[] = + /* <!DOCTYPE d SYSTEM 's'> */ + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " + "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n" + /* <d><e>&en;</e></d> */ + "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>"; + const XML_Char *expected = + "<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"; + CharData storage; + + CharData_Init(&storage); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &storage); + XML_SetExternalEntityRefHandler(parser, + external_entity_load_ignore_utf16_be); + XML_SetDefaultHandler(parser, accumulate_characters); + XML_SetStartDoctypeDeclHandler(parser, dummy_start_doctype_handler); + XML_SetEndDoctypeDeclHandler(parser, dummy_end_doctype_handler); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, dummy_start_element); + XML_SetEndElementHandler(parser, dummy_end_element); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test mis-formatted conditional exclusion */ +START_TEST(test_bad_ignore_section) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo'>\n" + "<doc><e>&entity;</e></doc>"; + ExtFaults faults[] = { + { + "<![IGNORE[<!ELEM", + "Broken-off declaration not faulted", + NULL, + XML_ERROR_SYNTAX + }, + { + "<![IGNORE[\x01]]>", + "Invalid XML character not faulted", + NULL, + XML_ERROR_INVALID_TOKEN + }, + { + /* FIrst two bytes of a three-byte char */ + "<![IGNORE[\xe2\x82", + "Partial XML character not faulted", + NULL, + XML_ERROR_PARTIAL_CHAR + }, + { NULL, NULL, NULL, XML_ERROR_NONE } + }; + ExtFaults *fault; + + for (fault = &faults[0]; fault->parse_text != NULL; fault++) { + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, fault); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Incomplete IGNORE section not failed"); + XML_ParserReset(parser, NULL); + } +} +END_TEST + +/* Test recursive parsing */ +static int XMLCALL +external_entity_valuer(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *UNUSED_P(publicId)) +{ + const char *text1 = + "<!ELEMENT doc EMPTY>\n" + "<!ENTITY % e1 SYSTEM '004-2.ent'>\n" + "<!ENTITY % e2 '%e1;'>\n" + "%e1;\n"; + XML_Parser ext_parser; + + if (systemId == NULL) + return XML_STATUS_OK; + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (!strcmp(systemId, "004-1.ent")) { + if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, strlen(text1), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + } + else if (!strcmp(systemId, "004-2.ent")) { + ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser); + enum XML_Status status; + enum XML_Error error; + + status = _XML_Parse_SINGLE_BYTES(ext_parser, + fault->parse_text, + strlen(fault->parse_text), + XML_TRUE); + if (fault->error == XML_ERROR_NONE) { + if (status == XML_STATUS_ERROR) + xml_failure(ext_parser); + } else { + if (status != XML_STATUS_ERROR) + fail(fault->fail_text); + error = XML_GetErrorCode(ext_parser); + if (error != fault->error && + (fault->error != XML_ERROR_XML_DECL || + error != XML_ERROR_TEXT_DECL)) + xml_failure(ext_parser); + } + } + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_external_entity_values) +{ + const char *text = + "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" + "<doc></doc>\n"; + ExtFaults data_004_2[] = { + { + "<!ATTLIST doc a1 CDATA 'value'>", + NULL, + NULL, + XML_ERROR_NONE + }, + { + "<!ATTLIST $doc a1 CDATA 'value'>", + "Invalid token not faulted", + NULL, + XML_ERROR_INVALID_TOKEN + }, + { + "'wombat", + "Unterminated string not faulted", + NULL, + XML_ERROR_UNCLOSED_TOKEN + }, + { + "\xe2\x82", + "Partial UTF-8 character not faulted", + NULL, + XML_ERROR_PARTIAL_CHAR + }, + { + "<?xml version='1.0' encoding='utf-8'?>\n", + NULL, + NULL, + XML_ERROR_NONE + }, + { + "<?xml?>", + "Malformed XML declaration not faulted", + NULL, + XML_ERROR_XML_DECL + }, + { + /* UTF-8 BOM */ + "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", + NULL, + NULL, + XML_ERROR_NONE + }, + { + "<?xml version='1.0' encoding='utf-8'?>\n$", + "Invalid token after text declaration not faulted", + NULL, + XML_ERROR_INVALID_TOKEN + }, + { + "<?xml version='1.0' encoding='utf-8'?>\n'wombat", + "Unterminated string after text decl not faulted", + NULL, + XML_ERROR_UNCLOSED_TOKEN + }, + { + "<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82", + "Partial UTF-8 character after text decl not faulted", + NULL, + XML_ERROR_PARTIAL_CHAR + }, + { + "%e1;", + "Recursive parameter entity not faulted", + NULL, + XML_ERROR_RECURSIVE_ENTITY_REF + }, + { NULL, NULL, NULL, XML_ERROR_NONE } + }; + int i; + + for (i = 0; data_004_2[i].parse_text != NULL; i++) { + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_valuer); + XML_SetUserData(parser, &data_004_2[i]); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + XML_ParserReset(parser, NULL); + } +} +END_TEST + +/* Test the recursive parse interacts with a not standalone handler */ +static int XMLCALL +external_entity_not_standalone(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *UNUSED_P(publicId)) +{ + const char *text1 = + "<!ELEMENT doc EMPTY>\n" + "<!ENTITY % e1 SYSTEM 'bar'>\n" + "%e1;\n"; + const char *text2 = "<!ATTLIST doc a1 CDATA 'value'>"; + XML_Parser ext_parser; + + if (systemId == NULL) + return XML_STATUS_OK; + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (!strcmp(systemId, "foo")) { + XML_SetNotStandaloneHandler(ext_parser, + reject_not_standalone_handler); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, strlen(text1), + XML_TRUE) != XML_STATUS_ERROR) + fail("Expected not standalone rejection"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_NOT_STANDALONE) + xml_failure(ext_parser); + XML_SetNotStandaloneHandler(ext_parser, NULL); + XML_ParserFree(ext_parser); + return XML_STATUS_ERROR; + } + else if (!strcmp(systemId, "bar")) { + if (_XML_Parse_SINGLE_BYTES(ext_parser, text2, strlen(text2), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + } + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ext_entity_not_standalone) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo'>\n" + "<doc></doc>"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_not_standalone); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Standalone rejection not caught"); +} +END_TEST + +static int XMLCALL +external_entity_value_aborter(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *UNUSED_P(publicId)) +{ + const char *text1 = + "<!ELEMENT doc EMPTY>\n" + "<!ENTITY % e1 SYSTEM '004-2.ent'>\n" + "<!ENTITY % e2 '%e1;'>\n" + "%e1;\n"; + const char *text2 = + "<?xml version='1.0' encoding='utf-8'?>"; + XML_Parser ext_parser; + + if (systemId == NULL) + return XML_STATUS_OK; + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + if (!strcmp(systemId, "004-1.ent")) { + if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, strlen(text1), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + } + if (!strcmp(systemId, "004-2.ent")) { + XML_SetXmlDeclHandler(ext_parser, entity_suspending_xdecl_handler); + XML_SetUserData(ext_parser, ext_parser); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text2, strlen(text2), + XML_TRUE) != XML_STATUS_ERROR) + fail("Aborted parse not faulted"); + if (XML_GetErrorCode(ext_parser) != XML_ERROR_ABORTED) + xml_failure(ext_parser); + } + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_ext_entity_value_abort) +{ + const char *text = + "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" + "<doc></doc>\n"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_value_aborter); + resumable = XML_FALSE; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +START_TEST(test_bad_public_doctype) +{ + const char *text = + "<?xml version='1.0' encoding='utf-8'?>\n" + "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n" + "<doc></doc>"; + + /* Setting a handler provokes a particular code path */ + XML_SetDoctypeDeclHandler(parser, + dummy_start_doctype_handler, + dummy_end_doctype_handler); + expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed"); +} +END_TEST + +/* Test based on ibm/valid/P32/ibm32v04.xml */ +START_TEST(test_attribute_enum_value) +{ + const char *text = + "<?xml version='1.0' standalone='no'?>\n" + "<!DOCTYPE animal SYSTEM 'test.dtd'>\n" + "<animal>This is a \n <a/> \n\nyellow tiger</animal>"; + ExtTest dtd_data = { + "<!ELEMENT animal (#PCDATA|a)*>\n" + "<!ELEMENT a EMPTY>\n" + "<!ATTLIST animal xml:space (default|preserve) 'preserve'>", + NULL, + NULL + }; + + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + XML_SetUserData(parser, &dtd_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + /* An attribute list handler provokes a different code path */ + XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); + run_ext_character_check(text, &dtd_data, + "This is a \n \n\nyellow tiger"); +} +END_TEST + +/* Slightly bizarrely, the library seems to silently ignore entity + * definitions for predefined entities, even when they are wrong. The + * language of the XML 1.0 spec is somewhat unhelpful as to what ought + * to happen, so this is currently treated as acceptable. + */ +START_TEST(test_predefined_entity_redefinition) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!ENTITY apos 'foo'>\n" + "]>\n" + "<doc>'</doc>"; + run_character_check(text, "'"); +} +END_TEST + +/* Test that the parser stops processing the DTD after an unresolved + * parameter entity is encountered. + */ +START_TEST(test_dtd_stop_processing) +{ + const char *text = + "<!DOCTYPE doc [\n" + "%foo;\n" + "<!ENTITY bar 'bas'>\n" + "]><doc/>"; + + XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + if (dummy_handler_flags != 0) + fail("DTD processing still going after undefined PE"); +} +END_TEST + +/* Test public notations with no system ID */ +START_TEST(test_public_notation_no_sysid) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!NOTATION note PUBLIC 'foo'>\n" + "<!ELEMENT doc EMPTY>\n" + "]>\n<doc/>"; + + dummy_handler_flags = 0; + XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + if (dummy_handler_flags != DUMMY_NOTATION_DECL_HANDLER_FLAG) + fail("Notation declaration handler not called"); +} +END_TEST + +static void XMLCALL +record_element_start_handler(void *userData, + const XML_Char *name, + const XML_Char **UNUSED_P(atts)) +{ + CharData_AppendString((CharData *)userData, name); +} + +START_TEST(test_nested_groups) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!ELEMENT doc " + /* Sixteen elements per line */ + "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?," + "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?" + "))))))))))))))))))))))))))))))))>\n" + "<!ELEMENT e EMPTY>" + "]>\n" + "<doc><e/></doc>"; + CharData storage; + + CharData_Init(&storage); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, record_element_start_handler); + XML_SetUserData(parser, &storage); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckString(&storage, "doce"); + if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler not fired"); +} +END_TEST + +START_TEST(test_group_choice) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!ELEMENT doc (a|b|c)+>\n" + "<!ELEMENT a EMPTY>\n" + "<!ELEMENT b (#PCDATA)>\n" + "<!ELEMENT c ANY>\n" + "]>\n" + "<doc>\n" + "<a/>\n" + "<b attr='foo'>This is a foo</b>\n" + "<c></c>\n" + "</doc>\n"; + + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler flag not raised"); +} +END_TEST + +static int XMLCALL +external_entity_public(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *publicId) +{ + const char *text1 = (const char *)XML_GetUserData(parser); + const char *text2 = "<!ATTLIST doc a CDATA 'value'>"; + const char *text = NULL; + XML_Parser ext_parser; + int parse_res; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + return XML_STATUS_ERROR; + if (systemId != NULL && !strcmp(systemId, "http://example.org/")) { + text = text1; + } + else if (publicId != NULL && !strcmp(publicId, "foo")) { + text = text2; + } + else + fail("Unexpected parameters to external entity parser"); + parse_res = _XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE); + XML_ParserFree(ext_parser); + return parse_res; +} + +START_TEST(test_standalone_parameter_entity) +{ + const char *text = + "<?xml version='1.0' standalone='yes'?>\n" + "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n" + "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n" + "%entity;\n" + "]>\n" + "<doc></doc>"; + char dtd_data[] = + "<!ENTITY % e1 'foo'>\n"; + + XML_SetUserData(parser, dtd_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_public); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test skipping of parameter entity in an external DTD */ +/* Derived from ibm/invalid/P69/ibm69i01.xml */ +START_TEST(test_skipped_parameter_entity) +{ + const char *text = + "<?xml version='1.0'?>\n" + "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" + "<!ELEMENT root (#PCDATA|a)* >\n" + "]>\n" + "<root></root>"; + ExtTest dtd_data = { "%pe2;", NULL, NULL }; + + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + XML_SetUserData(parser, &dtd_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetSkippedEntityHandler(parser, dummy_skip_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + if (dummy_handler_flags != DUMMY_SKIP_HANDLER_FLAG) + fail("Skip handler not executed"); +} +END_TEST + +/* Test recursive parameter entity definition rejected in external DTD */ +START_TEST(test_recursive_external_parameter_entity) +{ + const char *text = + "<?xml version='1.0'?>\n" + "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" + "<!ELEMENT root (#PCDATA|a)* >\n" + "]>\n" + "<root></root>"; + ExtFaults dtd_data = { + "<!ENTITY % pe2 '%pe2;'>\n%pe2;", + "Recursive external parameter entity not faulted", + NULL, + XML_ERROR_RECURSIVE_ENTITY_REF + }; + + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, &dtd_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + expect_failure(text, + XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Recursive external parameter not spotted"); +} +END_TEST + +/* Test undefined parameter entity in external entity handler */ +static int XMLCALL +external_entity_devaluer(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *systemId, + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = + "<!ELEMENT doc EMPTY>\n" + "<!ENTITY % e1 SYSTEM 'bar'>\n" + "%e1;\n"; + XML_Parser ext_parser; + int clear_handler = (intptr_t)XML_GetUserData(parser); + + if (systemId == NULL || !strcmp(systemId, "bar")) + return XML_STATUS_OK; + if (strcmp(systemId, "foo")) + fail("Unexpected system ID"); + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could note create external entity parser"); + if (clear_handler) + XML_SetExternalEntityRefHandler(ext_parser, NULL); + if (_XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(ext_parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_undefined_ext_entity_in_external_dtd) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo'>\n" + "<doc></doc>\n"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_devaluer); + XML_SetUserData(parser, (void *)(intptr_t)XML_FALSE); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + + /* Now repeat without the external entity ref handler invoking + * another copy of itself. + */ + XML_ParserReset(parser, NULL); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_devaluer); + XML_SetUserData(parser, (void *)(intptr_t)XML_TRUE); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + + +static void XMLCALL +aborting_xdecl_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(version), + const XML_Char *UNUSED_P(encoding), + int UNUSED_P(standalone)) +{ + XML_StopParser(parser, resumable); + XML_SetXmlDeclHandler(parser, NULL); +} + +/* Test suspending the parse on receiving an XML declaration works */ +START_TEST(test_suspend_xdecl) +{ + const char *text = long_character_data_text; + + XML_SetXmlDeclHandler(parser, aborting_xdecl_handler); + resumable = XML_TRUE; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_SUSPENDED) + xml_failure(parser); + if (XML_GetErrorCode(parser) != XML_ERROR_NONE) + xml_failure(parser); + /* Attempt to start a new parse while suspended */ + if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) + fail("Attempt to parse while suspended not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_SUSPENDED) + fail("Suspended parse not faulted with correct error"); +} +END_TEST + +/* Test aborting the parse in an epilog works */ +static void XMLCALL +selective_aborting_default_handler(void *userData, + const XML_Char *s, + int len) +{ + const char *match = (const char *)userData; + + if (match == NULL || + (strlen(match) == (unsigned)len && + !strncmp(match, s, len))) { + XML_StopParser(parser, resumable); + XML_SetDefaultHandler(parser, NULL); + } +} + +START_TEST(test_abort_epilog) +{ + const char *text = "<doc></doc>\n\r\n"; + char match[] = "\r"; + + XML_SetDefaultHandler(parser, selective_aborting_default_handler); + XML_SetUserData(parser, match); + resumable = XML_FALSE; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + fail("Abort not triggered"); + if (XML_GetErrorCode(parser) != XML_ERROR_ABORTED) + xml_failure(parser); +} +END_TEST + +/* Test a different code path for abort in the epilog */ +START_TEST(test_abort_epilog_2) +{ + const char *text = "<doc></doc>\n"; + char match[] = "\n"; + + XML_SetDefaultHandler(parser, selective_aborting_default_handler); + XML_SetUserData(parser, match); + resumable = XML_FALSE; + expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered"); +} +END_TEST + +/* Test suspension from the epilog */ +START_TEST(test_suspend_epilog) +{ + const char *text = "<doc></doc>\n"; + char match[] = "\n"; + + XML_SetDefaultHandler(parser, selective_aborting_default_handler); + XML_SetUserData(parser, match); + resumable = XML_TRUE; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_SUSPENDED) + xml_failure(parser); +} +END_TEST + +START_TEST(test_unfinished_epilog) +{ + const char *text = "<doc></doc><"; + + expect_failure(text, XML_ERROR_UNCLOSED_TOKEN, + "Incomplete epilog entry not faulted"); +} +END_TEST + +START_TEST(test_partial_char_in_epilog) +{ + const char *text = "<doc></doc>\xe2\x82"; + + /* First check that no fault is raised if the parse is not finished */ + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); + /* Now check that it is faulted once we finish */ + if (XML_ParseBuffer(parser, 0, XML_TRUE) != XML_STATUS_ERROR) + fail("Partial character in epilog not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_PARTIAL_CHAR) + xml_failure(parser); +} +END_TEST + +START_TEST(test_hash_collision) +{ + /* For full coverage of the lookup routine, we need to ensure a + * hash collision even though we can only tell that we have one + * through breakpoint debugging or coverage statistics. The + * following will cause a hash collision on machines with a 64-bit + * long type; others will have to experiment. The full coverage + * tests invoked from qa.sh usually provide a hash collision, but + * not always. This is an attempt to provide insurance. + */ +#define COLLIDING_HASH_SALT (unsigned long)_SIP_ULL(0xffffffffU, 0xff99fc90U) + const char * text = + "<doc>\n" + "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n" + "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n" + "<b5></b5><b6></b6><b7></b7><b8></b8>\n" + "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n" + "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n" + "<d8>This triggers the table growth and collides with b2</d8>\n" + "</doc>\n"; + + XML_SetHashSalt(parser, COLLIDING_HASH_SALT); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST +#undef COLLIDING_HASH_SALT + +/* Test resuming a parse suspended in entity substitution */ +static void XMLCALL +start_element_suspender(void *UNUSED_P(userData), + const XML_Char *name, + const XML_Char **UNUSED_P(atts)) +{ + if (!strcmp(name, "suspend")) + XML_StopParser(parser, XML_TRUE); +} + +START_TEST(test_suspend_resume_internal_entity) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n" + "]>\n" + "<doc>&foo;</doc>\n"; + const char *expected1 = "Hi"; + const char *expected2 = "HiHo"; + CharData storage; + + CharData_Init(&storage); + XML_SetStartElementHandler(parser, start_element_suspender); + XML_SetCharacterDataHandler(parser, accumulate_characters); + XML_SetUserData(parser, &storage); + if (XML_Parse(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_SUSPENDED) + xml_failure(parser); + CharData_CheckXMLChars(&storage, ""); + if (XML_ResumeParser(parser) != XML_STATUS_SUSPENDED) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected1); + if (XML_ResumeParser(parser) != XML_STATUS_OK) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected2); +} +END_TEST + +/* Test syntax error is caught at parse resumption */ +START_TEST(test_resume_entity_with_syntax_error) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!ENTITY foo '<suspend>Hi</wombat>'>\n" + "]>\n" + "<doc>&foo;</doc>\n"; + + XML_SetStartElementHandler(parser, start_element_suspender); + if (XML_Parse(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_SUSPENDED) + xml_failure(parser); + if (XML_ResumeParser(parser) != XML_STATUS_ERROR) + fail("Syntax error in entity not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH) + xml_failure(parser); +} +END_TEST + +/* Test suspending and resuming in a parameter entity substitution */ +static void XMLCALL +element_decl_suspender(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(name), + XML_Content *model) +{ + XML_StopParser(parser, XML_TRUE); + XML_FreeContentModel(parser, model); +} + +START_TEST(test_suspend_resume_parameter_entity) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n" + "%foo;\n" + "]>\n" + "<doc>Hello, world</doc>"; + const char *expected = "Hello, world"; + CharData storage; + + CharData_Init(&storage); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetElementDeclHandler(parser, element_decl_suspender); + XML_SetCharacterDataHandler(parser, accumulate_characters); + XML_SetUserData(parser, &storage); + if (XML_Parse(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_SUSPENDED) + xml_failure(parser); + CharData_CheckXMLChars(&storage, ""); + if (XML_ResumeParser(parser) != XML_STATUS_OK) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test attempting to use parser after an error is faulted */ +START_TEST(test_restart_on_error) +{ + const char *text = "<$doc><doc></doc>"; + + if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) + fail("Invalid tag name not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN) + xml_failure(parser); + if (XML_Parse(parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) + fail("Restarting invalid parse not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN) + xml_failure(parser); +} +END_TEST + +/* Test that angle brackets in an attribute default value are faulted */ +START_TEST(test_reject_lt_in_attribute_value) +{ + const char *text = + "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n" + "<doc></doc>"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Bad attribute default not faulted"); +} +END_TEST + +START_TEST(test_reject_unfinished_param_in_att_value) +{ + const char *text = + "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n" + "<doc></doc>"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Bad attribute default not faulted"); +} +END_TEST + +START_TEST(test_trailing_cr_in_att_value) +{ + const char *text = "<doc a='value\r'/>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Try parsing a general entity within a parameter entity in a + * standalone internal DTD. Covers a corner case in the parser. + */ +START_TEST(test_standalone_internal_entity) +{ + const char *text = + "<?xml version='1.0' standalone='yes' ?>\n" + "<!DOCTYPE doc [\n" + " <!ELEMENT doc (#PCDATA)>\n" + " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n" + " <!ENTITY ge 'AttDefaultValue'>\n" + " %pe;\n" + "]>\n" + "<doc att2='any'/>"; + + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test that a reference to an unknown external entity is skipped */ +START_TEST(test_skipped_external_entity) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" + "<doc></doc>\n"; + ExtTest test_data = { + "<!ELEMENT doc EMPTY>\n" + "<!ENTITY % e2 '%e1;'>\n", + NULL, + NULL + }; + + XML_SetUserData(parser, &test_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test a different form of unknown external entity */ +typedef struct ext_hdlr_data { + const char *parse_text; + XML_ExternalEntityRefHandler handler; +} ExtHdlrData; + +static int XMLCALL +external_entity_oneshot_loader(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + ExtHdlrData *test_data = (ExtHdlrData *)XML_GetUserData(parser); + XML_Parser ext_parser; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser."); + /* Use the requested entity parser for further externals */ + XML_SetExternalEntityRefHandler(ext_parser, test_data->handler); + if ( _XML_Parse_SINGLE_BYTES(ext_parser, + test_data->parse_text, + strlen(test_data->parse_text), + XML_TRUE) == XML_STATUS_ERROR) { + xml_failure(ext_parser); + } + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +START_TEST(test_skipped_null_loaded_ext_entity) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" + "<doc />"; + ExtHdlrData test_data = { + "<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" + "<!ENTITY % pe2 '%pe1;'>\n" + "%pe2;\n", + external_entity_null_loader + }; + + XML_SetUserData(parser, &test_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_oneshot_loader); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +START_TEST(test_skipped_unloaded_ext_entity) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" + "<doc />"; + ExtHdlrData test_data = { + "<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" + "<!ENTITY % pe2 '%pe1;'>\n" + "%pe2;\n", + NULL + }; + + XML_SetUserData(parser, &test_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_oneshot_loader); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test that a parameter entity value ending with a carriage return + * has it translated internally into a newline. + */ +START_TEST(test_param_entity_with_trailing_cr) +{ +#define PARAM_ENTITY_NAME "pe" +#define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">" + const char *text = + "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" + "<doc/>"; + ExtTest test_data = { + "<!ENTITY % " PARAM_ENTITY_NAME + " '" PARAM_ENTITY_CORE_VALUE "\r'>\n" + "%" PARAM_ENTITY_NAME ";\n", + NULL, + NULL + }; + + XML_SetUserData(parser, &test_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_loader); + XML_SetEntityDeclHandler(parser, param_entity_match_handler); + entity_name_to_match = PARAM_ENTITY_NAME; + entity_value_to_match = PARAM_ENTITY_CORE_VALUE "\n"; + entity_match_flag = ENTITY_MATCH_NOT_FOUND; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + if (entity_match_flag == ENTITY_MATCH_FAIL) + fail("Parameter entity CR->NEWLINE conversion failed"); + else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND) + fail("Parameter entity not parsed"); +} +#undef PARAM_ENTITY_NAME +#undef PARAM_ENTITY_CORE_VALUE +END_TEST + +START_TEST(test_invalid_character_entity) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY entity '�'>\n" + "]>\n" + "<doc>&entity;</doc>"; + + expect_failure(text, XML_ERROR_BAD_CHAR_REF, + "Out of range character reference not faulted"); +} +END_TEST + +START_TEST(test_invalid_character_entity_2) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY entity '&#xg0;'>\n" + "]>\n" + "<doc>&entity;</doc>"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Out of range character reference not faulted"); +} +END_TEST + +START_TEST(test_invalid_character_entity_3) +{ + const char text[] = + /* <!DOCTYPE doc [\n */ + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" + /* U+0E04 = KHO KHWAI + * U+0E08 = CHO CHAN */ + /* <!ENTITY entity '&\u0e04\u0e08;'>\n */ + "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 " + "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n" + /* ]>\n */ + "\0]\0>\0\n" + /* <doc>&entity;</doc> */ + "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) != XML_STATUS_ERROR) + fail("Invalid start of entity name not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_UNDEFINED_ENTITY) + xml_failure(parser); +} +END_TEST + +START_TEST(test_invalid_character_entity_4) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY entity '�'>\n" /* = � */ + "]>\n" + "<doc>&entity;</doc>"; + + expect_failure(text, XML_ERROR_BAD_CHAR_REF, + "Out of range character reference not faulted"); +} +END_TEST + + +/* Test that processing instructions are picked up by a default handler */ +START_TEST(test_pi_handled_in_default) +{ + const char *text = "<?test processing instruction?>\n<doc/>"; + const XML_Char *expected = "<?test processing instruction?>\n<doc/>"; + CharData storage; + + CharData_Init(&storage); + XML_SetDefaultHandler(parser, accumulate_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE)== XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + + +/* Test that comments are picked up by a default handler */ +START_TEST(test_comment_handled_in_default) +{ + const char *text = "<!-- This is a comment -->\n<doc/>"; + const XML_Char *expected = "<!-- This is a comment -->\n<doc/>"; + CharData storage; + + CharData_Init(&storage); + XML_SetDefaultHandler(parser, accumulate_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test PIs that look almost but not quite like XML declarations */ +static void XMLCALL +accumulate_pi_characters(void *userData, + const XML_Char *target, + const XML_Char *data) +{ + CharData *storage = (CharData *)userData; + + CharData_AppendXMLChars(storage, target, -1); + CharData_AppendXMLChars(storage, ": ", 2); + CharData_AppendXMLChars(storage, data, -1); + CharData_AppendXMLChars(storage, "\n", 1); +} + +START_TEST(test_pi_yml) +{ + const char *text = "<?yml something like data?><doc/>"; + const XML_Char *expected = "yml: something like data\n"; + CharData storage; + + CharData_Init(&storage); + XML_SetProcessingInstructionHandler(parser, accumulate_pi_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_pi_xnl) +{ + const char *text = "<?xnl nothing like data?><doc/>"; + const XML_Char *expected = "xnl: nothing like data\n"; + CharData storage; + + CharData_Init(&storage); + XML_SetProcessingInstructionHandler(parser, accumulate_pi_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_pi_xmm) +{ + const char *text = "<?xmm everything like data?><doc/>"; + const XML_Char *expected = "xmm: everything like data\n"; + CharData storage; + + CharData_Init(&storage); + XML_SetProcessingInstructionHandler(parser, accumulate_pi_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_utf16_pi) +{ + const char text[] = + /* <?{KHO KHWAI}{CHO CHAN}?> + * where {KHO KHWAI} = U+0E04 + * and {CHO CHAN} = U+0E08 + */ + "<\0?\0\x04\x0e\x08\x0e?\0>\0" + /* <q/> */ + "<\0q\0/\0>\0"; + const XML_Char *expected = "\xe0\xb8\x84\xe0\xb8\x88: \n"; + CharData storage; + + CharData_Init(&storage); + XML_SetProcessingInstructionHandler(parser, accumulate_pi_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_utf16_be_pi) +{ + const char text[] = + /* <?{KHO KHWAI}{CHO CHAN}?> + * where {KHO KHWAI} = U+0E04 + * and {CHO CHAN} = U+0E08 + */ + "\0<\0?\x0e\x04\x0e\x08\0?\0>" + /* <q/> */ + "\0<\0q\0/\0>"; + const XML_Char *expected = "\xe0\xb8\x84\xe0\xb8\x88: \n"; + CharData storage; + + CharData_Init(&storage); + XML_SetProcessingInstructionHandler(parser, accumulate_pi_characters); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that comments can be picked up and translated */ +static void XMLCALL +accumulate_comment(void *userData, + const XML_Char *data) +{ + CharData *storage = (CharData *)userData; + + CharData_AppendXMLChars(storage, data, -1); +} + +START_TEST(test_utf16_be_comment) +{ + const char text[] = + /* <!-- Comment A --> */ + "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n" + /* <doc/> */ + "\0<\0d\0o\0c\0/\0>"; + const XML_Char *expected = " Comment A "; + CharData storage; + + CharData_Init(&storage); + XML_SetCommentHandler(parser, accumulate_comment); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_utf16_le_comment) +{ + const char text[] = + /* <!-- Comment B --> */ + "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0" + /* <doc/> */ + "<\0d\0o\0c\0/\0>\0"; + const XML_Char *expected = " Comment B "; + CharData storage; + + CharData_Init(&storage); + XML_SetCommentHandler(parser, accumulate_comment); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that the unknown encoding handler with map entries that expect + * conversion but no conversion function is faulted + */ +static int XMLCALL +failing_converter(void *UNUSED_P(data), const char *UNUSED_P(s)) +{ + /* Always claim to have failed */ + return -1; +} + +static int XMLCALL +prefix_converter(void *UNUSED_P(data), const char *s) +{ + /* If the first byte is 0xff, raise an error */ + if (s[0] == -1) + return -1; + /* Just add the low bits of the first byte to the second */ + return (s[1] + (s[0] & 0x7f)) & 0x01ff; +} + +static int XMLCALL +MiscEncodingHandler(void *data, + const XML_Char *encoding, + XML_Encoding *info) +{ + int i; + int high_map = -2; /* Assume a 2-byte sequence */ + + if (!strcmp(encoding, "invalid-9") || + !strcmp(encoding, "ascii-like") || + !strcmp(encoding, "invalid-len") || + !strcmp(encoding, "invalid-a") || + !strcmp(encoding, "invalid-surrogate") || + !strcmp(encoding, "invalid-high")) + high_map = -1; + + for (i = 0; i < 128; ++i) + info->map[i] = i; + for (; i < 256; ++i) + info->map[i] = high_map; + + /* If required, put an invalid value in the ASCII entries */ + if (!strcmp(encoding, "invalid-9")) + info->map[9] = 5; + /* If required, have a top-bit set character starts a 5-byte sequence */ + if (!strcmp(encoding, "invalid-len")) + info->map[0x81] = -5; + /* If required, make a top-bit set character a valid ASCII character */ + if (!strcmp(encoding, "invalid-a")) + info->map[0x82] = 'a'; + /* If required, give a top-bit set character a forbidden value, + * what would otherwise be the first of a surrogate pair. + */ + if (!strcmp(encoding, "invalid-surrogate")) + info->map[0x83] = 0xd801; + /* If required, give a top-bit set character too high a value */ + if (!strcmp(encoding, "invalid-high")) + info->map[0x84] = 0x010101; + + info->data = data; + info->release = NULL; + if (!strcmp(encoding, "failing-conv")) + info->convert = failing_converter; + else if (!strcmp(encoding, "prefix-conv")) + info->convert = prefix_converter; + else + info->convert = NULL; + return XML_STATUS_OK; +} + +START_TEST(test_missing_encoding_conversion_fn) +{ + const char *text = + "<?xml version='1.0' encoding='no-conv'?>\n" + "<doc>\x81</doc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + /* MiscEncodingHandler sets up an encoding with every top-bit-set + * character introducing a two-byte sequence. For this, it + * requires a convert function. The above function call doesn't + * pass one through, so when BadEncodingHandler actually gets + * called it should supply an invalid encoding. + */ + expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, + "Encoding with missing convert() not faulted"); +} +END_TEST + +START_TEST(test_failing_encoding_conversion_fn) +{ + const char *text = + "<?xml version='1.0' encoding='failing-conv'?>\n" + "<doc>\x81</doc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + /* BadEncodingHandler sets up an encoding with every top-bit-set + * character introducing a two-byte sequence. For this, it + * requires a convert function. The above function call passes + * one that insists all possible sequences are invalid anyway. + */ + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Encoding with failing convert() not faulted"); +} +END_TEST + +/* Test unknown encoding conversions */ +START_TEST(test_unknown_encoding_success) +{ + const char *text = + "<?xml version='1.0' encoding='prefix-conv'?>\n" + /* Equivalent to <eoc>Hello, world</eoc> */ + "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + run_character_check(text, "Hello, world"); +} +END_TEST + +/* Test bad name character in unknown encoding */ +START_TEST(test_unknown_encoding_bad_name) +{ + const char *text = + "<?xml version='1.0' encoding='prefix-conv'?>\n" + "<\xff\x64oc>Hello, world</\xff\x64oc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Bad name start in unknown encoding not faulted"); +} +END_TEST + +/* Test bad mid-name character in unknown encoding */ +START_TEST(test_unknown_encoding_bad_name_2) +{ + const char *text = + "<?xml version='1.0' encoding='prefix-conv'?>\n" + "<d\xffoc>Hello, world</d\xffoc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Bad name in unknown encoding not faulted"); +} +END_TEST + +/* Test element name that is long enough to fill the conversion buffer + * in an unknown encoding, finishing with an encoded character. + */ +START_TEST(test_unknown_encoding_long_name_1) +{ + const char *text = + "<?xml version='1.0' encoding='prefix-conv'?>\n" + "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>" + "Hi" + "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"; + const XML_Char *expected = "abcdefghabcdefghabcdefghijklmnop"; + CharData storage; + + CharData_Init(&storage); + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + XML_SetStartElementHandler(parser, record_element_start_handler); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test element name that is long enough to fill the conversion buffer + * in an unknown encoding, finishing with an simple character. + */ +START_TEST(test_unknown_encoding_long_name_2) +{ + const char *text = + "<?xml version='1.0' encoding='prefix-conv'?>\n" + "<abcdefghabcdefghabcdefghijklmnop>" + "Hi" + "</abcdefghabcdefghabcdefghijklmnop>"; + const XML_Char *expected = "abcdefghabcdefghabcdefghijklmnop"; + CharData storage; + + CharData_Init(&storage); + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + XML_SetStartElementHandler(parser, record_element_start_handler); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_invalid_unknown_encoding) +{ + const char *text = + "<?xml version='1.0' encoding='invalid-9'?>\n" + "<doc>Hello world</doc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, + "Invalid unknown encoding not faulted"); +} +END_TEST + +START_TEST(test_unknown_ascii_encoding_ok) +{ + const char *text = + "<?xml version='1.0' encoding='ascii-like'?>\n" + "<doc>Hello, world</doc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + run_character_check(text, "Hello, world"); +} +END_TEST + +START_TEST(test_unknown_ascii_encoding_fail) +{ + const char *text = + "<?xml version='1.0' encoding='ascii-like'?>\n" + "<doc>Hello, \x80 world</doc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid character not faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_invalid_length) +{ + const char *text = + "<?xml version='1.0' encoding='invalid-len'?>\n" + "<doc>Hello, world</doc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, + "Invalid unknown encoding not faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_invalid_topbit) +{ + const char *text = + "<?xml version='1.0' encoding='invalid-a'?>\n" + "<doc>Hello, world</doc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, + "Invalid unknown encoding not faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_invalid_surrogate) +{ + const char *text = + "<?xml version='1.0' encoding='invalid-surrogate'?>\n" + "<doc>Hello, \x82 world</doc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid unknown encoding not faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_invalid_high) +{ + const char *text = + "<?xml version='1.0' encoding='invalid-high'?>\n" + "<doc>Hello, world</doc>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, + "Invalid unknown encoding not faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_invalid_attr_value) +{ + const char *text = + "<?xml version='1.0' encoding='prefix-conv'?>\n" + "<doc attr='\xff\x30'/>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid attribute valid not faulted"); +} +END_TEST + +/* Test an external entity parser set to use latin-1 detects UTF-16 + * BOMs correctly. + */ +enum ee_parse_flags { + EE_PARSE_NONE = 0x00, + EE_PARSE_FULL_BUFFER = 0x01 +}; + +typedef struct ExtTest2 { + const char *parse_text; + int parse_len; + const char *encoding; + CharData *storage; + enum ee_parse_flags flags; +} ExtTest2; + +static int XMLCALL +external_entity_loader2(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + ExtTest2 *test_data = (ExtTest2 *)XML_GetUserData(parser); + XML_Parser extparser; + + extparser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (extparser == NULL) + fail("Coulr not create external entity parser"); + if (test_data->encoding != NULL) { + if (!XML_SetEncoding(extparser, test_data->encoding)) + fail("XML_SetEncoding() ignored for external entity"); + } + if (test_data->flags & EE_PARSE_FULL_BUFFER) { + if (XML_Parse(extparser, + test_data->parse_text, + test_data->parse_len, + XML_TRUE) == XML_STATUS_ERROR) { + xml_failure(extparser); + } + } + else if (_XML_Parse_SINGLE_BYTES(extparser, + test_data->parse_text, + test_data->parse_len, + XML_TRUE) == XML_STATUS_ERROR) { + xml_failure(extparser); + } + + XML_ParserFree(extparser); + return XML_STATUS_OK; +} + +/* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */ +static void XMLCALL +ext2_accumulate_characters(void *userData, const XML_Char *s, int len) +{ + ExtTest2 *test_data = (ExtTest2 *)userData; + accumulate_characters(test_data->storage, s, len); +} + +START_TEST(test_ext_entity_latin1_utf16le_bom) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtTest2 test_data = { + /* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ + /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, + * 0x4c = L and 0x20 is a space + */ + "\xff\xfe\x4c\x20", + 4, + "iso-8859-1", + NULL, + EE_PARSE_NONE + }; + /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ + const XML_Char *expected = "\xc3\xbf\xc3\xbeL "; + CharData storage; + + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_ext_entity_latin1_utf16be_bom) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtTest2 test_data = { + /* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ + /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, + * 0x4c = L and 0x20 is a space + */ + "\xfe\xff\x20\x4c", + 4, + "iso-8859-1", + NULL, + EE_PARSE_NONE + }; + /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ + const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; + CharData storage; + + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + + +/* Parsing the full buffer rather than a byte at a time makes a + * difference to the encoding scanning code, so repeat the above tests + * without breaking them down by byte. + */ +START_TEST(test_ext_entity_latin1_utf16le_bom2) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtTest2 test_data = { + /* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ + /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, + * 0x4c = L and 0x20 is a space + */ + "\xff\xfe\x4c\x20", + 4, + "iso-8859-1", + NULL, + EE_PARSE_FULL_BUFFER + }; + /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ + const XML_Char *expected = "\xc3\xbf\xc3\xbeL "; + CharData storage; + + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_ext_entity_latin1_utf16be_bom2) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtTest2 test_data = { + /* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ + /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, + * 0x4c = L and 0x20 is a space + */ + "\xfe\xff\x20\x4c", + 4, + "iso-8859-1", + NULL, + EE_PARSE_FULL_BUFFER + }; + /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ + const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; + CharData storage; + + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test little-endian UTF-16 given an explicit big-endian encoding */ +START_TEST(test_ext_entity_utf16_be) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtTest2 test_data = { + "<\0e\0/\0>\0", + 8, + "utf-16be", + NULL, + EE_PARSE_NONE + }; + const XML_Char *expected = + "\xe3\xb0\x80" /* U+3C00 */ + "\xe6\x94\x80" /* U+6A00 */ + "\xe2\xbc\x80" /* U+2F00 */ + "\xe3\xb8\x80"; /* U+3E00 */ + CharData storage; + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test big-endian UTF-16 given an explicit little-endian encoding */ +START_TEST(test_ext_entity_utf16_le) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtTest2 test_data = { + "\0<\0e\0/\0>", + 8, + "utf-16le", + NULL, + EE_PARSE_NONE + }; + const XML_Char *expected = + "\xe3\xb0\x80" /* U+3C00 */ + "\xe6\x94\x80" /* U+6A00 */ + "\xe2\xbc\x80" /* U+2F00 */ + "\xe3\xb8\x80"; /* U+3E00 */ + CharData storage; + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test little-endian UTF-16 given no explicit encoding. + * The existing default encoding (UTF-8) is assumed to hold without a + * BOM to contradict it, so the entity value will in fact provoke an + * error because 0x00 is not a valid XML character. We parse the + * whole buffer in one go rather than feeding it in byte by byte to + * exercise different code paths in the initial scanning routines. + */ +typedef struct ExtFaults2 { + const char *parse_text; + int parse_len; + const char *fail_text; + const char *encoding; + enum XML_Error error; +} ExtFaults2; + +static int XMLCALL +external_entity_faulter2(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + ExtFaults2 *test_data = (ExtFaults2 *)XML_GetUserData(parser); + XML_Parser extparser; + + extparser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (extparser == NULL) + fail("Could not create external entity parser"); + if (test_data->encoding != NULL) { + if (!XML_SetEncoding(extparser, test_data->encoding)) + fail("XML_SetEncoding() ignored for external entity"); + } + if (XML_Parse(extparser, + test_data->parse_text, + test_data->parse_len, + XML_TRUE) != XML_STATUS_ERROR) + fail(test_data->fail_text); + if (XML_GetErrorCode(extparser) != test_data->error) + xml_failure(extparser); + + XML_ParserFree(extparser); + return XML_STATUS_ERROR; +} + +START_TEST(test_ext_entity_utf16_unknown) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtFaults2 test_data = { + "a\0b\0c\0", + 6, + "Invalid character in entity not faulted", + NULL, + XML_ERROR_INVALID_TOKEN + }; + + XML_SetExternalEntityRefHandler(parser, external_entity_faulter2); + XML_SetUserData(parser, &test_data); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Invalid character should not have been accepted"); +} +END_TEST + +/* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */ +START_TEST(test_ext_entity_utf8_non_bom) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtTest2 test_data = { + "\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */ + 3, + NULL, + NULL, + EE_PARSE_NONE + }; + const XML_Char *expected = "\xef\xbb\x80"; + CharData storage; + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that UTF-8 in a CDATA section is correctly passed through */ +START_TEST(test_utf8_in_cdata_section) +{ + const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>"; + const XML_Char *expected = "one \xc3\xa9 two"; + + run_character_check(text, expected); +} +END_TEST + +/* Test that little-endian UTF-16 in a CDATA section is handled */ +START_TEST(test_utf8_in_cdata_section_2) +{ + const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>"; + const XML_Char *expected = "\xc3\xa9]\xc3\xa9two"; + + run_character_check(text, expected); +} +END_TEST + +/* Test trailing spaces in elements are accepted */ +static void XMLCALL +record_element_end_handler(void *userData, + const XML_Char *name) +{ + CharData *storage = (CharData *)userData; + + CharData_AppendXMLChars(storage, "/", 1); + CharData_AppendXMLChars(storage, name, -1); +} + +START_TEST(test_trailing_spaces_in_elements) +{ + const char *text = "<doc >Hi</doc >"; + const XML_Char *expected = "doc/doc"; + CharData storage; + + CharData_Init(&storage); + XML_SetElementHandler(parser, record_element_start_handler, + record_element_end_handler); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_utf16_attribute) +{ + const char text[] = + /* <d {KHO KHWAI}{CHO CHAN}='a'/> + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 + */ + "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0"; + const XML_Char *expected = "a"; + CharData storage; + + CharData_Init(&storage); + XML_SetStartElementHandler(parser, accumulate_attribute); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_utf16_second_attr) +{ + /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/> + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 + */ + const char text[] = + "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0" + "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0"; + const XML_Char *expected = "1"; + CharData storage; + + CharData_Init(&storage); + XML_SetStartElementHandler(parser, accumulate_attribute); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_attr_after_solidus) +{ + const char *text = "<doc attr1='a' / attr2='b'>"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Misplaced / not faulted"); +} +END_TEST + +static void XMLCALL +accumulate_entity_decl(void *userData, + const XML_Char *entityName, + int UNUSED_P(is_parameter_entity), + const XML_Char *value, + int value_length, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId), + const XML_Char *UNUSED_P(notationName)) +{ + CharData *storage = (CharData *)userData; + + CharData_AppendXMLChars(storage, entityName, -1); + CharData_AppendXMLChars(storage, "=", 1); + CharData_AppendXMLChars(storage, value, value_length); + CharData_AppendXMLChars(storage, "\n", 1); +} + + +START_TEST(test_utf16_pe) +{ + /* <!DOCTYPE doc [ + * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'> + * %{KHO KHWAI}{CHO CHAN}; + * ]> + * <doc></doc> + * + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 + */ + const char text[] = + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" + "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 " + "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 " + "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n" + "\0%\x0e\x04\x0e\x08\0;\0\n" + "\0]\0>\0\n" + "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>"; + const XML_Char *expected = + "\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetEntityDeclHandler(parser, accumulate_entity_decl); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that duff attribute description keywords are rejected */ +START_TEST(test_bad_attr_desc_keyword) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ATTLIST doc attr CDATA #!IMPLIED>\n" + "]>\n" + "<doc />"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Bad keyword !IMPLIED not faulted"); +} +END_TEST + +/* Test that an invalid attribute description keyword consisting of + * UTF-16 characters with their top bytes non-zero are correctly + * faulted + */ +START_TEST(test_bad_attr_desc_keyword_utf16) +{ + /* <!DOCTYPE d [ + * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}> + * ]><d/> + * + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 + */ + const char text[] = + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" + "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 " + "\0#\x0e\x04\x0e\x08\0>\0\n" + "\0]\0>\0<\0d\0/\0>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) != XML_STATUS_ERROR) + fail("Invalid UTF16 attribute keyword not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_SYNTAX) + xml_failure(parser); +} +END_TEST + +/* Test that invalid syntax in a <!DOCTYPE> is rejected. Do this + * using prefix-encoding (see above) to trigger specific code paths + */ +START_TEST(test_bad_doctype) +{ + const char *text = + "<?xml version='1.0' encoding='prefix-conv'?>\n" + "<!DOCTYPE doc [ \x80\x44 ]><doc/>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + expect_failure(text, XML_ERROR_SYNTAX, + "Invalid bytes in DOCTYPE not faulted"); +} +END_TEST + +START_TEST(test_bad_doctype_utf16) +{ + const char text[] = + /* <!DOCTYPE doc [ \x06f2 ]><doc/> + * + * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number + * (name character) but not a valid letter (name start character) + */ + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 " + "\x06\xf2" + "\0 \0]\0>\0<\0d\0o\0c\0/\0>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) != XML_STATUS_ERROR) + fail("Invalid bytes in DOCTYPE not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_SYNTAX) + xml_failure(parser); +} +END_TEST + +START_TEST(test_bad_doctype_plus) +{ + const char *text = + "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n" + "<1+>&foo;</1+>"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "'+' in document name not faulted"); +} +END_TEST + +START_TEST(test_bad_doctype_star) +{ + const char *text = + "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n" + "<1*>&foo;</1*>"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "'*' in document name not faulted"); +} +END_TEST + +START_TEST(test_bad_doctype_query) +{ + const char *text = + "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n" + "<1?>&foo;</1?>"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "'?' in document name not faulted"); +} +END_TEST + +START_TEST(test_unknown_encoding_bad_ignore) +{ + const char *text = + "<?xml version='1.0' encoding='prefix-conv'?>" + "<!DOCTYPE doc SYSTEM 'foo'>" + "<doc><e>&entity;</e></doc>"; + ExtFaults fault = { + "<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>", + "Invalid character not faulted", + "prefix-conv", + XML_ERROR_INVALID_TOKEN + }; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + XML_SetUserData(parser, &fault); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Bad IGNORE section with unknown encoding not failed"); +} +END_TEST + +START_TEST(test_entity_in_utf16_be_attr) +{ + const char text[] = + /* <e a='ä ä'></e> */ + "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 " + "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>"; + const XML_Char *expected = "\xc3\xa4 \xc3\xa4"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetStartElementHandler(parser, accumulate_attribute); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_entity_in_utf16_le_attr) +{ + const char text[] = + /* <e a='ä ä'></e> */ + "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0" + "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0"; + const XML_Char *expected = "\xc3\xa4 \xc3\xa4"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetStartElementHandler(parser, accumulate_attribute); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_entity_public_utf16_be) +{ + const char text[] = + /* <!DOCTYPE d [ */ + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" + /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ + "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 " + "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n" + /* %e; */ + "\0%\0e\0;\0\n" + /* ]> */ + "\0]\0>\0\n" + /* <d>&j;</d> */ + "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>"; + ExtTest2 test_data = { + /* <!ENTITY j 'baz'> */ + "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", + 34, + NULL, + NULL, + EE_PARSE_NONE + }; + const XML_Char *expected = "baz"; + CharData storage; + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_entity_public_utf16_le) +{ + const char text[] = + /* <!DOCTYPE d [ */ + "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0" + /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */ + "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0" + "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0" + /* %e; */ + "%\0e\0;\0\n\0" + /* ]> */ + "]\0>\0\n\0" + /* <d>&j;</d> */ + "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0"; + ExtTest2 test_data = { + /* <!ENTITY j 'baz'> */ + "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", + 34, + NULL, + NULL, + EE_PARSE_NONE + }; + const XML_Char *expected = "baz"; + CharData storage; + + CharData_Init(&storage); + test_data.storage = &storage; + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUserData(parser, &test_data); + XML_SetCharacterDataHandler(parser, ext2_accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +/* Test that a doctype with neither an internal nor external subset is + * faulted + */ +START_TEST(test_short_doctype) +{ + const char *text = "<!DOCTYPE doc></doc>"; + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "DOCTYPE without subset not rejected"); +} +END_TEST + +START_TEST(test_short_doctype_2) +{ + const char *text = "<!DOCTYPE doc PUBLIC></doc>"; + expect_failure(text, XML_ERROR_SYNTAX, + "DOCTYPE without Public ID not rejected"); +} +END_TEST + +START_TEST(test_short_doctype_3) +{ + const char *text = "<!DOCTYPE doc SYSTEM></doc>"; + expect_failure(text, XML_ERROR_SYNTAX, + "DOCTYPE without System ID not rejected"); +} +END_TEST + +START_TEST(test_long_doctype) +{ + const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>"; + expect_failure(text, XML_ERROR_SYNTAX, + "DOCTYPE with extra ID not rejected"); +} +END_TEST + +START_TEST(test_bad_entity) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY foo PUBLIC>\n" + "]>\n" + "<doc/>"; + expect_failure(text, XML_ERROR_SYNTAX, + "ENTITY without Public ID is not rejected"); +} +END_TEST + +/* Test unquoted value is faulted */ +START_TEST(test_bad_entity_2) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY % foo bar>\n" + "]>\n" + "<doc/>"; + expect_failure(text, XML_ERROR_SYNTAX, + "ENTITY without Public ID is not rejected"); +} +END_TEST + +START_TEST(test_bad_entity_3) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY % foo PUBLIC>\n" + "]>\n" + "<doc/>"; + expect_failure(text, XML_ERROR_SYNTAX, + "Parameter ENTITY without Public ID is not rejected"); +} +END_TEST + +START_TEST(test_bad_entity_4) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY % foo SYSTEM>\n" + "]>\n" + "<doc/>"; + expect_failure(text, XML_ERROR_SYNTAX, + "Parameter ENTITY without Public ID is not rejected"); +} +END_TEST + +START_TEST(test_bad_notation) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!NOTATION n SYSTEM>\n" + "]>\n" + "<doc/>"; + expect_failure(text, XML_ERROR_SYNTAX, + "Notation without System ID is not rejected"); +} +END_TEST /* * Namespaces tests. @@ -2430,7 +6828,8 @@ namespace_teardown(void) provided as the userData argument; the first is the expected element name, and the second is the expected attribute name. */ -static int triplet_count = 0; +static int triplet_start_flag = XML_FALSE; +static int triplet_end_flag = XML_FALSE; static void XMLCALL triplet_start_checker(void *userData, const XML_Char *name, @@ -2446,7 +6845,7 @@ triplet_start_checker(void *userData, const XML_Char *name, sprintf(buffer, "unexpected attribute string: '%s'", atts[0]); fail(buffer); } - triplet_count++; + triplet_start_flag = XML_TRUE; } /* Check that the element name passed to the end-element handler matches @@ -2462,18 +6861,18 @@ triplet_end_checker(void *userData, const XML_Char *name) sprintf(buffer, "unexpected end string: '%s'", name); fail(buffer); } - triplet_count++; + triplet_end_flag = XML_TRUE; } START_TEST(test_return_ns_triplet) { const char *text = - "<foo:e xmlns:foo='http://expat.sf.net/' bar:a='12'\n" - " xmlns:bar='http://expat.sf.net/'>"; + "<foo:e xmlns:foo='http://example.org/' bar:a='12'\n" + " xmlns:bar='http://example.org/'>"; const char *epilog = "</foo:e>"; const char *elemstr[] = { - "http://expat.sf.net/ e foo", - "http://expat.sf.net/ a bar" + "http://example.org/ e foo", + "http://example.org/ a bar" }; XML_SetReturnNSTriplet(parser, XML_TRUE); XML_SetUserData(parser, elemstr); @@ -2482,19 +6881,24 @@ START_TEST(test_return_ns_triplet) XML_SetNamespaceDeclHandler(parser, dummy_start_namespace_decl_handler, dummy_end_namespace_decl_handler); - triplet_count = 0; + triplet_start_flag = XML_FALSE; + triplet_end_flag = XML_FALSE; + dummy_handler_flags = 0; if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR) xml_failure(parser); - if (triplet_count != 1) + if (!triplet_start_flag) fail("triplet_start_checker not invoked"); /* Check that unsetting "return triplets" fails while still parsing */ XML_SetReturnNSTriplet(parser, XML_FALSE); if (_XML_Parse_SINGLE_BYTES(parser, epilog, strlen(epilog), XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); - if (triplet_count != 2) + if (!triplet_end_flag) fail("triplet_end_checker not invoked"); + if (dummy_handler_flags != (DUMMY_START_NS_DECL_HANDLER_FLAG | + DUMMY_END_NS_DECL_HANDLER_FLAG)) + fail("Namespace handlers not called"); } END_TEST @@ -2539,19 +6943,19 @@ run_ns_tagname_overwrite_test(const char *text, const char *result) START_TEST(test_ns_tagname_overwrite) { const char *text = - "<n:e xmlns:n='http://xml.libexpat.org/'>\n" + "<n:e xmlns:n='http://example.org/'>\n" " <n:f n:attr='foo'/>\n" " <n:g n:attr2='bar'/>\n" "</n:e>"; const char *result = - "start http://xml.libexpat.org/ e\n" - "start http://xml.libexpat.org/ f\n" - "attribute http://xml.libexpat.org/ attr\n" - "end http://xml.libexpat.org/ f\n" - "start http://xml.libexpat.org/ g\n" - "attribute http://xml.libexpat.org/ attr2\n" - "end http://xml.libexpat.org/ g\n" - "end http://xml.libexpat.org/ e\n"; + "start http://example.org/ e\n" + "start http://example.org/ f\n" + "attribute http://example.org/ attr\n" + "end http://example.org/ f\n" + "start http://example.org/ g\n" + "attribute http://example.org/ attr2\n" + "end http://example.org/ g\n" + "end http://example.org/ e\n"; run_ns_tagname_overwrite_test(text, result); } END_TEST @@ -2560,19 +6964,19 @@ END_TEST START_TEST(test_ns_tagname_overwrite_triplet) { const char *text = - "<n:e xmlns:n='http://xml.libexpat.org/'>\n" + "<n:e xmlns:n='http://example.org/'>\n" " <n:f n:attr='foo'/>\n" " <n:g n:attr2='bar'/>\n" "</n:e>"; const char *result = - "start http://xml.libexpat.org/ e n\n" - "start http://xml.libexpat.org/ f n\n" - "attribute http://xml.libexpat.org/ attr n\n" - "end http://xml.libexpat.org/ f n\n" - "start http://xml.libexpat.org/ g n\n" - "attribute http://xml.libexpat.org/ attr2 n\n" - "end http://xml.libexpat.org/ g n\n" - "end http://xml.libexpat.org/ e n\n"; + "start http://example.org/ e n\n" + "start http://example.org/ f n\n" + "attribute http://example.org/ attr n\n" + "end http://example.org/ f n\n" + "start http://example.org/ g n\n" + "attribute http://example.org/ attr2 n\n" + "end http://example.org/ g n\n" + "end http://example.org/ e n\n"; XML_SetReturnNSTriplet(parser, XML_TRUE); run_ns_tagname_overwrite_test(text, result); } @@ -2602,7 +7006,7 @@ START_TEST(test_start_ns_clears_start_element) syntax doesn't cause the problematic path through Expat to be taken. */ - const char *text = "<e xmlns='http://xml.libexpat.org/'></e>"; + const char *text = "<e xmlns='http://example.org/'></e>"; XML_SetStartElementHandler(parser, start_element_fail); XML_SetStartNamespaceDeclHandler(parser, start_ns_clearing_start_element); @@ -2637,20 +7041,20 @@ external_entity_handler(XML_Parser parser, p2 = XML_ExternalEntityParserCreate(parser, context, NULL); if (_XML_Parse_SINGLE_BYTES(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) { xml_failure(p2); - return 0; + return XML_STATUS_ERROR; } XML_ParserFree(p2); - return 1; + return XML_STATUS_OK; } START_TEST(test_default_ns_from_ext_subset_and_ext_ge) { const char *text = "<?xml version='1.0'?>\n" - "<!DOCTYPE doc SYSTEM 'http://xml.libexpat.org/doc.dtd' [\n" - " <!ENTITY en SYSTEM 'http://xml.libexpat.org/entity.ent'>\n" + "<!DOCTYPE doc SYSTEM 'http://example.org/doc.dtd' [\n" + " <!ENTITY en SYSTEM 'http://example.org/entity.ent'>\n" "]>\n" - "<doc xmlns='http://xml.libexpat.org/ns1'>\n" + "<doc xmlns='http://example.org/ns1'>\n" "&en;\n" "</doc>"; @@ -2668,7 +7072,7 @@ END_TEST START_TEST(test_ns_prefix_with_empty_uri_1) { const char *text = - "<doc xmlns:prefix='http://xml.libexpat.org/'>\n" + "<doc xmlns:prefix='http://example.org/'>\n" " <e xmlns:prefix=''/>\n" "</doc>"; @@ -2716,14 +7120,14 @@ START_TEST(test_ns_prefix_with_empty_uri_4) "<!DOCTYPE doc [\n" " <!ELEMENT prefix:doc EMPTY>\n" " <!ATTLIST prefix:doc\n" - " xmlns:prefix CDATA 'http://xml.libexpat.org/'>\n" + " xmlns:prefix CDATA 'http://example.org/'>\n" "]>\n" "<prefix:doc/>"; /* Packaged info expected by the end element handler; the weird structuring lets us re-use the triplet_end_checker() function also used for another test. */ const char *elemstr[] = { - "http://xml.libexpat.org/ doc prefix" + "http://example.org/ doc prefix" }; XML_SetReturnNSTriplet(parser, XML_TRUE); XML_SetUserData(parser, elemstr); @@ -2733,12 +7137,36 @@ START_TEST(test_ns_prefix_with_empty_uri_4) } END_TEST +/* Test with non-xmlns prefix */ +START_TEST(test_ns_unbound_prefix) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ELEMENT prefix:doc EMPTY>\n" + " <!ATTLIST prefix:doc\n" + " notxmlns:prefix CDATA 'http://example.org/'>\n" + "]>\n" + "<prefix:doc/>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + fail("Unbound prefix incorrectly passed"); + if (XML_GetErrorCode(parser) != XML_ERROR_UNBOUND_PREFIX) + xml_failure(parser); +} +END_TEST + START_TEST(test_ns_default_with_empty_uri) { const char *text = - "<doc xmlns='http://xml.libexpat.org/'>\n" + "<doc xmlns='http://example.org/'>\n" " <e xmlns=''/>\n" "</doc>"; + /* Add some handlers to exercise extra code paths */ + XML_SetStartNamespaceDeclHandler(parser, + dummy_start_namespace_decl_handler); + XML_SetEndNamespaceDeclHandler(parser, + dummy_end_namespace_decl_handler); if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) xml_failure(parser); } @@ -2748,8 +7176,8 @@ END_TEST START_TEST(test_ns_duplicate_attrs_diff_prefixes) { const char *text = - "<doc xmlns:a='http://xml.libexpat.org/a'\n" - " xmlns:b='http://xml.libexpat.org/a'\n" + "<doc xmlns:a='http://example.org/a'\n" + " xmlns:b='http://example.org/a'\n" " a:a='v' b:a='v' />"; expect_failure(text, XML_ERROR_DUPLICATE_ATTRIBUTE, @@ -2757,6 +7185,33 @@ START_TEST(test_ns_duplicate_attrs_diff_prefixes) } END_TEST +START_TEST(test_ns_duplicate_hashes) +{ + /* The hash of an attribute is calculated as the hash of its URI + * concatenated with a space followed by its name (after the + * colon). We wish to generate attributes with the same hash + * value modulo the attribute table size so that we can check that + * the attribute hash table works correctly. The attribute hash + * table size will be the smallest power of two greater than the + * number of attributes, but at least eight. There is + * unfortunately no programmatic way of getting the hash or the + * table size at user level, but the test code coverage percentage + * will drop if the hashes cease to point to the same row. + * + * The cunning plan is to have few enough attributes to have a + * reliable table size of 8, and have the single letter attribute + * names be 8 characters apart, producing a hash which will be the + * same modulo 8. + */ + const char *text = + "<doc xmlns:a='http://example.org/a'\n" + " a:a='v' a:i='w' />"; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + /* Regression test for SF bug #695401: unbound prefix. */ START_TEST(test_ns_unbound_prefix_on_attribute) { @@ -2799,26 +7254,366 @@ START_TEST(test_ns_parser_reset) } END_TEST +/* Test that long element names with namespaces are handled correctly */ +START_TEST(test_ns_long_element) +{ + const char *text = + "<foo:thisisalongenoughelementnametotriggerareallocation\n" + " xmlns:foo='http://example.org/' bar:a='12'\n" + " xmlns:bar='http://example.org/'>" + "</foo:thisisalongenoughelementnametotriggerareallocation>"; + const char *elemstr[] = { + "http://example.org/" + " thisisalongenoughelementnametotriggerareallocation foo", + "http://example.org/ a bar" + }; + + XML_SetReturnNSTriplet(parser, XML_TRUE); + XML_SetUserData(parser, elemstr); + XML_SetElementHandler(parser, + triplet_start_checker, + triplet_end_checker); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test mixed population of prefixed and unprefixed attributes */ +START_TEST(test_ns_mixed_prefix_atts) +{ + const char *text = + "<e a='12' bar:b='13'\n" + " xmlns:bar='http://example.org/'>" + "</e>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test having a long namespaced element name inside a short one. + * This exercises some internal buffer reallocation that is shared + * across elements with the same namespace URI. + */ +START_TEST(test_ns_extend_uri_buffer) +{ + const char *text = + "<foo:e xmlns:foo='http://example.org/'>" + " <foo:thisisalongenoughnametotriggerallocationaction" + " foo:a='12' />" + "</foo:e>"; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test that xmlns is correctly rejected as an attribute in the xmlns + * namespace, but not in other namespaces + */ +START_TEST(test_ns_reserved_attributes) +{ + const char *text1 = + "<foo:e xmlns:foo='http://example.org/' xmlns:xmlns='12' />"; + const char *text2 = + "<foo:e xmlns:foo='http://example.org/' foo:xmlns='12' />"; + expect_failure(text1, XML_ERROR_RESERVED_PREFIX_XMLNS, + "xmlns not rejected as an attribute"); + XML_ParserReset(parser, NULL); + if (_XML_Parse_SINGLE_BYTES(parser, text2, strlen(text2), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test more reserved attributes */ +START_TEST(test_ns_reserved_attributes_2) +{ + const char *text1 = + "<foo:e xmlns:foo='http://example.org/'" + " xmlns:xml='http://example.org/' />"; + const char *text2 = + "<foo:e xmlns:foo='http://www.w3.org/XML/1998/namespace' />"; + const char *text3 = + "<foo:e xmlns:foo='http://www.w3.org/2000/xmlns/' />"; + + expect_failure(text1, XML_ERROR_RESERVED_PREFIX_XML, + "xml not rejected as an attribute"); + XML_ParserReset(parser, NULL); + expect_failure(text2, XML_ERROR_RESERVED_NAMESPACE_URI, + "Use of w3.org URL not faulted"); + XML_ParserReset(parser, NULL); + expect_failure(text3, XML_ERROR_RESERVED_NAMESPACE_URI, + "Use of w3.org xmlns URL not faulted"); +} +END_TEST + +/* Test string pool handling of namespace names of 2048 characters */ +/* Exercises a particular string pool growth path */ +START_TEST(test_ns_extremely_long_prefix) +{ + const char *text = + "<doc " + /* 64 character on each line */ + /* ...gives a total length of 2048 */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + ":a='12' xmlns:" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "='foo'\n>" + "</doc>"; + + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_FALSE) == XML_STATUS_ERROR) + xml_failure(parser); +} +END_TEST + +/* Test unknown encoding handlers in namespace setup */ +START_TEST(test_ns_unknown_encoding_success) +{ + const char *text = + "<?xml version='1.0' encoding='prefix-conv'?>\n" + "<foo:e xmlns:foo='http://example.org/'>Hi</foo:e>"; + + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + run_character_check(text, "Hi"); +} +END_TEST + +/* Test that too many colons are rejected */ +START_TEST(test_ns_double_colon) +{ + const char *text = + "<foo:e xmlns:foo='http://example.org/' foo:a:b='bar' />"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Double colon in attribute name not faulted"); +} +END_TEST + +START_TEST(test_ns_double_colon_element) +{ + const char *text = + "<foo:bar:e xmlns:foo='http://example.org/' />"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Double colon in element name not faulted"); +} +END_TEST + +/* Test that non-name characters after a colon are rejected */ +START_TEST(test_ns_bad_attr_leafname) +{ + const char *text = + "<foo:e xmlns:foo='http://example.org/' foo:?ar='baz' />"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid character in leafname not faulted"); +} +END_TEST + +START_TEST(test_ns_bad_element_leafname) +{ + const char *text = + "<foo:?oc xmlns:foo='http://example.org/' />"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid character in element leafname not faulted"); +} +END_TEST + +/* Test high-byte-set UTF-16 characters are valid in a leafname */ +START_TEST(test_ns_utf16_leafname) +{ + const char text[] = + /* <n:e xmlns:n='URI' n:{KHO KHWAI}='a' /> + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + */ + "<\0n\0:\0e\0 \0x\0m\0l\0n\0s\0:\0n\0=\0'\0U\0R\0I\0'\0 \0" + "n\0:\0\x04\x0e=\0'\0a\0'\0 \0/\0>\0"; + const XML_Char *expected = "a"; + CharData storage; + + CharData_Init(&storage); + XML_SetStartElementHandler(parser, accumulate_attribute); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_ns_utf16_element_leafname) +{ + const char text[] = + /* <n:{KHO KHWAI} xmlns:n='URI'/> + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + */ + "\0<\0n\0:\x0e\x04\0 \0x\0m\0l\0n\0s\0:\0n\0=\0'\0U\0R\0I\0'\0/\0>"; + const XML_Char *expected = "URI \xe0\xb8\x84"; + CharData storage; + + CharData_Init(&storage); + XML_SetStartElementHandler(parser, start_element_event_handler); + XML_SetUserData(parser, &storage); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_ns_utf16_doctype) +{ + const char text[] = + /* <!DOCTYPE foo:{KHO KHWAI} [ <!ENTITY bar 'baz'> ]>\n + * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 + */ + "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0f\0o\0o\0:\x0e\x04\0 " + "\0[\0 \0<\0!\0E\0N\0T\0I\0T\0Y\0 \0b\0a\0r\0 \0'\0b\0a\0z\0'\0>\0 " + "\0]\0>\0\n" + /* <foo:{KHO KHWAI} xmlns:foo='URI'>&bar;</foo:{KHO KHWAI}> */ + "\0<\0f\0o\0o\0:\x0e\x04\0 " + "\0x\0m\0l\0n\0s\0:\0f\0o\0o\0=\0'\0U\0R\0I\0'\0>" + "\0&\0b\0a\0r\0;" + "\0<\0/\0f\0o\0o\0:\x0e\x04\0>"; + const XML_Char *expected = "URI \xe0\xb8\x84"; + CharData storage; + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetStartElementHandler(parser, start_element_event_handler); + XML_SetUnknownEncodingHandler(parser, MiscEncodingHandler, NULL); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + +START_TEST(test_ns_invalid_doctype) +{ + const char *text = + "<!DOCTYPE foo:!bad [ <!ENTITY bar 'baz' ]>\n" + "<foo:!bad>&bar;</foo:!bad>"; + + expect_failure(text, XML_ERROR_INVALID_TOKEN, + "Invalid character in document local name not faulted"); +} +END_TEST + +START_TEST(test_ns_double_colon_doctype) +{ + const char *text = + "<!DOCTYPE foo:a:doc [ <!ENTITY bar 'baz' ]>\n" + "<foo:a:doc>&bar;</foo:a:doc>"; + + expect_failure(text, XML_ERROR_SYNTAX, + "Double colon in document name not faulted"); +} +END_TEST + /* Control variable; the number of times duff_allocator() will successfully allocate */ -static unsigned int allocation_count = 0; +#define ALLOC_ALWAYS_SUCCEED (-1) +#define REALLOC_ALWAYS_SUCCEED (-1) + +static int allocation_count = ALLOC_ALWAYS_SUCCEED; +static int reallocation_count = REALLOC_ALWAYS_SUCCEED; /* Crocked allocator for allocation failure tests */ static void *duff_allocator(size_t size) { if (allocation_count == 0) return NULL; - allocation_count--; + if (allocation_count != ALLOC_ALWAYS_SUCCEED) + allocation_count--; return malloc(size); } +/* Crocked reallocator for allocation failure tests */ +static void *duff_reallocator(void *ptr, size_t size) +{ + if (reallocation_count == 0) + return NULL; + if (reallocation_count != REALLOC_ALWAYS_SUCCEED) + reallocation_count--; + return realloc(ptr, size); +} + /* Test that a failure to allocate the parser structure fails gracefully */ START_TEST(test_misc_alloc_create_parser) { XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; unsigned int i; + const unsigned int max_alloc_count = 10; /* Something this simple shouldn't need more than 10 allocations */ - for (i = 0; i < 10; i++) + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; parser = XML_ParserCreate_MM(NULL, &memsuite, NULL); @@ -2827,8 +7622,8 @@ START_TEST(test_misc_alloc_create_parser) } if (i == 0) fail("Parser unexpectedly ignored failing allocator"); - else if (i == 10) - fail("Parser not created with allocation count 10"); + else if (i == max_alloc_count) + fail("Parser not created with max allocation count"); } END_TEST @@ -2837,9 +7632,10 @@ START_TEST(test_misc_alloc_create_parser_with_encoding) { XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; unsigned int i; + const unsigned int max_alloc_count = 10; /* Try several levels of allocation */ - for (i = 0; i < 10; i++) { + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; parser = XML_ParserCreate_MM("us-ascii", &memsuite, NULL); if (parser != NULL) @@ -2847,117 +7643,8 @@ START_TEST(test_misc_alloc_create_parser_with_encoding) } if (i == 0) fail("Parser ignored failing allocator"); - else if (i == 10) - fail("Parser not created with allocation count 10"); -} -END_TEST - -/* Test the effects of allocation failure in simple namespace parsing. - * Based on test_ns_default_with_empty_uri() - */ -START_TEST(test_misc_alloc_ns) -{ - XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; - const char *text = - "<doc xmlns='http://xml.libexpat.org/'>\n" - " <e xmlns=''/>\n" - "</doc>"; - unsigned int i; - int repeated = 0; - XML_Char ns_sep[2] = { ' ', '\0' }; - - allocation_count = 10000; - parser = XML_ParserCreate_MM(NULL, &memsuite, ns_sep); - if (parser == NULL) { - fail("Parser not created"); - } else { - for (i = 0; i < 10; i++) { - /* Repeat some tests with the same allocation count to - * catch cached allocations not freed by XML_ParserReset() - */ - if (repeated < 2 && i == 3) { - i--; - repeated++; - } - if (repeated == 2 && i == 5) { - i = 3; - repeated++; - } - allocation_count = i; - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) - break; - XML_ParserReset(parser, NULL); - } - if (i == 0) - fail("Parsing worked despite failing allocations"); - else if (i == 10) - fail("Parsing failed even at allocation count 10"); - } -} -END_TEST - -/* Test XML_ParseBuffer interface with namespace and a dicky allocator */ -START_TEST(test_misc_alloc_ns_parse_buffer) -{ - XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; - XML_Char ns_sep[2] = { ' ', '\0' }; - const char *text = "<doc>Hello</doc>"; - void *buffer; - - /* Make sure the basic parser is allocated */ - allocation_count = 10000; - parser = XML_ParserCreate_MM(NULL, &memsuite, ns_sep); - if (parser == NULL) - fail("Parser not created"); - - /* Try a parse before the start of the world */ - /* (Exercises new code path) */ - allocation_count = 0; - if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_ERROR) - fail("Pre-init XML_ParseBuffer not faulted"); - if (XML_GetErrorCode(parser) != XML_ERROR_NO_MEMORY) - fail("Pre-init XML_ParseBuffer faulted for wrong reason"); - - /* Now with actual memory allocation */ - allocation_count = 10000; - if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) - xml_failure(parser); - - /* Check that resuming an unsuspended parser is faulted */ - if (XML_ResumeParser(parser) != XML_STATUS_ERROR) - fail("Resuming unsuspended parser not faulted"); - if (XML_GetErrorCode(parser) != XML_ERROR_NOT_SUSPENDED) - xml_failure(parser); - - /* Get the parser into suspended state */ - XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler); - resumable = XML_TRUE; - buffer = XML_GetBuffer(parser, strlen(text)); - if (buffer == NULL) - fail("Could not acquire parse buffer"); - memcpy(buffer, text, strlen(text)); - if (XML_ParseBuffer(parser, strlen(text), - XML_TRUE) != XML_STATUS_SUSPENDED) - xml_failure(parser); - if (XML_GetErrorCode(parser) != XML_ERROR_NONE) - xml_failure(parser); - if (XML_ParseBuffer(parser, strlen(text), XML_TRUE) != XML_STATUS_ERROR) - fail("Suspended XML_ParseBuffer not faulted"); - if (XML_GetErrorCode(parser) != XML_ERROR_SUSPENDED) - xml_failure(parser); - if (XML_GetBuffer(parser, strlen(text)) != NULL) - fail("Suspended XML_GetBuffer not faulted"); - - /* Get it going again and complete the world */ - XML_SetCharacterDataHandler(parser, NULL); - if (XML_ResumeParser(parser) != XML_STATUS_OK) - xml_failure(parser); - if (XML_ParseBuffer(parser, strlen(text), XML_TRUE) != XML_STATUS_ERROR) - fail("Post-finishing XML_ParseBuffer not faulted"); - if (XML_GetErrorCode(parser) != XML_ERROR_FINISHED) - xml_failure(parser); - if (XML_GetBuffer(parser, strlen(text)) != NULL) - fail("Post-finishing XML_GetBuffer not faulted"); + else if (i == max_alloc_count) + fail("Parser not created with max allocation count"); } END_TEST @@ -2981,38 +7668,107 @@ START_TEST(test_misc_error_string) END_TEST /* Test the version information is consistent */ + +/* Since we are working in XML_LChars (potentially 16-bits), we + * can't use the standard C library functions for character + * manipulation and have to roll our own. + */ +static int +parse_version(const XML_LChar *version_text, + XML_Expat_Version *version_struct) +{ + while (*version_text != 0x00) { + if (*version_text >= ASCII_0 && *version_text <= ASCII_9) + break; + version_text++; + } + if (*version_text == 0x00) + return XML_FALSE; + + /* version_struct->major = strtoul(version_text, 10, &version_text) */ + version_struct->major = 0; + while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { + version_struct->major = + 10 * version_struct->major + (*version_text++ - ASCII_0); + } + if (*version_text++ != ASCII_PERIOD) + return XML_FALSE; + + /* Now for the minor version number */ + version_struct->minor = 0; + while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { + version_struct->minor = + 10 * version_struct->minor + (*version_text++ - ASCII_0); + } + if (*version_text++ != ASCII_PERIOD) + return XML_FALSE; + + /* Finally the micro version number */ + version_struct->micro = 0; + while (*version_text >= ASCII_0 && *version_text <= ASCII_9) { + version_struct->micro = + 10 * version_struct->micro + (*version_text++ - ASCII_0); + } + if (*version_text != 0x00) + return XML_FALSE; + return XML_TRUE; +} + +static int +versions_equal(const XML_Expat_Version *first, + const XML_Expat_Version *second) +{ + return (first->major == second->major && + first->minor == second->minor && + first->micro == second->micro); +} + START_TEST(test_misc_version) { - XML_Expat_Version version_struct = XML_ExpatVersionInfo(); + XML_Expat_Version read_version = XML_ExpatVersionInfo(); + /* Silence compiler warning with the following assignment */ + XML_Expat_Version parsed_version = { 0, 0, 0 }; const XML_LChar *version_text = XML_ExpatVersion(); - long value; - const char *p; - char *endp; if (version_text == NULL) fail("Could not obtain version text"); - for (p = version_text; *p != '\0'; p++) - if (isdigit(*p)) - break; - if (*p == '\0') - fail("No numbers in version text"); - value = strtoul(p, &endp, 10); - if (*endp != '.') - fail("Major version conversion from text failed"); - if (value != version_struct.major) - fail("Major version mismatch"); - p = endp + 1; - value = strtoul(p, &endp, 10); - if (*endp != '.') - fail("Minor version conversion from text failed"); - if (value != version_struct.minor) - fail("Minor version mismatch"); - p = endp + 1; - value = strtoul(p, &endp, 10); - if (*endp != '\0') - fail("Micro version conversion from text failed"); - if (value != version_struct.micro) - fail("Micro version mismatch"); + if (!parse_version(version_text, &parsed_version)) + fail("Unable to parse version text"); + if (!versions_equal(&read_version, &parsed_version)) + fail("Version mismatch"); + +#if ! defined(XML_UNICODE) + if (strcmp(version_text, "expat_2.2.3")) /* needs bump on releases */ + fail("XML_*_VERSION in expat.h out of sync?\n"); +#endif /* ! defined(XML_UNICODE) */ +} +END_TEST + +/* Test feature information */ +START_TEST(test_misc_features) +{ + const XML_Feature *features = XML_GetFeatureList(); + + /* Prevent problems with double-freeing parsers */ + parser = NULL; + if (features == NULL) + fail("Failed to get feature information"); + /* Loop through the features checking what we can */ + while (features->feature != XML_FEATURE_END) { + switch(features->feature) { + case XML_FEATURE_SIZEOF_XML_CHAR: + if (features->value != sizeof(XML_Char)) + fail("Incorrect size of XML_Char"); + break; + case XML_FEATURE_SIZEOF_XML_LCHAR: + if (features->value != sizeof(XML_LChar)) + fail("Incorrect size of XML_LChar"); + break; + default: + break; + } + features++; + } } END_TEST @@ -3040,14 +7796,44 @@ START_TEST(test_misc_attribute_leak) } END_TEST +/* Test parser created for UTF-16LE is successful */ +START_TEST(test_misc_utf16le) +{ + const char text[] = + /* <?xml version='1.0'?><q>Hi</q> */ + "<\0?\0x\0m\0l\0 \0" + "v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0?\0>\0" + "<\0q\0>\0H\0i\0<\0/\0q\0>\0"; + const XML_Char *expected = "Hi"; + CharData storage; + + parser = XML_ParserCreate("UTF-16LE"); + if (parser == NULL) + fail("Parser not created"); + + CharData_Init(&storage); + XML_SetUserData(parser, &storage); + XML_SetCharacterDataHandler(parser, accumulate_characters); + if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + CharData_CheckXMLChars(&storage, expected); +} +END_TEST + static void alloc_setup(void) { - XML_Memory_Handling_Suite memsuite = { duff_allocator, realloc, free }; + XML_Memory_Handling_Suite memsuite = { + duff_allocator, + duff_reallocator, + free + }; /* Ensure the parser creation will go through */ - allocation_count = 10000; + allocation_count = ALLOC_ALWAYS_SUCCEED; + reallocation_count = REALLOC_ALWAYS_SUCCEED; parser = XML_ParserCreate_MM(NULL, &memsuite, NULL); if (parser == NULL) fail("Parser not created"); @@ -3059,6 +7845,252 @@ alloc_teardown(void) basic_teardown(); } + +/* Test the effects of allocation failures on xml declaration processing */ +START_TEST(test_alloc_parse_xdecl) +{ + const char *text = + "<?xml version='1.0' encoding='utf-8'?>\n" + "<doc>Hello, world</doc>"; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetXmlDeclHandler(parser, dummy_xdecl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* Resetting the parser is insufficient, because some memory + * allocations are cached within the parser. Instead we use + * the teardown and setup routines to ensure that we have the + * right sort of parser back in our hands. + */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +/* As above, but with an encoding big enough to cause storing the + * version information to expand the string pool being used. + */ +static int XMLCALL +long_encoding_handler(void *UNUSED_P(userData), + const XML_Char *UNUSED_P(encoding), + XML_Encoding *info) +{ + int i; + + for (i = 0; i < 256; i++) + info->map[i] = i; + info->data = NULL; + info->convert = NULL; + info->release = NULL; + return XML_STATUS_OK; +} + +START_TEST(test_alloc_parse_xdecl_2) +{ + const char *text = + "<?xml version='1.0' encoding='" + /* Each line is 64 characters */ + "ThisIsAStupidlyLongEncodingNameIntendedToTriggerPoolGrowth123456" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMN" + "'?>" + "<doc>Hello, world</doc>"; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetXmlDeclHandler(parser, dummy_xdecl_handler); + XML_SetUnknownEncodingHandler(parser, long_encoding_handler, NULL); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +/* Test the effects of allocation failures on a straightforward parse */ +START_TEST(test_alloc_parse_pi) +{ + const char *text = + "<?xml version='1.0' encoding='utf-8'?>\n" + "<?pi unknown?>\n" + "<doc>" + "Hello, world" + "</doc>"; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetProcessingInstructionHandler(parser, dummy_pi_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +START_TEST(test_alloc_parse_pi_2) +{ + const char *text = + "<?xml version='1.0' encoding='utf-8'?>\n" + "<doc>" + "Hello, world" + "<?pi unknown?>\n" + "</doc>"; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetProcessingInstructionHandler(parser, dummy_pi_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +START_TEST(test_alloc_parse_pi_3) +{ + const char *text = + "<?" + /* 64 characters per line */ + "This processing instruction should be long enough to ensure that" + "it triggers the growth of an internal string pool when the " + "allocator fails at a cruicial moment FGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "Q?><doc/>"; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetProcessingInstructionHandler(parser, dummy_pi_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +START_TEST(test_alloc_parse_comment) +{ + const char *text = + "<?xml version='1.0' encoding='utf-8'?>\n" + "<!-- Test parsing this comment -->" + "<doc>Hi</doc>"; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetCommentHandler(parser, dummy_comment_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + +START_TEST(test_alloc_parse_comment_2) +{ + const char *text = + "<?xml version='1.0' encoding='utf-8'?>\n" + "<doc>" + "Hello, world" + "<!-- Parse this comment too -->" + "</doc>"; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetCommentHandler(parser, dummy_comment_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed with max allocations"); +} +END_TEST + static int XMLCALL external_entity_duff_loader(XML_Parser parser, const XML_Char *context, @@ -3068,9 +8100,10 @@ external_entity_duff_loader(XML_Parser parser, { XML_Parser new_parser; unsigned int i; + const unsigned int max_alloc_count = 10; /* Try a few different allocation levels */ - for (i = 0; i < 10; i++) + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); @@ -3082,11 +8115,11 @@ external_entity_duff_loader(XML_Parser parser, } if (i == 0) fail("External parser creation ignored failing allocator"); - else if (i == 10) - fail("Extern parser not created with allocation count 10"); + else if (i == max_alloc_count) + fail("Extern parser not created with max allocation count"); /* Make sure other random allocation doesn't now fail */ - allocation_count = 10000; + allocation_count = ALLOC_ALWAYS_SUCCEED; /* Make sure the failure code path is executed too */ return XML_STATUS_ERROR; @@ -3114,16 +8147,6 @@ START_TEST(test_alloc_create_external_parser) } END_TEST -static int XMLCALL -external_entity_null_loader(XML_Parser UNUSED_P(parser), - const XML_Char *UNUSED_P(context), - const XML_Char *UNUSED_P(base), - const XML_Char *UNUSED_P(systemId), - const XML_Char *UNUSED_P(publicId)) -{ - return XML_STATUS_OK; -} - /* More external parser memory allocation testing */ START_TEST(test_alloc_run_external_parser) { @@ -3134,8 +8157,9 @@ START_TEST(test_alloc_run_external_parser) char foo_text[] = "<!ELEMENT doc (#PCDATA)*>"; unsigned int i; + const unsigned int max_alloc_count = 15; - for (i = 0; i < 10; i++) { + for (i = 0; i < max_alloc_count; i++) { XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetUserData(parser, foo_text); @@ -3144,12 +8168,13 @@ START_TEST(test_alloc_run_external_parser) allocation_count = i; if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; - /* Re-use the parser */ - XML_ParserReset(parser, NULL); + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); } if (i == 0) fail("Parsing ignored failing allocator"); - else if (i == 10) + else if (i == max_alloc_count) fail("Parsing failed with allocation count 10"); } END_TEST @@ -3166,6 +8191,7 @@ external_entity_dbl_handler(XML_Parser parser, const char *text; XML_Parser new_parser; int i; + const int max_alloc_count = 20; if (callno == 0) { /* First time through, check how many calls to malloc occur */ @@ -3176,7 +8202,7 @@ external_entity_dbl_handler(XML_Parser parser, new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); if (new_parser == NULL) { fail("Unable to allocate first external parser"); - return 0; + return XML_STATUS_ERROR; } /* Stash the number of calls in the user data */ XML_SetUserData(parser, (void *)(intptr_t)(10000 - allocation_count)); @@ -3184,7 +8210,7 @@ external_entity_dbl_handler(XML_Parser parser, text = ("<?xml version='1.0' encoding='us-ascii'?>" "<e/>"); /* Try at varying levels to exercise more code paths */ - for (i = 0; i < 20; i++) { + for (i = 0; i < max_alloc_count; i++) { allocation_count = callno + i; new_parser = XML_ExternalEntityParserCreate(parser, context, @@ -3195,21 +8221,21 @@ external_entity_dbl_handler(XML_Parser parser, if (i == 0) { fail("Second external parser unexpectedly created"); XML_ParserFree(new_parser); - return 0; + return XML_STATUS_ERROR; } - else if (i == 20) { + else if (i == max_alloc_count) { fail("Second external parser not created"); - return 0; + return XML_STATUS_ERROR; } } - allocation_count = 10000; + allocation_count = ALLOC_ALWAYS_SUCCEED; if (_XML_Parse_SINGLE_BYTES(new_parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) { xml_failure(new_parser); - return 0; + return XML_STATUS_ERROR; } XML_ParserFree(new_parser); - return 1; + return XML_STATUS_OK; } /* Test that running out of memory in dtdCopy is correctly reported. @@ -3219,10 +8245,10 @@ START_TEST(test_alloc_dtd_copy_default_atts) { const char *text = "<?xml version='1.0'?>\n" - "<!DOCTYPE doc SYSTEM 'http://xml.libexpat.org/doc.dtd' [\n" - " <!ENTITY en SYSTEM 'http://xml.libexpat.org/entity.ent'>\n" + "<!DOCTYPE doc SYSTEM 'http://example.org/doc.dtd' [\n" + " <!ENTITY en SYSTEM 'http://example.org/entity.ent'>\n" "]>\n" - "<doc xmlns='http://xml.libexpat.org/ns1'>\n" + "<doc xmlns='http://example.org/ns1'>\n" "&en;\n" "</doc>"; @@ -3246,7 +8272,7 @@ external_entity_dbl_handler_2(XML_Parser parser, intptr_t callno = (intptr_t)XML_GetUserData(parser); const char *text; XML_Parser new_parser; - int i; + enum XML_Status rv; if (callno == 0) { /* Try different allocation levels for whole exercise */ @@ -3254,49 +8280,27 @@ external_entity_dbl_handler_2(XML_Parser parser, "<!ATTLIST doc xmlns CDATA #IMPLIED>\n" "<!ELEMENT e EMPTY>\n"); XML_SetUserData(parser, (void *)(intptr_t)1); - for (i = 0; i < 20; i++) { - allocation_count = i; - new_parser = XML_ExternalEntityParserCreate(parser, - context, - NULL); - if (new_parser == NULL) - continue; - if (_XML_Parse_SINGLE_BYTES(new_parser, text, strlen(text), - XML_TRUE) != XML_STATUS_ERROR) - break; - XML_ParserFree(new_parser); - } - - /* Ensure future allocations will be well */ - allocation_count = 10000; - if (i == 0) { - fail("first external parser unexpectedly created"); - XML_ParserFree(new_parser); - return 0; - } - else if (i == 20) { - fail("first external parser not allocated with count 20"); - return 0; - } + new_parser = XML_ExternalEntityParserCreate(parser, + context, + NULL); + if (new_parser == NULL) + return XML_STATUS_ERROR; + rv = _XML_Parse_SINGLE_BYTES(new_parser, text, strlen(text), + XML_TRUE); } else { /* Just run through once */ text = ("<?xml version='1.0' encoding='us-ascii'?>" "<e/>"); - allocation_count = 10000; new_parser = XML_ExternalEntityParserCreate(parser, context, NULL); - if (new_parser == NULL) { - fail("Unable to create second external parser"); - return 0; - } - if (_XML_Parse_SINGLE_BYTES(new_parser, text, strlen(text), - XML_TRUE) == XML_STATUS_ERROR) { - xml_failure(new_parser); - XML_ParserFree(new_parser); - return 0; - } + if (new_parser == NULL) + return XML_STATUS_ERROR; + rv =_XML_Parse_SINGLE_BYTES(new_parser, text, strlen(text), + XML_TRUE); } XML_ParserFree(new_parser); - return 1; + if (rv == XML_STATUS_ERROR) + return XML_STATUS_ERROR; + return XML_STATUS_OK; } /* Test more external entity allocation failure paths */ @@ -3304,23 +8308,95 @@ START_TEST(test_alloc_external_entity) { const char *text = "<?xml version='1.0'?>\n" - "<!DOCTYPE doc SYSTEM 'http://xml.libexpat.org/doc.dtd' [\n" - " <!ENTITY en SYSTEM 'http://xml.libexpat.org/entity.ent'>\n" + "<!DOCTYPE doc SYSTEM 'http://example.org/doc.dtd' [\n" + " <!ENTITY en SYSTEM 'http://example.org/entity.ent'>\n" "]>\n" - "<doc xmlns='http://xml.libexpat.org/ns1'>\n" + "<doc xmlns='http://example.org/ns1'>\n" "&en;\n" "</doc>"; + int i; + const int alloc_test_max_repeats = 50; - XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); - XML_SetExternalEntityRefHandler(parser, - external_entity_dbl_handler_2); - XML_SetUserData(parser, NULL); - if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), - XML_TRUE) == XML_STATUS_ERROR) - xml_failure(parser); + for (i = 0; i < alloc_test_max_repeats; i++) { + allocation_count = -1; + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, + external_entity_dbl_handler_2); + XML_SetUserData(parser, NULL); + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_OK) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + allocation_count = -1; + if (i == 0) + fail("External entity parsed despite duff allocator"); + if (i == alloc_test_max_repeats) + fail("External entity not parsed at max allocation count"); } END_TEST +/* Test more allocation failure paths */ +static int XMLCALL +external_entity_alloc_set_encoding(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + /* As for external_entity_loader() */ + const char *text = + "<?xml encoding='iso-8859-3'?>" + "\xC3\xA9"; + XML_Parser ext_parser; + enum XML_Status status; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + return XML_STATUS_ERROR; + if (!XML_SetEncoding(ext_parser, "utf-8")) { + XML_ParserFree(ext_parser); + return XML_STATUS_ERROR; + } + status = _XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE); + XML_ParserFree(ext_parser); + if (status == XML_STATUS_ERROR) + return XML_STATUS_ERROR; + return XML_STATUS_OK; +} + +START_TEST(test_alloc_ext_entity_set_encoding) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + int i; + const int max_allocation_count = 30; + + for (i = 0; i < max_allocation_count; i++) { + XML_SetExternalEntityRefHandler(parser, + external_entity_alloc_set_encoding); + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_OK) + break; + allocation_count = -1; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Encoding check succeeded despite failing allocator"); + if (i == max_allocation_count) + fail("Encoding failed at max allocation count"); +} +END_TEST static int XMLCALL unknown_released_encoding_handler(void *UNUSED_P(data), @@ -3350,26 +8426,23 @@ START_TEST(test_alloc_internal_entity) "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" "<test a='&foo;'/>"; unsigned int i; - int repeated = 0; + const unsigned int max_alloc_count = 20; - for (i = 0; i < 10; i++) { - /* Again, repeat some counts to account for caching */ - if (repeated < 2 && i == 2) { - i--; - repeated++; - } + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; XML_SetUnknownEncodingHandler(parser, unknown_released_encoding_handler, NULL); - allocation_count = i; if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; - XML_ParserReset(parser, NULL); + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); } if (i == 0) fail("Internal entity worked despite failing allocations"); - else if (i == 10) - fail("Internal entity failed at allocation count 10"); + else if (i == max_alloc_count) + fail("Internal entity failed at max allocation count"); } END_TEST @@ -3381,26 +8454,23 @@ START_TEST(test_alloc_dtd_default_handling) { const char *text = "<!DOCTYPE doc [\n" - "<!ENTITY e SYSTEM 'http://xml.libexpat.org/e'>\n" - "<!NOTATION n SYSTEM 'http://xml.libexpat.org/n'>\n" - "<!ELEMENT doc EMPTY>\n" + "<!ENTITY e SYSTEM 'http://example.org/e'>\n" + "<!NOTATION n SYSTEM 'http://example.org/n'>\n" + "<!ENTITY e1 SYSTEM 'http://example.org/e' NDATA n>\n" + "<!ELEMENT doc (#PCDATA)>\n" "<!ATTLIST doc a CDATA #IMPLIED>\n" "<?pi in dtd?>\n" "<!--comment in dtd-->\n" - "]><doc/>"; - const char *expected = "\n\n\n\n\n\n\n<doc/>"; + "]>\n" + "<doc><![CDATA[text in doc]]></doc>"; + const char *expected = "\n\n\n\n\n\n\n\n\n<doc>text in doc</doc>"; CharData storage; int i; - int repeat = 0; + const int max_alloc_count = 25; - for (i = 0; i < 10; i++) { - /* Repeat some counts to catch cached allocations */ - if ((repeat < 4 && i == 2) || - (repeat == 4 && i == 3)) { - i--; - repeat++; - } + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; + dummy_handler_flags = 0; XML_SetDefaultHandler(parser, accumulate_characters); XML_SetDoctypeDeclHandler(parser, dummy_start_doctype_handler, @@ -3423,15 +8493,27 @@ START_TEST(test_alloc_dtd_default_handling) if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR) break; - XML_ParserReset(parser, NULL); + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); } - if (i == 0) { + if (i == 0) fail("Default DTD parsed despite allocation failures"); - } else if (i == 10) { - fail("Default DTD not parsed with alloc count 10"); - } else { - CharData_CheckXMLChars(&storage, expected); - } + if (i == max_alloc_count) + fail("Default DTD not parsed with maximum alloc count"); + CharData_CheckXMLChars(&storage, expected); + if (dummy_handler_flags != (DUMMY_START_DOCTYPE_HANDLER_FLAG | + DUMMY_END_DOCTYPE_HANDLER_FLAG | + DUMMY_ENTITY_DECL_HANDLER_FLAG | + DUMMY_NOTATION_DECL_HANDLER_FLAG | + DUMMY_ELEMENT_DECL_HANDLER_FLAG | + DUMMY_ATTLIST_DECL_HANDLER_FLAG | + DUMMY_COMMENT_HANDLER_FLAG | + DUMMY_PI_HANDLER_FLAG | + DUMMY_START_CDATA_HANDLER_FLAG | + DUMMY_END_CDATA_HANDLER_FLAG | + DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG)) + fail("Not all handlers were called"); } END_TEST @@ -3439,16 +8521,17 @@ END_TEST START_TEST(test_alloc_explicit_encoding) { int i; + const int max_alloc_count = 5; - for (i = 0; i < 5; i++) { + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (XML_SetEncoding(parser, "us-ascii") == XML_STATUS_OK) break; } if (i == 0) fail("Encoding set despite failing allocator"); - else if (i == 5) - fail("Encoding not set at allocation count 5"); + else if (i == max_alloc_count) + fail("Encoding not set at max allocation count"); } END_TEST @@ -3457,19 +8540,3104 @@ START_TEST(test_alloc_set_base) { const XML_Char *new_base = "/local/file/name.xml"; int i; + const int max_alloc_count = 5; - for (i = 0; i < 5; i++) { + for (i = 0; i < max_alloc_count; i++) { allocation_count = i; if (XML_SetBase(parser, new_base) == XML_STATUS_OK) break; } if (i == 0) fail("Base set despite failing allocator"); - else if (i == 5) - fail("Base not set with allocation count 5"); + else if (i == max_alloc_count) + fail("Base not set with max allocation count"); +} +END_TEST + +/* Test buffer extension in the face of a duff reallocator */ +START_TEST(test_alloc_realloc_buffer) +{ + const char *text = get_buffer_test_text; + void *buffer; + int i; + const int max_realloc_count = 10; + + /* Get a smallish buffer */ + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + buffer = XML_GetBuffer(parser, 1536); + if (buffer == NULL) + fail("1.5K buffer reallocation failed"); + memcpy(buffer, text, strlen(text)); + if (XML_ParseBuffer(parser, strlen(text), + XML_FALSE) == XML_STATUS_OK) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + reallocation_count = -1; + if (i == 0) + fail("Parse succeeded with no reallocation"); + else if (i == max_realloc_count) + fail("Parse failed with max reallocation count"); +} +END_TEST + +/* Same test for external entity parsers */ +static int XMLCALL +external_entity_reallocator(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = get_buffer_test_text; + XML_Parser ext_parser; + void *buffer; + enum XML_Status status; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + + reallocation_count = (intptr_t)XML_GetUserData(parser); + buffer = XML_GetBuffer(ext_parser, 1536); + if (buffer == NULL) + fail("Buffer allocation failed"); + memcpy(buffer, text, strlen(text)); + status = XML_ParseBuffer(ext_parser, strlen(text), XML_FALSE); + reallocation_count = -1; + XML_ParserFree(ext_parser); + return (status == XML_STATUS_OK) ? XML_STATUS_OK : XML_STATUS_ERROR; +} + +START_TEST(test_alloc_ext_entity_realloc_buffer) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" + "]>\n" + "<doc>&en;</doc>"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + XML_SetExternalEntityRefHandler(parser, + external_entity_reallocator); + XML_SetUserData(parser, (void *)(intptr_t)i); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) == XML_STATUS_OK) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Succeeded with no reallocations"); + if (i == max_realloc_count) + fail("Failed with max reallocations"); +} +END_TEST + +/* Test elements with many attributes are handled correctly */ +START_TEST(test_alloc_realloc_many_attributes) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!ATTLIST doc za CDATA 'default'>\n" + "<!ATTLIST doc zb CDATA 'def2'>\n" + "<!ATTLIST doc zc CDATA 'def3'>\n" + "]>\n" + "<doc a='1'" + " b='2'" + " c='3'" + " d='4'" + " e='5'" + " f='6'" + " g='7'" + " h='8'" + " i='9'" + " j='10'" + " k='11'" + " l='12'" + " m='13'" + " n='14'" + " p='15'" + " q='16'" + " r='17'" + " s='18'>" + "</doc>"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite no reallocations"); + if (i == max_realloc_count) + fail("Parse failed at max reallocations"); } END_TEST +/* Test handling of a public entity with failing allocator */ +START_TEST(test_alloc_public_entity_value) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" + "<doc></doc>\n"; + char dtd_text[] = + "<!ELEMENT doc EMPTY>\n" + "<!ENTITY % e1 PUBLIC 'foo' 'bar.ent'>\n" + "<!ENTITY % " + /* Each line is 64 characters */ + "ThisIsAStupidlyLongParameterNameIntendedToTriggerPoolGrowth12345" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + " '%e1;'>\n" + "%e1;\n"; + int i; + const int max_alloc_count = 50; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + dummy_handler_flags = 0; + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_public); + /* Provoke a particular code path */ + XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocation"); + if (i == max_alloc_count) + fail("Parsing failed at max allocation count"); + if (dummy_handler_flags != DUMMY_ENTITY_DECL_HANDLER_FLAG) + fail("Entity declaration handler not called"); +} +END_TEST + +START_TEST(test_alloc_realloc_subst_public_entity_value) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" + "<doc></doc>\n"; + char dtd_text[] = + "<!ELEMENT doc EMPTY>\n" + "<!ENTITY % " + /* Each line is 64 characters */ + "ThisIsAStupidlyLongParameterNameIntendedToTriggerPoolGrowth12345" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + " PUBLIC 'foo' 'bar.ent'>\n" + "%ThisIsAStupidlyLongParameterNameIntendedToTriggerPoolGrowth12345" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP;"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_public); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocation"); + if (i == max_realloc_count) + fail("Parsing failed at max reallocation count"); +} +END_TEST + +START_TEST(test_alloc_parse_public_doctype) +{ + const char *text = + "<?xml version='1.0' encoding='utf-8'?>\n" + "<!DOCTYPE doc PUBLIC '" + /* 64 characters per line */ + "http://example.com/a/long/enough/name/to/trigger/pool/growth/zz/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "' 'test'>\n" + "<doc></doc>"; + int i; + const int max_alloc_count = 25; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + dummy_handler_flags = 0; + XML_SetDoctypeDeclHandler(parser, + dummy_start_doctype_decl_handler, + dummy_end_doctype_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != (DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG | + DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG)) + fail("Doctype handler functions not called"); +} +END_TEST + +START_TEST(test_alloc_parse_public_doctype_long_name) +{ + const char *text = + "<?xml version='1.0' encoding='utf-8'?>\n" + "<!DOCTYPE doc PUBLIC 'http://example.com/foo' '" + /* 64 characters per line */ + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "'>\n" + "<doc></doc>"; + int i; + const int max_alloc_count = 25; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetDoctypeDeclHandler(parser, + dummy_start_doctype_decl_handler, + dummy_end_doctype_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); +} +END_TEST + +static int XMLCALL +external_entity_alloc(XML_Parser parser, + const XML_Char *context, + const XML_Char *UNUSED_P(base), + const XML_Char *UNUSED_P(systemId), + const XML_Char *UNUSED_P(publicId)) +{ + const char *text = (const char *)XML_GetUserData(parser); + XML_Parser ext_parser; + int parse_res; + + ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + return XML_STATUS_ERROR; + parse_res = _XML_Parse_SINGLE_BYTES(ext_parser, text, strlen(text), + XML_TRUE); + XML_ParserFree(ext_parser); + return parse_res; +} + +/* Test foreign DTD handling */ +START_TEST(test_alloc_set_foreign_dtd) +{ + const char *text1 = + "<?xml version='1.0' encoding='us-ascii'?>\n" + "<doc>&entity;</doc>"; + char text2[] = "<!ELEMENT doc (#PCDATA)*>"; + int i; + const int max_alloc_count = 25; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetUserData(parser, &text2); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (XML_UseForeignDTD(parser, XML_TRUE) != XML_ERROR_NONE) + fail("Could not set foreign DTD"); + if (_XML_Parse_SINGLE_BYTES(parser, text1, strlen(text1), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); +} +END_TEST + +/* Test based on ibm/valid/P32/ibm32v04.xml */ +START_TEST(test_alloc_attribute_enum_value) +{ + const char *text = + "<?xml version='1.0' standalone='no'?>\n" + "<!DOCTYPE animal SYSTEM 'test.dtd'>\n" + "<animal>This is a \n <a/> \n\nyellow tiger</animal>"; + char dtd_text[] = + "<!ELEMENT animal (#PCDATA|a)*>\n" + "<!ELEMENT a EMPTY>\n" + "<!ATTLIST animal xml:space (default|preserve) 'preserve'>"; + int i; + const int max_alloc_count = 30; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + /* An attribute list handler provokes a different code path */ + XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); +} +END_TEST + +/* Test attribute enums sufficient to overflow the string pool */ +START_TEST(test_alloc_realloc_attribute_enum_value) +{ + const char *text = + "<?xml version='1.0' standalone='no'?>\n" + "<!DOCTYPE animal SYSTEM 'test.dtd'>\n" + "<animal>This is a yellow tiger</animal>"; + /* We wish to define a collection of attribute enums that will + * cause the string pool storing them to have to expand. This + * means more than 1024 bytes, including the parentheses and + * separator bars. + */ + char dtd_text[] = + "<!ELEMENT animal (#PCDATA)*>\n" + "<!ATTLIST animal thing " + "(default" + /* Each line is 64 characters */ + "|ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|BBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|CBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|DBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|EBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|FBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|GBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|HBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|IBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|JBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|KBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|LBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|MBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|NBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|OBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|PBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO)" + " 'default'>"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + /* An attribute list handler provokes a different code path */ + XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); +} +END_TEST + +/* Test attribute enums in a #IMPLIED attribute forcing pool growth */ +START_TEST(test_alloc_realloc_implied_attribute) +{ + /* Forcing this particular code path is a balancing act. The + * addition of the closing parenthesis and terminal NUL must be + * what pushes the string of enums over the 1024-byte limit, + * otherwise a different code path will pick up the realloc. + */ + const char *text = + "<!DOCTYPE doc [\n" + "<!ELEMENT doc EMPTY>\n" + "<!ATTLIST doc a " + /* Each line is 64 characters */ + "(ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|BBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|CBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|DBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|EBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|FBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|GBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|HBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|IBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|JBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|KBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|LBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|MBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|NBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|OBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|PBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMN)" + " #IMPLIED>\n" + "]><doc/>"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); +} +END_TEST + +/* Test attribute enums in a defaulted attribute forcing pool growth */ +START_TEST(test_alloc_realloc_default_attribute) +{ + /* Forcing this particular code path is a balancing act. The + * addition of the closing parenthesis and terminal NUL must be + * what pushes the string of enums over the 1024-byte limit, + * otherwise a different code path will pick up the realloc. + */ + const char *text = + "<!DOCTYPE doc [\n" + "<!ELEMENT doc EMPTY>\n" + "<!ATTLIST doc a " + /* Each line is 64 characters */ + "(ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|BBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|CBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|DBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|EBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|FBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|GBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|HBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|IBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|JBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|KBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|LBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|MBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|NBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|OBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" + "|PBCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMN)" + " 'ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO'" + ">\n]><doc/>"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); +} +END_TEST + +/* Test long notation name with dodgy allocator */ +START_TEST(test_alloc_notation) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!NOTATION " + /* Each line is 64 characters */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + " SYSTEM 'http://example.org/n'>\n" + "<!ENTITY e SYSTEM 'http://example.org/e' NDATA " + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + ">\n" + "<!ELEMENT doc EMPTY>\n" + "]>\n<doc/>"; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + dummy_handler_flags = 0; + XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler); + XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite allocation failures"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != (DUMMY_ENTITY_DECL_HANDLER_FLAG | + DUMMY_NOTATION_DECL_HANDLER_FLAG)) + fail("Entity declaration handler not called"); +} +END_TEST + +/* Test public notation with dodgy allocator */ +START_TEST(test_alloc_public_notation) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!NOTATION note PUBLIC '" + /* 64 characters per line */ + "http://example.com/a/long/enough/name/to/trigger/pool/growth/zz/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "' 'foo'>\n" + "<!ENTITY e SYSTEM 'http://example.com/e' NDATA note>\n" + "<!ELEMENT doc EMPTY>\n" + "]>\n<doc/>"; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + dummy_handler_flags = 0; + XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite allocation failures"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != DUMMY_NOTATION_DECL_HANDLER_FLAG) + fail("Notation handler not called"); +} +END_TEST + +/* Test public notation with dodgy allocator */ +START_TEST(test_alloc_system_notation) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!NOTATION note SYSTEM '" + /* 64 characters per line */ + "http://example.com/a/long/enough/name/to/trigger/pool/growth/zz/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "'>\n" + "<!ENTITY e SYSTEM 'http://example.com/e' NDATA note>\n" + "<!ELEMENT doc EMPTY>\n" + "]>\n<doc/>"; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + dummy_handler_flags = 0; + XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite allocation failures"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != DUMMY_NOTATION_DECL_HANDLER_FLAG) + fail("Notation handler not called"); +} +END_TEST + +START_TEST(test_alloc_nested_groups) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!ELEMENT doc " + /* Sixteen elements per line */ + "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?," + "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?" + "))))))))))))))))))))))))))))))))>\n" + "<!ELEMENT e EMPTY>" + "]>\n" + "<doc><e/></doc>"; + CharData storage; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + CharData_Init(&storage); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, record_element_start_handler); + XML_SetUserData(parser, &storage); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum reallocation count"); + CharData_CheckString(&storage, "doce"); + if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler not fired"); +} +END_TEST + +START_TEST(test_alloc_realloc_nested_groups) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!ELEMENT doc " + /* Sixteen elements per line */ + "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?," + "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?" + "))))))))))))))))))))))))))))))))>\n" + "<!ELEMENT e EMPTY>" + "]>\n" + "<doc><e/></doc>"; + CharData storage; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + CharData_Init(&storage); + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + XML_SetStartElementHandler(parser, record_element_start_handler); + XML_SetUserData(parser, &storage); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); + CharData_CheckString(&storage, "doce"); + if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler not fired"); +} +END_TEST + +START_TEST(test_alloc_large_group) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!ELEMENT doc (" + "a1|a2|a3|a4|a5|a6|a7|a8|" + "b1|b2|b3|b4|b5|b6|b7|b8|" + "c1|c2|c3|c4|c5|c6|c7|c8|" + "d1|d2|d3|d4|d5|d6|d7|d8|" + "e1" + ")+>\n" + "]>\n" + "<doc>\n" + "<a1/>\n" + "</doc>\n"; + int i; + const int max_alloc_count = 50; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler flag not raised"); +} +END_TEST + +START_TEST(test_alloc_realloc_group_choice) +{ + const char *text = + "<!DOCTYPE doc [\n" + "<!ELEMENT doc (" + "a1|a2|a3|a4|a5|a6|a7|a8|" + "b1|b2|b3|b4|b5|b6|b7|b8|" + "c1|c2|c3|c4|c5|c6|c7|c8|" + "d1|d2|d3|d4|d5|d6|d7|d8|" + "e1" + ")+>\n" + "]>\n" + "<doc>\n" + "<a1/>\n" + "<b2 attr='foo'>This is a foo</b2>\n" + "<c3></c3>\n" + "</doc>\n"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetElementDeclHandler(parser, dummy_element_decl_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); + if (dummy_handler_flags != DUMMY_ELEMENT_DECL_HANDLER_FLAG) + fail("Element handler flag not raised"); +} +END_TEST + +START_TEST(test_alloc_pi_in_epilog) +{ + const char *text = + "<doc></doc>\n" + "<?pi in epilog?>"; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetProcessingInstructionHandler(parser, dummy_pi_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse completed despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != DUMMY_PI_HANDLER_FLAG) + fail("Processing instruction handler not invoked"); +} +END_TEST + +START_TEST(test_alloc_comment_in_epilog) +{ + const char *text = + "<doc></doc>\n" + "<!-- comment in epilog -->"; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetCommentHandler(parser, dummy_comment_handler); + dummy_handler_flags = 0; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse completed despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); + if (dummy_handler_flags != DUMMY_COMMENT_HANDLER_FLAG) + fail("Processing instruction handler not invoked"); +} +END_TEST + +START_TEST(test_alloc_realloc_long_attribute_value) +{ + const char *text = + "<!DOCTYPE doc [<!ENTITY foo '" + /* Each line is 64 characters */ + "This entity will be substituted as an attribute value, and is " + "calculated to be exactly long enough that the terminating NUL " + "that the library adds internally will trigger the string pool to" + "grow. GHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "'>]>\n" + "<doc a='&foo;'></doc>"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); +} +END_TEST + +START_TEST(test_alloc_attribute_whitespace) +{ + const char *text = "<doc a=' '></doc>"; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); +} +END_TEST + +START_TEST(test_alloc_attribute_predefined_entity) +{ + const char *text = "<doc a='&'></doc>"; + int i; + const int max_alloc_count = 15; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); +} +END_TEST + +/* Test that a character reference at the end of a suitably long + * default value for an attribute can trigger pool growth, and recovers + * if the allocator fails on it. + */ +START_TEST(test_alloc_long_attr_default_with_char_ref) +{ + const char *text = + "<!DOCTYPE doc [<!ATTLIST doc a CDATA '" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHI" + "1'>]>\n" + "<doc/>"; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); +} +END_TEST + +/* Test that a long character reference substitution triggers a pool + * expansion correctly for an attribute value. + */ +START_TEST(test_alloc_long_attr_value) +{ + const char *text = + "<!DOCTYPE test [<!ENTITY foo '\n" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "'>]>\n" + "<test a='&foo;'/>"; + int i; + const int max_alloc_count = 25; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing allocator"); + if (i == max_alloc_count) + fail("Parse failed at maximum allocation count"); +} +END_TEST + +/* Test that an error in a nested parameter entity substitution is + * handled correctly. It seems unlikely that the code path being + * exercised can be reached purely by carefully crafted XML, but an + * allocation error in the right place will definitely do it. + */ +START_TEST(test_alloc_nested_entities) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" + "<doc />"; + ExtFaults test_data = { + "<!ENTITY % pe1 '" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "'>\n" + "<!ENTITY % pe2 '%pe1;'>\n" + "%pe2;", + "Memory Fail not faulted", + NULL, + XML_ERROR_NO_MEMORY + }; + + /* Causes an allocation error in a nested storeEntityValue() */ + allocation_count = 12; + XML_SetUserData(parser, &test_data); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_faulter); + expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, + "Entity allocation failure not noted"); +} +END_TEST + +START_TEST(test_alloc_realloc_param_entity_newline) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" + "<doc/>"; + char dtd_text[] = + "<!ENTITY % pe '<!ATTLIST doc att CDATA \"" + /* 64 characters per line */ + "This default value is carefully crafted so that the carriage " + "return right at the end of the entity string causes an internal " + "string pool to have to grow. This allows us to test the alloc " + "failure path from that point. OPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDE" + "\">\n'>" + "%pe;\n"; + int i; + const int max_realloc_count = 5; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); +} +END_TEST + +START_TEST(test_alloc_realloc_ce_extends_pe) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" + "<doc/>"; + char dtd_text[] = + "<!ENTITY % pe '<!ATTLIST doc att CDATA \"" + /* 64 characters per line */ + "This default value is carefully crafted so that the character " + "entity at the end causes an internal string pool to have to " + "grow. This allows us to test the allocation failure path from " + "that point onwards. EFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGQ" + "\">\n'>" + "%pe;\n"; + int i; + const int max_realloc_count = 5; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, dtd_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); +} +END_TEST + +START_TEST(test_alloc_realloc_attributes) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ATTLIST doc\n" + " a1 (a|b|c) 'a'\n" + " a2 (foo|bar) #IMPLIED\n" + " a3 NMTOKEN #IMPLIED\n" + " a4 NMTOKENS #IMPLIED\n" + " a5 ID #IMPLIED\n" + " a6 IDREF #IMPLIED\n" + " a7 IDREFS #IMPLIED\n" + " a8 ENTITY #IMPLIED\n" + " a9 ENTITIES #IMPLIED\n" + " a10 CDATA #IMPLIED\n" + " >]>\n" + "<doc>wombat</doc>\n"; + int i; + const int max_realloc_count = 5; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + + if (i == 0) + fail("Parse succeeded despite failing reallocator"); + if (i == max_realloc_count) + fail("Parse failed at maximum reallocation count"); +} +END_TEST + +START_TEST(test_alloc_long_doc_name) +{ + const char *text = + /* 64 characters per line */ + "<LongRootElementNameThatWillCauseTheNextAllocationToExpandTheStr" + "ingPoolForTheDTDQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + " a='1'/>"; + int i; + const int max_alloc_count = 20; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max reallocation count"); +} +END_TEST + +START_TEST(test_alloc_long_base) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY e SYSTEM 'foo'>\n" + "]>\n" + "<doc>&e;</doc>"; + char entity_text[] = "Hello world"; + const char *base = + /* 64 characters per line */ + "LongBaseURI/that/will/overflow/an/internal/buffer/and/cause/it/t" + "o/have/to/grow/PQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/"; + int i; + const int max_alloc_count = 25; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetUserData(parser, entity_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (XML_SetBase(parser, base) == XML_STATUS_ERROR) { + XML_ParserReset(parser, NULL); + continue; + } + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +START_TEST(test_alloc_long_public_id) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY e PUBLIC '" + /* 64 characters per line */ + "LongPublicIDThatShouldResultInAnInternalStringPoolGrowingAtASpec" + "ificMomentKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "' 'bar'>\n" + "]>\n" + "<doc>&e;</doc>"; + char entity_text[] = "Hello world"; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetUserData(parser, entity_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +START_TEST(test_alloc_long_entity_value) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ENTITY e1 '" + /* 64 characters per line */ + "Long entity value that should provoke a string pool to grow whil" + "e setting up to parse the external entity below. xyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "'>\n" + " <!ENTITY e2 SYSTEM 'bar'>\n" + "]>\n" + "<doc>&e2;</doc>"; + char entity_text[] = "Hello world"; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetUserData(parser, entity_text); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_alloc); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +START_TEST(test_alloc_long_notation) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!NOTATION note SYSTEM '" + /* 64 characters per line */ + "ALongNotationNameThatShouldProvokeStringPoolGrowthWhileCallingAn" + "ExternalEntityParserUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "'>\n" + " <!ENTITY e1 SYSTEM 'foo' NDATA " + /* 64 characters per line */ + "ALongNotationNameThatShouldProvokeStringPoolGrowthWhileCallingAn" + "ExternalEntityParserUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB" + ">\n" + " <!ENTITY e2 SYSTEM 'bar'>\n" + "]>\n" + "<doc>&e2;</doc>"; + ExtOption options[] = { + { "foo", "Entity Foo" }, + { "bar", "Entity Bar" }, + { NULL, NULL } + }; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + + /* See comment in test_alloc_parse_xdecl() */ + alloc_teardown(); + alloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + + +static void +nsalloc_setup(void) +{ + XML_Memory_Handling_Suite memsuite = { + duff_allocator, + duff_reallocator, + free + }; + XML_Char ns_sep[2] = { ' ', '\0' }; + + /* Ensure the parser creation will go through */ + allocation_count = ALLOC_ALWAYS_SUCCEED; + reallocation_count = REALLOC_ALWAYS_SUCCEED; + parser = XML_ParserCreate_MM(NULL, &memsuite, ns_sep); + if (parser == NULL) + fail("Parser not created"); +} + +static void +nsalloc_teardown(void) +{ + basic_teardown(); +} + + +/* Test the effects of allocation failure in simple namespace parsing. + * Based on test_ns_default_with_empty_uri() + */ +START_TEST(test_nsalloc_xmlns) +{ + const char *text = + "<doc xmlns='http://example.org/'>\n" + " <e xmlns=''/>\n" + "</doc>"; + unsigned int i; + const unsigned int max_alloc_count = 30; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + /* Exercise more code paths with a default handler */ + XML_SetDefaultHandler(parser, dummy_default_handler); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* Resetting the parser is insufficient, because some memory + * allocations are cached within the parser. Instead we use + * the teardown and setup routines to ensure that we have the + * right sort of parser back in our hands. + */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at maximum allocation count"); +} +END_TEST + +/* Test XML_ParseBuffer interface with namespace and a dicky allocator */ +START_TEST(test_nsalloc_parse_buffer) +{ + const char *text = "<doc>Hello</doc>"; + void *buffer; + + /* Try a parse before the start of the world */ + /* (Exercises new code path) */ + allocation_count = 0; + if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_ERROR) + fail("Pre-init XML_ParseBuffer not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_NO_MEMORY) + fail("Pre-init XML_ParseBuffer faulted for wrong reason"); + + /* Now with actual memory allocation */ + allocation_count = ALLOC_ALWAYS_SUCCEED; + if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) + xml_failure(parser); + + /* Check that resuming an unsuspended parser is faulted */ + if (XML_ResumeParser(parser) != XML_STATUS_ERROR) + fail("Resuming unsuspended parser not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_NOT_SUSPENDED) + xml_failure(parser); + + /* Get the parser into suspended state */ + XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler); + resumable = XML_TRUE; + buffer = XML_GetBuffer(parser, strlen(text)); + if (buffer == NULL) + fail("Could not acquire parse buffer"); + memcpy(buffer, text, strlen(text)); + if (XML_ParseBuffer(parser, strlen(text), + XML_TRUE) != XML_STATUS_SUSPENDED) + xml_failure(parser); + if (XML_GetErrorCode(parser) != XML_ERROR_NONE) + xml_failure(parser); + if (XML_ParseBuffer(parser, strlen(text), XML_TRUE) != XML_STATUS_ERROR) + fail("Suspended XML_ParseBuffer not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_SUSPENDED) + xml_failure(parser); + if (XML_GetBuffer(parser, strlen(text)) != NULL) + fail("Suspended XML_GetBuffer not faulted"); + + /* Get it going again and complete the world */ + XML_SetCharacterDataHandler(parser, NULL); + if (XML_ResumeParser(parser) != XML_STATUS_OK) + xml_failure(parser); + if (XML_ParseBuffer(parser, strlen(text), XML_TRUE) != XML_STATUS_ERROR) + fail("Post-finishing XML_ParseBuffer not faulted"); + if (XML_GetErrorCode(parser) != XML_ERROR_FINISHED) + xml_failure(parser); + if (XML_GetBuffer(parser, strlen(text)) != NULL) + fail("Post-finishing XML_GetBuffer not faulted"); +} +END_TEST + +/* Check handling of long prefix names (pool growth) */ +START_TEST(test_nsalloc_long_prefix) +{ + const char *text = + "<" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":foo xmlns:" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "='http://example.org/'>" + "</" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":foo>"; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +/* Check handling of long uri names (pool growth) */ +START_TEST(test_nsalloc_long_uri) +{ + const char *text = + "<foo:e xmlns:foo='http://example.org/" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "' bar:a='12'\n" + "xmlns:bar='http://example.org/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789A/" + "'>" + "</foo:e>"; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +/* Test handling of long attribute names with prefixes */ +START_TEST(test_nsalloc_long_attr) +{ + const char *text = + "<foo:e xmlns:foo='http://example.org/' bar:" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "='12'\n" + "xmlns:bar='http://example.org/'>" + "</foo:e>"; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +/* Test handling of an attribute name with a long namespace prefix */ +START_TEST(test_nsalloc_long_attr_prefix) +{ + const char *text = + "<foo:e xmlns:foo='http://example.org/' " + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":a='12'\n" + "xmlns:" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "='http://example.org/'>" + "</foo:e>"; + const char *elemstr[] = { + "http://example.org/ e foo", + "http://example.org/ a " + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + }; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetReturnNSTriplet(parser, XML_TRUE); + XML_SetUserData(parser, elemstr); + XML_SetElementHandler(parser, + triplet_start_checker, + triplet_end_checker); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +/* Test attribute handling in the face of a dodgy reallocator */ +START_TEST(test_nsalloc_realloc_attributes) +{ + const char *text = + "<foo:e xmlns:foo='http://example.org/' bar:a='12'\n" + " xmlns:bar='http://example.org/'>" + "</foo:e>"; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed at max reallocation count"); +} +END_TEST + +/* Test long element names with namespaces under a failing allocator */ +START_TEST(test_nsalloc_long_element) +{ + const char *text = + "<foo:thisisalongenoughelementnametotriggerareallocation\n" + " xmlns:foo='http://example.org/' bar:a='12'\n" + " xmlns:bar='http://example.org/'>" + "</foo:thisisalongenoughelementnametotriggerareallocation>"; + const char *elemstr[] = { + "http://example.org/" + " thisisalongenoughelementnametotriggerareallocation foo", + "http://example.org/ a bar" + }; + int i; + const int max_alloc_count = 30; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetReturnNSTriplet(parser, XML_TRUE); + XML_SetUserData(parser, elemstr); + XML_SetElementHandler(parser, + triplet_start_checker, + triplet_end_checker); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_alloc_count) + fail("Parsing failed at max reallocation count"); +} +END_TEST + +/* Test the effects of reallocation failure when reassigning a + * binding. + * + * XML_ParserReset does not free the BINDING structures used by a + * parser, but instead adds them to an internal free list to be reused + * as necessary. Likewise the URI buffers allocated for the binding + * aren't freed, but kept attached to their existing binding. If the + * new binding has a longer URI, it will need reallocation. This test + * provokes that reallocation, and tests the control path if it fails. + */ +START_TEST(test_nsalloc_realloc_binding_uri) +{ + const char *first = + "<doc xmlns='http://example.org/'>\n" + " <e xmlns='' />\n" + "</doc>"; + const char *second = + "<doc xmlns='http://example.org/long/enough/URI/to/reallocate/'>\n" + " <e xmlns='' />\n" + "</doc>"; + unsigned i; + const unsigned max_realloc_count = 10; + + /* First, do a full parse that will leave bindings around */ + if (_XML_Parse_SINGLE_BYTES(parser, first, strlen(first), + XML_TRUE) == XML_STATUS_ERROR) + xml_failure(parser); + + /* Now repeat with a longer URI and a duff reallocator */ + for (i = 0; i < max_realloc_count; i++) { + XML_ParserReset(parser, NULL); + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, second, strlen(second), + XML_TRUE) != XML_STATUS_ERROR) + break; + } + if (i == 0) + fail("Parsing worked despite failing reallocation"); + else if (i == max_realloc_count) + fail("Parsing failed at max reallocation count"); +} +END_TEST + +/* Check handling of long prefix names (pool growth) */ +START_TEST(test_nsalloc_realloc_long_prefix) +{ + const char *text = + "<" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":foo xmlns:" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "='http://example.org/'>" + "</" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":foo>"; + int i; + const int max_realloc_count = 12; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed even at max reallocation count"); +} +END_TEST + +/* Check handling of even long prefix names (different code path) */ +START_TEST(test_nsalloc_realloc_longer_prefix) +{ + const char *text = + "<" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "Q:foo xmlns:" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "Q='http://example.org/'>" + "</" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "Q:foo>"; + int i; + const int max_realloc_count = 12; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed even at max reallocation count"); +} +END_TEST + +START_TEST(test_nsalloc_long_namespace) +{ + const char *text = + "<" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":e xmlns:" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "='http://example.org/'>\n" + "<" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":f " + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":attr='foo'/>\n" + "</" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + ":e>"; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +/* Using a slightly shorter namespace name provokes allocations in + * slightly different places in the code. + */ +START_TEST(test_nsalloc_less_long_namespace) +{ + const char *text = + "<" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" + ":e xmlns:" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" + "='http://example.org/'>\n" + "<" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" + ":f " + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" + ":att='foo'/>\n" + "</" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AZ" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678" + ":e>"; + int i; + const int max_alloc_count = 40; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +START_TEST(test_nsalloc_long_context) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo' [\n" + " <!ATTLIST doc baz ID #REQUIRED>\n" + " <!ENTITY en SYSTEM 'bar'>\n" + "]>\n" + "<doc xmlns='http://example.org/" + /* 64 characters per line */ + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKL" + "' baz='2'>\n" + "&en;" + "</doc>"; + ExtOption options[] = { + { "foo", "<!ELEMENT e EMPTY>"}, + { "bar", "<e/>" }, + { NULL, NULL } + }; + int i; + const int max_alloc_count = 70; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +/* This function is void; it will throw a fail() on error, so if it + * returns normally it must have succeeded. + */ +static void +context_realloc_test(const char *text) +{ + ExtOption options[] = { + { "foo", "<!ELEMENT e EMPTY>"}, + { "bar", "<e/>" }, + { NULL, NULL } + }; + int i; + const int max_realloc_count = 5; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed even at max reallocation count"); +} + +START_TEST(test_nsalloc_realloc_long_context) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo' [\n" + " <!ENTITY en SYSTEM 'bar'>\n" + "]>\n" + "<doc xmlns='http://example.org/" + /* 64 characters per line */ + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKL" + "'>\n" + "&en;" + "</doc>"; + + context_realloc_test(text); +} +END_TEST + +START_TEST(test_nsalloc_realloc_long_context_2) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo' [\n" + " <!ENTITY en SYSTEM 'bar'>\n" + "]>\n" + "<doc xmlns='http://example.org/" + /* 64 characters per line */ + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJK" + "'>\n" + "&en;" + "</doc>"; + + context_realloc_test(text); +} +END_TEST + +START_TEST(test_nsalloc_realloc_long_context_3) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo' [\n" + " <!ENTITY en SYSTEM 'bar'>\n" + "]>\n" + "<doc xmlns='http://example.org/" + /* 64 characters per line */ + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGH" + "'>\n" + "&en;" + "</doc>"; + + context_realloc_test(text); +} +END_TEST + +START_TEST(test_nsalloc_realloc_long_context_4) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo' [\n" + " <!ENTITY en SYSTEM 'bar'>\n" + "]>\n" + "<doc xmlns='http://example.org/" + /* 64 characters per line */ + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO" + "'>\n" + "&en;" + "</doc>"; + + context_realloc_test(text); +} +END_TEST + +START_TEST(test_nsalloc_realloc_long_context_5) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo' [\n" + " <!ENTITY en SYSTEM 'bar'>\n" + "]>\n" + "<doc xmlns='http://example.org/" + /* 64 characters per line */ + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABC" + "'>\n" + "&en;" + "</doc>"; + + context_realloc_test(text); +} +END_TEST + +START_TEST(test_nsalloc_realloc_long_context_6) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo' [\n" + " <!ENTITY en SYSTEM 'bar'>\n" + "]>\n" + "<doc xmlns='http://example.org/" + /* 64 characters per line */ + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNOP" + "'>\n" + "&en;" + "</doc>"; + + context_realloc_test(text); +} +END_TEST + +START_TEST(test_nsalloc_realloc_long_context_7) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo' [\n" + " <!ENTITY en SYSTEM 'bar'>\n" + "]>\n" + "<doc xmlns='http://example.org/" + /* 64 characters per line */ + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLM" + "'>\n" + "&en;" + "</doc>"; + + context_realloc_test(text); +} +END_TEST + +START_TEST(test_nsalloc_realloc_long_ge_name) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo' [\n" + " <!ENTITY " + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + " SYSTEM 'bar'>\n" + "]>\n" + "<doc xmlns='http://example.org/baz'>\n" + "&" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + ";" + "</doc>"; + ExtOption options[] = { + { "foo", "<!ELEMENT el EMPTY>" }, + { "bar", "<el/>" }, + { NULL, NULL } + }; + int i; + const int max_realloc_count = 10; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed even at max reallocation count"); +} +END_TEST + +/* Test that when a namespace is passed through the context mechanism + * to an external entity parser, the parsers handle reallocation + * failures correctly. The prefix is exactly the right length to + * provoke particular uncommon code paths. + */ +START_TEST(test_nsalloc_realloc_long_context_in_dtd) +{ + const char *text = + "<!DOCTYPE " + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + ":doc [\n" + " <!ENTITY First SYSTEM 'foo/First'>\n" + "]>\n" + "<" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + ":doc xmlns:" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "='foo/Second'>&First;</" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + ":doc>"; + ExtOption options[] = { + { "foo/First", "Hello world" }, + { NULL, NULL } + }; + int i; + const int max_realloc_count = 20; + + for (i = 0; i < max_realloc_count; i++) { + reallocation_count = i; + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing reallocations"); + else if (i == max_realloc_count) + fail("Parsing failed even at max reallocation count"); +} +END_TEST + +START_TEST(test_nsalloc_long_default_in_ext) +{ + const char *text = + "<!DOCTYPE doc [\n" + " <!ATTLIST e a1 CDATA '" + /* 64 characters per line */ + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" + "'>\n" + " <!ENTITY x SYSTEM 'foo'>\n" + "]>\n" + "<doc>&x;</doc>"; + ExtOption options[] = { + { "foo", "<e/>"}, + { NULL, NULL } + }; + int i; + const int max_alloc_count = 50; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +START_TEST(test_nsalloc_long_systemid_in_ext) +{ + const char *text = + "<!DOCTYPE doc SYSTEM 'foo' [\n" + " <!ENTITY en SYSTEM '" + /* 64 characters per line */ + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "'>\n" + "]>\n" + "<doc>&en;</doc>"; + ExtOption options[] = { + { "foo", "<!ELEMENT e EMPTY>" }, + { + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/" + "ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/ABCDEFGHIJKLMNO/", + "<e/>" + }, + { NULL, NULL } + }; + int i; + const int max_alloc_count = 55; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at max allocation count"); +} +END_TEST + +/* Test the effects of allocation failure on parsing an element in a + * namespace. Based on test_nsalloc_long_context. + */ +START_TEST(test_nsalloc_prefixed_element) +{ + const char *text = + "<!DOCTYPE pfx:element SYSTEM 'foo' [\n" + " <!ATTLIST pfx:element baz ID #REQUIRED>\n" + " <!ENTITY en SYSTEM 'bar'>\n" + "]>\n" + "<pfx:element xmlns:pfx='http://example.org/' baz='2'>\n" + "&en;" + "</pfx:element>"; + ExtOption options[] = { + { "foo", "<!ELEMENT e EMPTY>" }, + { "bar", "<e/>" }, + { NULL, NULL } + }; + int i; + const int max_alloc_count = 70; + + for (i = 0; i < max_alloc_count; i++) { + allocation_count = i; + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), + XML_TRUE) != XML_STATUS_ERROR) + break; + + /* See comment in test_nsalloc_xmlns() */ + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Success despite failing allocator"); + else if (i == max_alloc_count) + fail("Failed even at full allocation count"); +} +END_TEST static Suite * make_suite(void) @@ -3479,6 +11647,7 @@ make_suite(void) TCase *tc_namespace = tcase_create("XML namespaces"); TCase *tc_misc = tcase_create("miscellaneous tests"); TCase *tc_alloc = tcase_create("allocation tests"); + TCase *tc_nsalloc = tcase_create("namespace allocation tests"); suite_add_tcase(s, tc_basic); tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); @@ -3489,11 +11658,17 @@ make_suite(void) tcase_add_test(tc_basic, test_bom_utf8); tcase_add_test(tc_basic, test_bom_utf16_be); tcase_add_test(tc_basic, test_bom_utf16_le); + tcase_add_test(tc_basic, test_nobom_utf16_le); tcase_add_test(tc_basic, test_illegal_utf8); tcase_add_test(tc_basic, test_utf8_auto_align); tcase_add_test(tc_basic, test_utf16); tcase_add_test(tc_basic, test_utf16_le_epilog_newline); + tcase_add_test(tc_basic, test_not_utf16); + tcase_add_test(tc_basic, test_bad_encoding); tcase_add_test(tc_basic, test_latin1_umlauts); + tcase_add_test(tc_basic, test_long_utf8_character); + tcase_add_test(tc_basic, test_long_latin1_attribute); + tcase_add_test(tc_basic, test_long_ascii_attribute); /* Regression test for SF bug #491986. */ tcase_add_test(tc_basic, test_danish_latin1); /* Regression test for SF bug #514281. */ @@ -3508,9 +11683,13 @@ make_suite(void) tcase_add_test(tc_basic, test_line_number_after_error); tcase_add_test(tc_basic, test_column_number_after_error); tcase_add_test(tc_basic, test_really_long_lines); + tcase_add_test(tc_basic, test_really_long_encoded_lines); tcase_add_test(tc_basic, test_end_element_events); tcase_add_test(tc_basic, test_attr_whitespace_normalization); tcase_add_test(tc_basic, test_xmldecl_misplaced); + tcase_add_test(tc_basic, test_xmldecl_invalid); + tcase_add_test(tc_basic, test_xmldecl_missing_attr); + tcase_add_test(tc_basic, test_xmldecl_missing_value); tcase_add_test(tc_basic, test_unknown_encoding_internal_entity); tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity); tcase_add_test(tc_basic, @@ -3522,9 +11701,18 @@ make_suite(void) tcase_add_test(tc_basic, test_not_standalone_handler_accept); tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset_standalone); + tcase_add_test(tc_basic, + test_entity_with_external_subset_unless_standalone); tcase_add_test(tc_basic, test_wfc_no_recursive_entity_refs); tcase_add_test(tc_basic, test_ext_entity_set_encoding); + tcase_add_test(tc_basic, test_ext_entity_no_handler); + tcase_add_test(tc_basic, test_ext_entity_set_bom); + tcase_add_test(tc_basic, test_ext_entity_bad_encoding); + tcase_add_test(tc_basic, test_ext_entity_bad_encoding_2); + tcase_add_test(tc_basic, test_ext_entity_invalid_parse); + tcase_add_test(tc_basic, test_ext_entity_invalid_suspended_parse); tcase_add_test(tc_basic, test_dtd_default_handling); + tcase_add_test(tc_basic, test_dtd_attr_handling); tcase_add_test(tc_basic, test_empty_ns_without_namespaces); tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces); tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls); @@ -3532,19 +11720,39 @@ make_suite(void) tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls); tcase_add_test(tc_basic, test_good_cdata_ascii); tcase_add_test(tc_basic, test_good_cdata_utf16); + tcase_add_test(tc_basic, test_good_cdata_utf16_le); + tcase_add_test(tc_basic, test_long_cdata_utf16); + tcase_add_test(tc_basic, test_multichar_cdata_utf16); + tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair); tcase_add_test(tc_basic, test_bad_cdata); + tcase_add_test(tc_basic, test_bad_cdata_utf16); + tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); + tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); tcase_add_test(tc_basic, test_memory_allocation); tcase_add_test(tc_basic, test_default_current); tcase_add_test(tc_basic, test_dtd_elements); tcase_add_test(tc_basic, test_set_foreign_dtd); + tcase_add_test(tc_basic, test_foreign_dtd_not_standalone); + tcase_add_test(tc_basic, test_invalid_foreign_dtd); + tcase_add_test(tc_basic, test_foreign_dtd_with_doctype); + tcase_add_test(tc_basic, test_foreign_dtd_without_external_subset); + tcase_add_test(tc_basic, test_empty_foreign_dtd); tcase_add_test(tc_basic, test_set_base); tcase_add_test(tc_basic, test_attributes); tcase_add_test(tc_basic, test_reset_in_entity); tcase_add_test(tc_basic, test_resume_invalid_parse); tcase_add_test(tc_basic, test_resume_resuspended); + tcase_add_test(tc_basic, test_cdata_default); tcase_add_test(tc_basic, test_subordinate_reset); tcase_add_test(tc_basic, test_subordinate_suspend); + tcase_add_test(tc_basic, test_subordinate_xdecl_suspend); + tcase_add_test(tc_basic, test_subordinate_xdecl_abort); tcase_add_test(tc_basic, test_explicit_encoding); + tcase_add_test(tc_basic, test_trailing_cr); + tcase_add_test(tc_basic, test_ext_entity_trailing_cr); + tcase_add_test(tc_basic, test_trailing_rsqb); + tcase_add_test(tc_basic, test_ext_entity_trailing_rsqb); + tcase_add_test(tc_basic, test_ext_entity_good_cdata); tcase_add_test(tc_basic, test_user_parameters); tcase_add_test(tc_basic, test_ext_entity_ref_parameter); tcase_add_test(tc_basic, test_empty_parse); @@ -3553,7 +11761,110 @@ make_suite(void) tcase_add_test(tc_basic, test_byte_info_at_end); tcase_add_test(tc_basic, test_byte_info_at_error); tcase_add_test(tc_basic, test_byte_info_at_cdata); + tcase_add_test(tc_basic, test_predefined_entities); tcase_add_test(tc_basic, test_invalid_tag_in_dtd); + tcase_add_test(tc_basic, test_not_predefined_entities); + tcase_add_test(tc_basic, test_ignore_section); + tcase_add_test(tc_basic, test_ignore_section_utf16); + tcase_add_test(tc_basic, test_ignore_section_utf16_be); + tcase_add_test(tc_basic, test_bad_ignore_section); + tcase_add_test(tc_basic, test_external_entity_values); + tcase_add_test(tc_basic, test_ext_entity_not_standalone); + tcase_add_test(tc_basic, test_ext_entity_value_abort); + tcase_add_test(tc_basic, test_bad_public_doctype); + tcase_add_test(tc_basic, test_attribute_enum_value); + tcase_add_test(tc_basic, test_predefined_entity_redefinition); + tcase_add_test(tc_basic, test_dtd_stop_processing); + tcase_add_test(tc_basic, test_public_notation_no_sysid); + tcase_add_test(tc_basic, test_nested_groups); + tcase_add_test(tc_basic, test_group_choice); + tcase_add_test(tc_basic, test_standalone_parameter_entity); + tcase_add_test(tc_basic, test_skipped_parameter_entity); + tcase_add_test(tc_basic, test_recursive_external_parameter_entity); + tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); + tcase_add_test(tc_basic, test_suspend_xdecl); + tcase_add_test(tc_basic, test_abort_epilog); + tcase_add_test(tc_basic, test_abort_epilog_2); + tcase_add_test(tc_basic, test_suspend_epilog); + tcase_add_test(tc_basic, test_unfinished_epilog); + tcase_add_test(tc_basic, test_partial_char_in_epilog); + tcase_add_test(tc_basic, test_hash_collision); + tcase_add_test(tc_basic, test_suspend_resume_internal_entity); + tcase_add_test(tc_basic, test_resume_entity_with_syntax_error); + tcase_add_test(tc_basic, test_suspend_resume_parameter_entity); + tcase_add_test(tc_basic, test_restart_on_error); + tcase_add_test(tc_basic, test_reject_lt_in_attribute_value); + tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value); + tcase_add_test(tc_basic, test_trailing_cr_in_att_value); + tcase_add_test(tc_basic, test_standalone_internal_entity); + tcase_add_test(tc_basic, test_skipped_external_entity); + tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity); + tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity); + tcase_add_test(tc_basic, test_param_entity_with_trailing_cr); + tcase_add_test(tc_basic, test_invalid_character_entity); + tcase_add_test(tc_basic, test_invalid_character_entity_2); + tcase_add_test(tc_basic, test_invalid_character_entity_3); + tcase_add_test(tc_basic, test_invalid_character_entity_4); + tcase_add_test(tc_basic, test_pi_handled_in_default); + tcase_add_test(tc_basic, test_comment_handled_in_default); + tcase_add_test(tc_basic, test_pi_yml); + tcase_add_test(tc_basic, test_pi_xnl); + tcase_add_test(tc_basic, test_pi_xmm); + tcase_add_test(tc_basic, test_utf16_pi); + tcase_add_test(tc_basic, test_utf16_be_pi); + tcase_add_test(tc_basic, test_utf16_be_comment); + tcase_add_test(tc_basic, test_utf16_le_comment); + tcase_add_test(tc_basic, test_missing_encoding_conversion_fn); + tcase_add_test(tc_basic, test_failing_encoding_conversion_fn); + tcase_add_test(tc_basic, test_unknown_encoding_success); + tcase_add_test(tc_basic, test_unknown_encoding_bad_name); + tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2); + tcase_add_test(tc_basic, test_unknown_encoding_long_name_1); + tcase_add_test(tc_basic, test_unknown_encoding_long_name_2); + tcase_add_test(tc_basic, test_invalid_unknown_encoding); + tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok); + tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail); + tcase_add_test(tc_basic, test_unknown_encoding_invalid_length); + tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit); + tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); + tcase_add_test(tc_basic, test_unknown_encoding_invalid_high); + tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); + tcase_add_test(tc_basic, test_ext_entity_latin1_utf16le_bom); + tcase_add_test(tc_basic, test_ext_entity_latin1_utf16be_bom); + tcase_add_test(tc_basic, test_ext_entity_latin1_utf16le_bom2); + tcase_add_test(tc_basic, test_ext_entity_latin1_utf16be_bom2); + tcase_add_test(tc_basic, test_ext_entity_utf16_be); + tcase_add_test(tc_basic, test_ext_entity_utf16_le); + tcase_add_test(tc_basic, test_ext_entity_utf16_unknown); + tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom); + tcase_add_test(tc_basic, test_utf8_in_cdata_section); + tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); + tcase_add_test(tc_basic, test_trailing_spaces_in_elements); + tcase_add_test(tc_basic, test_utf16_attribute); + tcase_add_test(tc_basic, test_utf16_second_attr); + tcase_add_test(tc_basic, test_attr_after_solidus); + tcase_add_test(tc_basic, test_utf16_pe); + tcase_add_test(tc_basic, test_bad_attr_desc_keyword); + tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); + tcase_add_test(tc_basic, test_bad_doctype); + tcase_add_test(tc_basic, test_bad_doctype_utf16); + tcase_add_test(tc_basic, test_bad_doctype_plus); + tcase_add_test(tc_basic, test_bad_doctype_star); + tcase_add_test(tc_basic, test_bad_doctype_query); + tcase_add_test(tc_basic, test_unknown_encoding_bad_ignore); + tcase_add_test(tc_basic, test_entity_in_utf16_be_attr); + tcase_add_test(tc_basic, test_entity_in_utf16_le_attr); + tcase_add_test(tc_basic, test_entity_public_utf16_be); + tcase_add_test(tc_basic, test_entity_public_utf16_le); + tcase_add_test(tc_basic, test_short_doctype); + tcase_add_test(tc_basic, test_short_doctype_2); + tcase_add_test(tc_basic, test_short_doctype_3); + tcase_add_test(tc_basic, test_long_doctype); + tcase_add_test(tc_basic, test_bad_entity); + tcase_add_test(tc_basic, test_bad_entity_2); + tcase_add_test(tc_basic, test_bad_entity_3); + tcase_add_test(tc_basic, test_bad_entity_4); + tcase_add_test(tc_basic, test_bad_notation); suite_add_tcase(s, tc_namespace); tcase_add_checked_fixture(tc_namespace, @@ -3567,33 +11878,123 @@ make_suite(void) tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_2); tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_3); tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_4); + tcase_add_test(tc_namespace, test_ns_unbound_prefix); tcase_add_test(tc_namespace, test_ns_default_with_empty_uri); tcase_add_test(tc_namespace, test_ns_duplicate_attrs_diff_prefixes); + tcase_add_test(tc_namespace, test_ns_duplicate_hashes); tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_attribute); tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_element); tcase_add_test(tc_namespace, test_ns_parser_reset); + tcase_add_test(tc_namespace, test_ns_long_element); + tcase_add_test(tc_namespace, test_ns_mixed_prefix_atts); + tcase_add_test(tc_namespace, test_ns_extend_uri_buffer); + tcase_add_test(tc_namespace, test_ns_reserved_attributes); + tcase_add_test(tc_namespace, test_ns_reserved_attributes_2); + tcase_add_test(tc_namespace, test_ns_extremely_long_prefix); + tcase_add_test(tc_namespace, test_ns_unknown_encoding_success); + tcase_add_test(tc_namespace, test_ns_double_colon); + tcase_add_test(tc_namespace, test_ns_double_colon_element); + tcase_add_test(tc_namespace, test_ns_bad_attr_leafname); + tcase_add_test(tc_namespace, test_ns_bad_element_leafname); + tcase_add_test(tc_namespace, test_ns_utf16_leafname); + tcase_add_test(tc_namespace, test_ns_utf16_element_leafname); + tcase_add_test(tc_namespace, test_ns_utf16_doctype); + tcase_add_test(tc_namespace, test_ns_invalid_doctype); + tcase_add_test(tc_namespace, test_ns_double_colon_doctype); suite_add_tcase(s, tc_misc); tcase_add_checked_fixture(tc_misc, NULL, basic_teardown); tcase_add_test(tc_misc, test_misc_alloc_create_parser); tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding); - tcase_add_test(tc_misc, test_misc_alloc_ns); tcase_add_test(tc_misc, test_misc_null_parser); - tcase_add_test(tc_misc, test_misc_alloc_ns_parse_buffer); tcase_add_test(tc_misc, test_misc_error_string); tcase_add_test(tc_misc, test_misc_version); + tcase_add_test(tc_misc, test_misc_features); tcase_add_test(tc_misc, test_misc_attribute_leak); + tcase_add_test(tc_misc, test_misc_utf16le); suite_add_tcase(s, tc_alloc); tcase_add_checked_fixture(tc_alloc, alloc_setup, alloc_teardown); + tcase_add_test(tc_alloc, test_alloc_parse_xdecl); + tcase_add_test(tc_alloc, test_alloc_parse_xdecl_2); + tcase_add_test(tc_alloc, test_alloc_parse_pi); + tcase_add_test(tc_alloc, test_alloc_parse_pi_2); + tcase_add_test(tc_alloc, test_alloc_parse_pi_3); + tcase_add_test(tc_alloc, test_alloc_parse_comment); + tcase_add_test(tc_alloc, test_alloc_parse_comment_2); tcase_add_test(tc_alloc, test_alloc_create_external_parser); tcase_add_test(tc_alloc, test_alloc_run_external_parser); tcase_add_test(tc_alloc, test_alloc_dtd_copy_default_atts); tcase_add_test(tc_alloc, test_alloc_external_entity); + tcase_add_test(tc_alloc, test_alloc_ext_entity_set_encoding); tcase_add_test(tc_alloc, test_alloc_internal_entity); tcase_add_test(tc_alloc, test_alloc_dtd_default_handling); tcase_add_test(tc_alloc, test_alloc_explicit_encoding); tcase_add_test(tc_alloc, test_alloc_set_base); + tcase_add_test(tc_alloc, test_alloc_realloc_buffer); + tcase_add_test(tc_alloc, test_alloc_ext_entity_realloc_buffer); + tcase_add_test(tc_alloc, test_alloc_realloc_many_attributes); + tcase_add_test(tc_alloc, test_alloc_public_entity_value); + tcase_add_test(tc_alloc, test_alloc_realloc_subst_public_entity_value); + tcase_add_test(tc_alloc, test_alloc_parse_public_doctype); + tcase_add_test(tc_alloc, test_alloc_parse_public_doctype_long_name); + tcase_add_test(tc_alloc, test_alloc_set_foreign_dtd); + tcase_add_test(tc_alloc, test_alloc_attribute_enum_value); + tcase_add_test(tc_alloc, test_alloc_realloc_attribute_enum_value); + tcase_add_test(tc_alloc, test_alloc_realloc_implied_attribute); + tcase_add_test(tc_alloc, test_alloc_realloc_default_attribute); + tcase_add_test(tc_alloc, test_alloc_notation); + tcase_add_test(tc_alloc, test_alloc_public_notation); + tcase_add_test(tc_alloc, test_alloc_system_notation); + tcase_add_test(tc_alloc, test_alloc_nested_groups); + tcase_add_test(tc_alloc, test_alloc_realloc_nested_groups); + tcase_add_test(tc_alloc, test_alloc_large_group); + tcase_add_test(tc_alloc, test_alloc_realloc_group_choice); + tcase_add_test(tc_alloc, test_alloc_pi_in_epilog); + tcase_add_test(tc_alloc, test_alloc_comment_in_epilog); + tcase_add_test(tc_alloc, test_alloc_realloc_long_attribute_value); + tcase_add_test(tc_alloc, test_alloc_attribute_whitespace); + tcase_add_test(tc_alloc, test_alloc_attribute_predefined_entity); + tcase_add_test(tc_alloc, test_alloc_long_attr_default_with_char_ref); + tcase_add_test(tc_alloc, test_alloc_long_attr_value); + tcase_add_test(tc_alloc, test_alloc_nested_entities); + tcase_add_test(tc_alloc, test_alloc_realloc_param_entity_newline); + tcase_add_test(tc_alloc, test_alloc_realloc_ce_extends_pe); + tcase_add_test(tc_alloc, test_alloc_realloc_attributes); + tcase_add_test(tc_alloc, test_alloc_long_doc_name); + tcase_add_test(tc_alloc, test_alloc_long_base); + tcase_add_test(tc_alloc, test_alloc_long_public_id); + tcase_add_test(tc_alloc, test_alloc_long_entity_value); + tcase_add_test(tc_alloc, test_alloc_long_notation); + + suite_add_tcase(s, tc_nsalloc); + tcase_add_checked_fixture(tc_nsalloc, nsalloc_setup, nsalloc_teardown); + tcase_add_test(tc_nsalloc, test_nsalloc_xmlns); + tcase_add_test(tc_nsalloc, test_nsalloc_parse_buffer); + tcase_add_test(tc_nsalloc, test_nsalloc_long_prefix); + tcase_add_test(tc_nsalloc, test_nsalloc_long_uri); + tcase_add_test(tc_nsalloc, test_nsalloc_long_attr); + tcase_add_test(tc_nsalloc, test_nsalloc_long_attr_prefix); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_attributes); + tcase_add_test(tc_nsalloc, test_nsalloc_long_element); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_binding_uri); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_prefix); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_longer_prefix); + tcase_add_test(tc_nsalloc, test_nsalloc_long_namespace); + tcase_add_test(tc_nsalloc, test_nsalloc_less_long_namespace); + tcase_add_test(tc_nsalloc, test_nsalloc_long_context); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_2); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_3); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_4); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_5); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_6); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_7); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_ge_name); + tcase_add_test(tc_nsalloc, test_nsalloc_realloc_long_context_in_dtd); + tcase_add_test(tc_nsalloc, test_nsalloc_long_default_in_ext); + tcase_add_test(tc_nsalloc, test_nsalloc_long_systemid_in_ext); + tcase_add_test(tc_nsalloc, test_nsalloc_prefixed_element); return s; } |