This is an automated email from the ASF dual-hosted git repository. ardovm pushed a commit to branch libexpat in repository https://gitbox.apache.org/repos/asf/openoffice.git
commit 5055919709f5aaebdd15208a35c55991bc39d195 Author: Arrigo Marchiori <[email protected]> AuthorDate: Sun Aug 29 14:25:43 2021 +0200 Backport changes from libexpat 2.4.0 into ours Backport by Carl Marcum --- main/expat/expat-2.2.11.patch | 2745 +++++++++++++++++++++++++++++++++++++++++ main/expat/makefile.mk | 2 +- 2 files changed, 2746 insertions(+), 1 deletion(-) diff --git a/main/expat/expat-2.2.11.patch b/main/expat/expat-2.2.11.patch new file mode 100644 index 0000000..289362d --- /dev/null +++ b/main/expat/expat-2.2.11.patch @@ -0,0 +1,2745 @@ +--- misc/expat-2.2.10/Changes 2020-10-03 11:14:57.000000000 -0400 ++++ misc/build/expat-2.2.10/Changes 2021-07-18 17:21:48.000000000 -0400 +@@ -2,6 +2,43 @@ + https://github.com/libexpat/libexpat/labels/help%20wanted + If you can help, please get in touch. Thanks! + ++ ++Release 2.2.11 XXX XXXXX XX XXXX ++ Security fixes: ++ #34 #466 CVE-2013-0340/CWE-776 -- Protect against billion laughs attacks ++ (denial-of-service; flavors targeting CPU time or RAM or both, ++ leveraging general entities or parameter entities or both) ++ by tracking and limiting the input amplification factor ++ (<amplification> := (<direct> + <indirect>) / <direct>). ++ By conservative default, amplification up to a factor of 100.0 ++ is tolerated and rejection only starts after 8 MiB of output bytes ++ (=<direct> + <indirect>) have been processed. ++ A new error code XML_ERROR_AMPLIFICATION_LIMIT_BREACH signals ++ this condition. ++ New features: ++ #34 #466 Add two new API functions to further tighten billion laughs ++ protection parameters when desired. ++ - XML_SetBillionLaughsAttackProtectionMaximumAmplification ++ - XML_SetBillionLaughsAttackProtectionActivationThreshold ++ Please see file "doc/reference.html" for more details. ++ If you ever need to increase the defaults for non-attack XML ++ payload, please file a bug report with libexpat. 
++ #34 #466 Introduce environment switches EXPAT_ACCOUNTING_DEBUG=(0|1|2|3) ++ and EXPAT_ENTITY_DEBUG=(0|1) for runtime debugging of accounting ++ and entity processing; specific behavior of these values may ++ change in the future. ++ #34 #466 xmlwf: Add arguments "-a FACTOR" and "-b BYTES" to further tighten ++ billion laughs protection parameters when desired. ++ If you ever need to increase the defaults for non-attack XML ++ payload, please file a bug report with libexpat. ++ ++ Special thanks to: ++ Nick Wellnhofer ++ Yury Gribov ++ and ++ Clang LeakSan ++ JetBrains ++ + Release 2.2.10 Sat October 3 2020 + Bug fixes: + #390 #395 #398 Fix undefined behavior during parsing caused by +diff -ru misc/expat-2.2.10/CMakeLists.txt misc/build/expat-2.2.10/CMakeLists.txt +--- misc/expat-2.2.10/CMakeLists.txt 2020-10-03 11:14:57.000000000 -0400 ++++ misc/build/expat-2.2.10/CMakeLists.txt 2021-08-25 18:35:36.000000000 -0400 +@@ -448,14 +448,12 @@ + endif() + endfunction() + +- add_executable(runtests tests/runtests.c ${test_SRCS}) ++ add_executable(runtests tests/runtests.c ${test_SRCS} ${expat_SRCS}) + set_property(TARGET runtests PROPERTY RUNTIME_OUTPUT_DIRECTORY tests) +- target_link_libraries(runtests expat) + expat_add_test(runtests $<TARGET_FILE:runtests>) + +- add_executable(runtestspp tests/runtestspp.cpp ${test_SRCS}) ++ add_executable(runtestspp tests/runtestspp.cpp ${test_SRCS} ${expat_SRCS}) + set_property(TARGET runtestspp PROPERTY RUNTIME_OUTPUT_DIRECTORY tests) +- target_link_libraries(runtestspp expat) + expat_add_test(runtestspp $<TARGET_FILE:runtestspp>) + endif() + +diff -ru misc/expat-2.2.10/doc/Makefile.in misc/build/expat-2.2.10/doc/Makefile.in +--- misc/expat-2.2.10/doc/Makefile.in 2020-10-03 11:37:06.000000000 -0400 ++++ misc/build/expat-2.2.10/doc/Makefile.in 2021-07-18 18:17:02.000000000 -0400 +@@ -1,7 +1,7 @@ +-# Makefile.in generated by automake 1.16.2 from Makefile.am. ++# Makefile.in generated by automake 1.16.1 from Makefile.am. 
+ # @configure_input@ + +-# Copyright (C) 1994-2020 Free Software Foundation, Inc. ++# Copyright (C) 1994-2018 Free Software Foundation, Inc. + + # This Makefile.in is free software; the Free Software Foundation + # gives unlimited permission to copy and/or distribute it, +@@ -314,7 +314,6 @@ + prefix = @prefix@ + program_transform_name = @program_transform_name@ + psdir = @psdir@ +-runstatedir = @runstatedir@ + sbindir = @sbindir@ + sharedstatedir = @sharedstatedir@ + srcdir = @srcdir@ +diff -ru misc/expat-2.2.10/doc/reference.html misc/build/expat-2.2.10/doc/reference.html +--- misc/expat-2.2.10/doc/reference.html 2020-09-25 13:47:39.000000000 -0400 ++++ misc/build/expat-2.2.10/doc/reference.html 2021-07-18 17:21:48.000000000 -0400 +@@ -120,6 +120,13 @@ + <li><a href="#XML_GetInputContext">XML_GetInputContext</a></li> + </ul> + </li> ++ <li> ++ <a href="#billion-laughs">Billion Laughs Attack Protection</a> ++ <ul> ++ <li><a href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></li> ++ <li><a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></li> ++ </ul> ++ </li> + <li><a href="#miscellaneous">Miscellaneous Functions</a> + <ul> + <li><a href="#XML_SetUserData">XML_SetUserData</a></li> +@@ -1998,6 +2005,98 @@ + return NULL.</p> + </div> + ++ ++ ++<h3><a name="billion-laughs">Billion Laughs Attack Protection</a></h3> ++ ++<p>The functions in this section configure the built-in ++ protection against various forms of ++ <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a>.</p> ++ ++<pre class="fcndec" id="XML_SetBillionLaughsAttackProtectionMaximumAmplification"> ++/* Added in Expat 2.2.11. 
*/ ++XML_Bool XMLCALL ++XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser p, ++ float maximumAmplificationFactor); ++</pre> ++<div class="fcndef"> ++ <p> ++ Sets the maximum tolerated amplification factor ++ for protection against ++ <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a> ++ (default: <code>100.0</code>) ++ of parser <code>p</code> to <code>maximumAmplificationFactor</code>, and ++ returns <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error. ++ </p> ++ ++ The amplification factor is calculated as .. ++ <pre> ++ amplification := (direct + indirect) / direct ++ </pre> ++ .. while parsing, whereas ++ <code>direct</code> is the number of bytes read from the primary document in parsing and ++ <code>indirect</code> is the number of bytes added by expanding entities and reading of external DTD files, combined. ++ ++ <p>For a call to <code>XML_SetBillionLaughsAttackProtectionMaximumAmplification</code> to succeed:</p> ++ <ul> ++ <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers) and</li> ++ <li><code>maximumAmplificationFactor</code> must be non-<code>NaN</code> and greater than or equal to <code>1.0</code>.</li> ++ </ul> ++ ++ <p> ++ <strong>Note:</strong> ++ If you ever need to increase this value for non-attack payload, ++ please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>. ++ </p> ++ ++ <p> ++ <strong>Note:</strong> ++ Peak amplifications ++ of factor 15,000 for the entire payload and ++ of factor 30,000 in the middle of parsing ++ have been observed with small benign files in practice. ++ ++ So if you do reduce the maximum allowed amplification, ++ please make sure that the activation threshold is still big enough ++ to not end up with undesired false positives (i.e. benign files being rejected).
++ </p> ++</div> ++ ++<pre class="fcndec" id="XML_SetBillionLaughsAttackProtectionActivationThreshold"> ++/* Added in Expat 2.2.11. */ ++XML_Bool XMLCALL ++XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p, ++ unsigned long long activationThresholdBytes); ++</pre> ++<div class="fcndef"> ++ <p> ++ Sets the number of output bytes (including amplification from entity expansion and reading DTD files) ++ needed to activate protection against ++ <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a> ++ (default: <code>8 MiB</code>) ++ of parser <code>p</code> to <code>activationThresholdBytes</code>, and ++ returns <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error. ++ </p> ++ ++ <p>For a call to <code>XML_SetBillionLaughsAttackProtectionActivationThreshold</code> to succeed:</p> ++ <ul> ++ <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers).</li> ++ </ul> ++ ++ <p> ++ <strong>Note:</strong> ++ If you ever need to increase this value for non-attack payload, ++ please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>. ++ </p> ++ ++ <p> ++ <strong>Note:</strong> ++ Activation thresholds below 4 MiB are known to break support for ++ <a href="https://en.wikipedia.org/wiki/Darwin_Information_Typing_Architecture">DITA</a> 1.3 payload ++ and are hence not recommended.
++ </p> ++</div> ++ + <h3><a name="miscellaneous">Miscellaneous functions</a></h3> + + <p>The functions in this section either obtain state information from +Only in misc/expat-2.2.10/doc: xmlwf.1 +diff -ru misc/expat-2.2.10/doc/xmlwf.xml misc/build/expat-2.2.10/doc/xmlwf.xml +--- misc/expat-2.2.10/doc/xmlwf.xml 2020-09-25 13:47:39.000000000 -0400 ++++ misc/build/expat-2.2.10/doc/xmlwf.xml 2021-07-18 17:21:48.000000000 -0400 +@@ -3,7 +3,7 @@ + <!ENTITY dhfirstname "<firstname>Scott</firstname>"> + <!ENTITY dhsurname "<surname>Bronson</surname>"> + <!-- Please adjust the date whenever revising the manpage. --> +- <!ENTITY dhdate "<date>March 11, 2016</date>"> ++ <!ENTITY dhdate "<date>July 18, 2021</date>"> + <!-- SECTION should be 1-8, maybe w/ subsection other parameters are + allowed: see man(7), man(1). --> + <!ENTITY dhsection "<manvolnum>1</manvolnum>"> +@@ -138,6 +138,52 @@ + </para> + + <variablelist> ++ ++ <variablelist> ++ ++ <varlistentry> ++ <term><option>-a</option> <replaceable>factor</replaceable></term> ++ <listitem> ++ <para> ++ Sets the maximum tolerated amplification factor ++ for protection against billion laughs attacks (default: 100.0). ++ The amplification factor is calculated as .. ++ </para> ++ <literallayout> ++ amplification := (direct + indirect) / direct ++ </literallayout> ++ <para> ++ .. while parsing, whereas ++ <direct> is the number of bytes read ++ from the primary document in parsing and ++ <indirect> is the number of bytes ++ added by expanding entities and reading of external DTD files, ++ combined. ++ </para> ++ <para> ++ <emphasis>NOTE</emphasis>: ++ If you ever need to increase this value for non-attack payload, ++ please file a bug report. 
++ </para> ++ </listitem> ++ </varlistentry> ++ ++ <varlistentry> ++ <term><option>-b</option> <replaceable>bytes</replaceable></term> ++ <listitem> ++ <para> ++ Sets the number of output bytes (including amplification) ++ needed to activate protection against billion laughs attacks ++ (default: 8 MiB). ++ This can be thought of as an "activation threshold". ++ </para> ++ <para> ++ <emphasis>NOTE</emphasis>: ++ If you ever need to increase this value for non-attack payload, ++ please file a bug report. ++ </para> ++ </listitem> ++ </varlistentry> + + <varlistentry> + <term><option>-c</option></term> +@@ -455,6 +501,7 @@ + <literallayout> + The Expat home page: http://www.libexpat.org/ + The W3 XML specification: http://www.w3.org/TR/REC-xml ++Billion laughs attack: https://en.wikipedia.org/wiki/Billion_laughs_attack + </literallayout> + + </para> +diff -ru misc/expat-2.2.10/lib/expat.h misc/build/expat-2.2.10/lib/expat.h +--- misc/expat-2.2.10/lib/expat.h 2020-10-03 11:14:57.000000000 -0400 ++++ misc/build/expat-2.2.10/lib/expat.h 2021-07-18 17:21:48.000000000 -0400 +@@ -115,7 +115,10 @@ + XML_ERROR_RESERVED_PREFIX_XMLNS, + XML_ERROR_RESERVED_NAMESPACE_URI, + /* Added in 2.2.1. */ +- XML_ERROR_INVALID_ARGUMENT ++ XML_ERROR_INVALID_ARGUMENT, ++ /* Added in 2.2.11 */ ++ XML_ERROR_NO_BUFFER, ++ XML_ERROR_AMPLIFICATION_LIMIT_BREACH + }; + + enum XML_Content_Type { +@@ -997,7 +1000,10 @@ + XML_FEATURE_SIZEOF_XML_LCHAR, + XML_FEATURE_NS, + XML_FEATURE_LARGE_SIZE, +- XML_FEATURE_ATTR_INFO ++ XML_FEATURE_ATTR_INFO, ++ /* added in Expat 2.2.11 */ ++ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, ++ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT + /* Additional features must be added to the end of this enum. 
*/ + }; + +@@ -1010,6 +1016,19 @@ + XMLPARSEAPI(const XML_Feature *) + XML_GetFeatureList(void); + ++ ++#ifdef XML_DTD ++/* Added in Expat 2.2.11 */ ++XMLPARSEAPI(XML_Bool) ++XML_SetBillionLaughsAttackProtectionMaximumAmplification( ++ XML_Parser parser, float maximumAmplificationFactor); ++ ++/* Added in Expat 2.2.11 */ ++XMLPARSEAPI(XML_Bool) ++XML_SetBillionLaughsAttackProtectionActivationThreshold( ++ XML_Parser parser, unsigned long long activationThresholdBytes); ++#endif ++ + /* Expat follows the semantic versioning convention. + See http://semver.org. + */ +diff -ru misc/expat-2.2.10/lib/internal.h misc/build/expat-2.2.10/lib/internal.h +--- misc/expat-2.2.10/lib/internal.h 2020-09-25 13:47:39.000000000 -0400 ++++ misc/build/expat-2.2.10/lib/internal.h 2021-07-18 17:21:48.000000000 -0400 +@@ -101,10 +101,47 @@ + # endif + #endif + ++#include <limits.h> // ULONG_MAX ++ ++#if defined(_WIN32) && ! defined(__USE_MINGW_ANSI_STDIO) ++# define EXPAT_FMT_ULL(midpart) "%" midpart "I64u" ++# if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW ++# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d" ++# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "I64u" ++# else ++# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" ++# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" ++# endif ++#else ++# define EXPAT_FMT_ULL(midpart) "%" midpart "llu" ++# if ! defined(ULONG_MAX) ++# error Compiler did not define ULONG_MAX for us ++# elif ULONG_MAX == 18446744073709551615u // 2^64-1 ++# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" ++# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu" ++# else ++# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d" ++# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u" ++# endif ++#endif ++ ++ + #ifndef UNUSED_P + # define UNUSED_P(p) (void)p + #endif + ++/* NOTE BEGIN If you ever patch these defaults to greater values ++ for non-attack XML payload in your environment, ++ please file a bug report with libexpat. 
Thank you! ++*/ ++#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT \ ++ 100.0f ++#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \ ++ 8388608 // 8 MiB, 2^23 ++/* NOTE END */ ++ ++#include "expat.h" // so we can use type XML_Parser below ++ + #ifdef __cplusplus + extern "C" { + #endif +@@ -117,6 +154,11 @@ + void + _INTERNAL_trim_to_complete_utf8_characters(const char *from, + const char **fromLimRef); ++#if defined(XML_DTD) ++unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); ++unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); ++const char *unsignedCharToPrintable(unsigned char c); ++#endif + + #ifdef __cplusplus + } +diff -ru misc/expat-2.2.10/lib/libexpat.def misc/build/expat-2.2.10/lib/libexpat.def +--- misc/expat-2.2.10/lib/libexpat.def 2020-09-25 13:47:39.000000000 -0400 ++++ misc/build/expat-2.2.10/lib/libexpat.def 2021-07-18 17:21:48.000000000 -0400 +@@ -76,3 +76,6 @@ + XML_SetHashSalt @67 + ; added with version 2.2.5 + _INTERNAL_trim_to_complete_utf8_characters @68 ++; added with version 2.2.11 ++ XML_SetBillionLaughsAttackProtectionActivationThreshold @69 ++ XML_SetBillionLaughsAttackProtectionMaximumAmplification @70 +\ No newline at end of file +diff -ru misc/expat-2.2.10/lib/libexpatw.def misc/build/expat-2.2.10/lib/libexpatw.def +--- misc/expat-2.2.10/lib/libexpatw.def 2020-09-25 13:47:39.000000000 -0400 ++++ misc/build/expat-2.2.10/lib/libexpatw.def 2021-07-18 17:21:48.000000000 -0400 +@@ -76,3 +76,6 @@ + XML_SetHashSalt @67 + ; added with version 2.2.5 + _INTERNAL_trim_to_complete_utf8_characters @68 ++; added with version 2.2.11 ++ XML_SetBillionLaughsAttackProtectionActivationThreshold @69 ++ XML_SetBillionLaughsAttackProtectionMaximumAmplification @70 +\ No newline at end of file +Only in misc/build/expat-2.2.10/lib: Makefile +diff -ru misc/expat-2.2.10/lib/Makefile.in misc/build/expat-2.2.10/lib/Makefile.in +--- 
misc/expat-2.2.10/lib/Makefile.in 2020-10-03 11:37:06.000000000 -0400 ++++ misc/build/expat-2.2.10/lib/Makefile.in 2021-07-18 18:17:02.000000000 -0400 +@@ -1,7 +1,7 @@ +-# Makefile.in generated by automake 1.16.2 from Makefile.am. ++# Makefile.in generated by automake 1.16.1 from Makefile.am. + # @configure_input@ + +-# Copyright (C) 1994-2020 Free Software Foundation, Inc. ++# Copyright (C) 1994-2018 Free Software Foundation, Inc. + + # This Makefile.in is free software; the Free Software Foundation + # gives unlimited permission to copy and/or distribute it, +@@ -372,7 +372,6 @@ + prefix = @prefix@ + program_transform_name = @program_transform_name@ + psdir = @psdir@ +-runstatedir = @runstatedir@ + sbindir = @sbindir@ + sharedstatedir = @sharedstatedir@ + srcdir = @srcdir@ +diff -ru misc/expat-2.2.10/lib/xmlparse.c misc/build/expat-2.2.10/lib/xmlparse.c +--- misc/expat-2.2.10/lib/xmlparse.c 2020-10-03 11:14:57.000000000 -0400 ++++ misc/build/expat-2.2.10/lib/xmlparse.c 2021-08-28 18:28:18.000000000 -0400 +@@ -47,6 +47,7 @@ + #include <limits.h> /* UINT_MAX */ + #include <stdio.h> /* fprintf */ + #include <stdlib.h> /* getenv, rand_s */ ++#include <math.h> /* isnan */ + + #if defined(_WIN32) && defined(_MSC_VER) && (_MSC_VER < 1600) + /* vs2008/9.0 and earlier lack stdint.h; _MSC_VER 1600 is vs2010/10.0 */ +@@ -73,6 +74,10 @@ + + #ifdef _WIN32 + # include "winconfig.h" ++#include <float.h> ++#ifndef isnan ++#define isnan _isnan ++#endif + #elif defined(HAVE_EXPAT_CONFIG_H) + # include <expat_config.h> + #endif /* ndef _WIN32 */ +@@ -382,6 +387,31 @@ + XML_Bool betweenDecl; /* WFC: PE Between Declarations */ + } OPEN_INTERNAL_ENTITY; + ++enum XML_Account { ++ XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */ ++ XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity ++ expansion */ ++ XML_ACCOUNT_NONE /* i.e. 
do not account, was accounted already */ ++}; ++ ++#ifdef XML_DTD ++typedef unsigned long long XmlBigCount; ++typedef struct accounting { ++ XmlBigCount countBytesDirect; ++ XmlBigCount countBytesIndirect; ++ int debugLevel; ++ float maximumAmplificationFactor; // >=1.0 ++ unsigned long long activationThresholdBytes; ++} ACCOUNTING; ++ ++typedef struct entity_stats { ++ unsigned int countEverOpened; ++ unsigned int currentDepth; ++ unsigned int maximumDepthSeen; ++ int debugLevel; ++} ENTITY_STATS; ++#endif /* XML_DTD */ ++ + typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, + const char *end, const char **endPtr); + +@@ -412,13 +442,14 @@ + static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, + const char *s, const char *end, int tok, + const char *next, const char **nextPtr, +- XML_Bool haveMore, XML_Bool allowClosingDoctype); ++ XML_Bool haveMore, XML_Bool allowClosingDoctype, ++ enum XML_Account account); + static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity, + XML_Bool betweenDecl); + static enum XML_Error doContent(XML_Parser parser, int startTagLevel, + const ENCODING *enc, const char *start, + const char *end, const char **endPtr, +- XML_Bool haveMore); ++ XML_Bool haveMore, enum XML_Account account); + static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, + const char **startPtr, const char *end, + const char **nextPtr, XML_Bool haveMore); +@@ -431,7 +462,8 @@ + static void freeBindings(XML_Parser parser, BINDING *bindings); + static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, + const char *s, TAG_NAME *tagNamePtr, +- BINDING **bindingsPtr); ++ BINDING **bindingsPtr, ++ enum XML_Account account); + static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, + const ATTRIBUTE_ID *attId, const XML_Char *uri, + BINDING **bindingsPtr); +@@ -440,15 +472,18 @@ + XML_Parser parser); + static enum XML_Error storeAttributeValue(XML_Parser parser, 
const ENCODING *, + XML_Bool isCdata, const char *, +- const char *, STRING_POOL *); ++ const char *, STRING_POOL *, ++ enum XML_Account account); + static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, + XML_Bool isCdata, const char *, +- const char *, STRING_POOL *); ++ const char *, STRING_POOL *, ++ enum XML_Account account); + static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); + static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); + static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, +- const char *start, const char *end); ++ const char *start, const char *end, ++ enum XML_Account account); + static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, + const char *start, const char *end); + static int reportComment(XML_Parser parser, const ENCODING *enc, +@@ -512,6 +547,34 @@ + + static void parserInit(XML_Parser parser, const XML_Char *encodingName); + ++#ifdef XML_DTD ++static float accountingGetCurrentAmplification(XML_Parser rootParser); ++static void accountingReportStats(XML_Parser originParser, const char *epilog); ++static void accountingOnAbort(XML_Parser originParser); ++static void accountingReportDiff(XML_Parser rootParser, ++ unsigned int levelsAwayFromRootParser, ++ const char *before, const char *after, ++ ptrdiff_t bytesMore, int source_line, ++ enum XML_Account account); ++static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok, ++ const char *before, const char *after, ++ int source_line, ++ enum XML_Account account); ++ ++static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity, ++ const char *action, int sourceLine); ++static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity, ++ int sourceLine); ++static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity, ++ int sourceLine); ++ ++static XML_Parser getRootParserOf(XML_Parser parser, ++ 
unsigned int *outLevelDiff); ++#endif /* XML_DTD */ ++ ++static unsigned long getDebugLevel(const char *variableName, ++ unsigned long defaultDebugLevel); ++ + #define poolStart(pool) ((pool)->start) + #define poolEnd(pool) ((pool)->ptr) + #define poolLength(pool) ((pool)->ptr - (pool)->start) +@@ -625,6 +688,10 @@ + enum XML_ParamEntityParsing m_paramEntityParsing; + #endif + unsigned long m_hash_secret_salt; ++#ifdef XML_DTD ++ ACCOUNTING m_accounting; ++ ENTITY_STATS m_entity_stats; ++#endif + }; + + #define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) +@@ -809,9 +876,8 @@ + + static unsigned long + ENTROPY_DEBUG(const char *label, unsigned long entropy) { +- const char *const EXPAT_ENTROPY_DEBUG = getenv("EXPAT_ENTROPY_DEBUG"); +- if (EXPAT_ENTROPY_DEBUG && ! strcmp(EXPAT_ENTROPY_DEBUG, "1")) { +- fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, ++ if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) >= 1u) { ++ fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label, + (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy)); + } + return entropy; +@@ -855,7 +921,7 @@ + return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); + } else { + return ENTROPY_DEBUG("fallback(8)", +- entropy * (unsigned long)2305843009213693951ULL); ++ entropy * (unsigned long long)2305843009213693951ULL); + } + #endif + } +@@ -1073,6 +1139,18 @@ + parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; + #endif + parser->m_hash_secret_salt = 0; ++ ++#ifdef XML_DTD ++ memset(&parser->m_accounting, 0, sizeof(ACCOUNTING)); ++ parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u); ++ parser->m_accounting.maximumAmplificationFactor ++ = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT; ++ parser->m_accounting.activationThresholdBytes ++ = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT; ++ ++ memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS)); ++ 
parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u); ++#endif + } + + /* moves list of bindings to m_freeBindingList */ +@@ -2337,6 +2415,13 @@ + /* Added in 2.2.5. */ + case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */ + return XML_L("invalid argument"); ++ /* Added in 2.2.11. */ ++ case XML_ERROR_NO_BUFFER: ++ return XML_L( ++ "a successful prior call to function XML_GetBuffer is required"); ++ case XML_ERROR_AMPLIFICATION_LIMIT_BREACH: ++ return XML_L( ++ "limit on input amplification factor (from DTD and entities) breached"); + } + return NULL; + } +@@ -2373,41 +2458,75 @@ + + const XML_Feature *XMLCALL + XML_GetFeatureList(void) { +- static const XML_Feature features[] +- = {{XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), +- sizeof(XML_Char)}, +- {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), +- sizeof(XML_LChar)}, ++ static const XML_Feature features[] = { ++ {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), ++ sizeof(XML_Char)}, ++ {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), ++ sizeof(XML_LChar)}, + #ifdef XML_UNICODE +- {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, ++ {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, + #endif + #ifdef XML_UNICODE_WCHAR_T +- {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, ++ {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, + #endif + #ifdef XML_DTD +- {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, ++ {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, + #endif + #ifdef XML_CONTEXT_BYTES +- {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), +- XML_CONTEXT_BYTES}, ++ {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), ++ XML_CONTEXT_BYTES}, + #endif + #ifdef XML_MIN_SIZE +- {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, ++ {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, + #endif + #ifdef XML_NS +- {XML_FEATURE_NS, XML_L("XML_NS"), 0}, ++ {XML_FEATURE_NS, XML_L("XML_NS"), 0}, + #endif + #ifdef XML_LARGE_SIZE +- 
{XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, ++ {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, + #endif + #ifdef XML_ATTR_INFO +- {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, ++ {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, + #endif +- {XML_FEATURE_END, NULL, 0}}; ++#ifdef XML_DTD ++ /* Added in Expat 2.2.11. */ ++ {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, ++ XML_L("XML_BLAP_MAX_AMP"), ++ (long int) ++ EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT}, ++ {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, ++ XML_L("XML_BLAP_ACT_THRES"), ++ EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, ++#endif ++ {XML_FEATURE_END, NULL, 0}}; + + return features; + } + ++#ifdef XML_DTD ++XML_Bool XMLCALL ++XML_SetBillionLaughsAttackProtectionMaximumAmplification( ++ XML_Parser parser, float maximumAmplificationFactor) { ++ if ((parser == NULL) || (parser->m_parentParser != NULL) ++ || isnan(maximumAmplificationFactor) ++ || (maximumAmplificationFactor < 1.0f)) { ++ return XML_FALSE; ++ } ++ parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor; ++ return XML_TRUE; ++} ++ ++XML_Bool XMLCALL ++XML_SetBillionLaughsAttackProtectionActivationThreshold( ++ XML_Parser parser, unsigned long long activationThresholdBytes) { ++ if ((parser == NULL) || (parser->m_parentParser != NULL)) { ++ return XML_FALSE; ++ } ++ parser->m_accounting.activationThresholdBytes = activationThresholdBytes; ++ return XML_TRUE; ++} ++#endif /* XML_DTD */ ++ + /* Initially tag->rawName always points into the parse buffer; + for those TAG instances opened while the current parse buffer was + processed, and not yet closed, we need to store tag->rawName in a more +@@ -2460,9 +2579,9 @@ + static enum XML_Error PTRCALL + contentProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { +- enum XML_Error result +- = doContent(parser, 0, 
parser->m_encoding, start, end, endPtr, +- (XML_Bool)! parser->m_parsingStatus.finalBuffer); ++ enum XML_Error result = doContent( ++ parser, 0, parser->m_encoding, start, end, endPtr, ++ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT); + if (result == XML_ERROR_NONE) { + if (! storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; +@@ -2487,6 +2606,14 @@ + int tok = XmlContentTok(parser->m_encoding, start, end, &next); + switch (tok) { + case XML_TOK_BOM: ++#ifdef XML_DTD ++ if (! accountingDiffTolerated(parser, tok, start, next, __LINE__, ++ XML_ACCOUNT_DIRECT)) { ++ accountingOnAbort(parser); ++ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; ++ } ++#endif /* XML_DTD */ ++ + /* If we are at the end of the buffer, this would cause the next stage, + i.e. externalEntityInitProcessor3, to pass control directly to + doContent (by detecting XML_TOK_NONE) without processing any xml text +@@ -2524,6 +2651,10 @@ + const char *next = start; /* XmlContentTok doesn't always set the last arg */ + parser->m_eventPtr = start; + tok = XmlContentTok(parser->m_encoding, start, end, &next); ++ /* Note: These bytes are accounted later in: ++ - processXmlDecl ++ - externalEntityContentProcessor ++ */ + parser->m_eventEndPtr = next; + + switch (tok) { +@@ -2565,7 +2696,8 @@ + const char *end, const char **endPtr) { + enum XML_Error result + = doContent(parser, 1, parser->m_encoding, start, end, endPtr, +- (XML_Bool)! parser->m_parsingStatus.finalBuffer); ++ (XML_Bool)! parser->m_parsingStatus.finalBuffer, ++ XML_ACCOUNT_ENTITY_EXPANSION); + if (result == XML_ERROR_NONE) { + if (! 
storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; +@@ -2576,7 +2708,7 @@ + static enum XML_Error + doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, + const char *s, const char *end, const char **nextPtr, +- XML_Bool haveMore) { ++ XML_Bool haveMore, enum XML_Account account) { + /* save one level of indirection */ + DTD *const dtd = parser->m_dtd; + +@@ -2594,6 +2726,17 @@ + for (;;) { + const char *next = s; /* XmlContentTok doesn't always set the last arg */ + int tok = XmlContentTok(enc, s, end, &next); ++#ifdef XML_DTD ++ const char *accountAfter ++ = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR)) ++ ? (haveMore ? s /* i.e. 0 bytes */ : end) ++ : next; ++ if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__, ++ account)) { ++ accountingOnAbort(parser); ++ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; ++ } ++#endif + *eventEndPP = next; + switch (tok) { + case XML_TOK_TRAILING_CR: +@@ -2649,6 +2792,14 @@ + XML_Char ch = (XML_Char)XmlPredefinedEntityName( + enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); + if (ch) { ++#ifdef XML_DTD ++ /* NOTE: We are replacing 4-6 characters original input for 1 character ++ * so there is no amplification and hence recording without ++ * protection. */ ++ accountingDiffTolerated(parser, tok, (char *)&ch, ++ ((char *)&ch) + sizeof(XML_Char), __LINE__, ++ XML_ACCOUNT_ENTITY_EXPANSION); ++#endif /* XML_DTD */ + if (parser->m_characterDataHandler) + parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1); + else if (parser->m_defaultHandler) +@@ -2767,7 +2918,7 @@ + } + tag->name.str = (XML_Char *)tag->buf; + *toPtr = XML_T('\0'); +- result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings)); ++ result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); + if (result) + return result; + if (parser->m_startElementHandler) +@@ -2791,7 +2942,8 @@ + if (! 
name.str) + return XML_ERROR_NO_MEMORY; + poolFinish(&parser->m_tempPool); +- result = storeAtts(parser, enc, s, &name, &bindings); ++ result = storeAtts(parser, enc, s, &name, &bindings, ++ XML_ACCOUNT_NONE /* token spans whole start tag */); + if (result != XML_ERROR_NONE) { + freeBindings(parser, bindings); + return result; +@@ -3055,7 +3207,8 @@ + */ + static enum XML_Error + storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, +- TAG_NAME *tagNamePtr, BINDING **bindingsPtr) { ++ TAG_NAME *tagNamePtr, BINDING **bindingsPtr, ++ enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + ELEMENT_TYPE *elementType; + int nDefaultAtts; +@@ -3165,7 +3318,7 @@ + /* normalize the attribute value */ + result = storeAttributeValue( + parser, enc, isCdata, parser->m_atts[i].valuePtr, +- parser->m_atts[i].valueEnd, &parser->m_tempPool); ++ parser->m_atts[i].valueEnd, &parser->m_tempPool, account); + if (result) + return result; + appAtts[attIndex] = poolStart(&parser->m_tempPool); +@@ -3594,6 +3747,13 @@ + for (;;) { + const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */ + int tok = XmlCdataSectionTok(enc, s, end, &next); ++#ifdef XML_DTD ++ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, ++ XML_ACCOUNT_DIRECT)) { ++ accountingOnAbort(parser); ++ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; ++ } ++#endif + *eventEndPP = next; + switch (tok) { + case XML_TOK_CDATA_SECT_CLOSE: +@@ -3738,6 +3898,13 @@ + *eventPP = s; + *startPtr = NULL; + tok = XmlIgnoreSectionTok(enc, s, end, &next); ++# ifdef XML_DTD ++ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, ++ XML_ACCOUNT_DIRECT)) { ++ accountingOnAbort(parser); ++ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; ++ } ++# endif + *eventEndPP = next; + switch (tok) { + case XML_TOK_IGNORE_SECT: +@@ -3822,6 +3989,15 @@ + const char *versionend; + const XML_Char *storedversion = NULL; + int standalone = -1; ++ ++#ifdef XML_DTD ++ if (! 
accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__, ++ XML_ACCOUNT_DIRECT)) { ++ accountingOnAbort(parser); ++ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; ++ } ++#endif ++ + if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)( + isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr, + &version, &versionend, &encodingName, &newEncoding, &standalone)) { +@@ -3971,6 +4147,10 @@ + + for (;;) { + tok = XmlPrologTok(parser->m_encoding, start, end, &next); ++ /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in: ++ - storeEntityValue ++ - processXmlDecl ++ */ + parser->m_eventEndPtr = next; + if (tok <= 0) { + if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { +@@ -3989,7 +4169,8 @@ + break; + } + /* found end of entity value - can store it now */ +- return storeEntityValue(parser, parser->m_encoding, s, end); ++ return storeEntityValue(parser, parser->m_encoding, s, end, ++ XML_ACCOUNT_DIRECT); + } else if (tok == XML_TOK_XML_DECL) { + enum XML_Error result; + result = processXmlDecl(parser, 0, start, next); +@@ -4016,6 +4197,14 @@ + */ + else if (tok == XML_TOK_BOM && next == end + && ! parser->m_parsingStatus.finalBuffer) { ++# ifdef XML_DTD ++ if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, ++ XML_ACCOUNT_DIRECT)) { ++ accountingOnAbort(parser); ++ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; ++ } ++# endif ++ + *nextPtr = next; + return XML_ERROR_NONE; + } +@@ -4058,16 +4247,24 @@ + } + /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM. + However, when parsing an external subset, doProlog will not accept a BOM +- as valid, and report a syntax error, so we have to skip the BOM ++ as valid, and report a syntax error, so we have to skip the BOM, and ++ account for the BOM bytes. + */ + else if (tok == XML_TOK_BOM) { ++ if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, ++ XML_ACCOUNT_DIRECT)) { ++ accountingOnAbort(parser); ++ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; ++ } ++ + s = next; + tok = XmlPrologTok(parser->m_encoding, s, end, &next); + } + + parser->m_processor = prologProcessor; + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, +- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE); ++ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, ++ XML_ACCOUNT_DIRECT); + } + + static enum XML_Error PTRCALL +@@ -4080,6 +4277,9 @@ + + for (;;) { + tok = XmlPrologTok(enc, start, end, &next); ++ /* Note: These bytes are accounted later in: ++ - storeEntityValue ++ */ + if (tok <= 0) { + if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) { + *nextPtr = s; +@@ -4097,7 +4297,7 @@ + break; + } + /* found end of entity value - can store it now */ +- return storeEntityValue(parser, enc, s, end); ++ return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT); + } + start = next; + } +@@ -4111,13 +4311,14 @@ + const char *next = s; + int tok = XmlPrologTok(parser->m_encoding, s, end, &next); + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, +- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE); ++ (XML_Bool)! 
parser->m_parsingStatus.finalBuffer, XML_TRUE, ++ XML_ACCOUNT_DIRECT); + } + + static enum XML_Error + doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, + int tok, const char *next, const char **nextPtr, XML_Bool haveMore, +- XML_Bool allowClosingDoctype) { ++ XML_Bool allowClosingDoctype, enum XML_Account account) { + #ifdef XML_DTD + static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'}; + #endif /* XML_DTD */ +@@ -4144,6 +4345,10 @@ + static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'}; + static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'}; + ++#ifndef XML_DTD ++ UNUSED_P(account); ++#endif ++ + /* save one level of indirection */ + DTD *const dtd = parser->m_dtd; + +@@ -4208,6 +4413,19 @@ + } + } + role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc); ++#ifdef XML_DTD ++ switch (role) { ++ case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor ++ case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl ++ case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl ++ break; ++ default: ++ if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) { ++ accountingOnAbort(parser); ++ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; ++ } ++ } ++#endif + switch (role) { + case XML_ROLE_XML_DECL: { + enum XML_Error result = processXmlDecl(parser, 0, s, next); +@@ -4483,7 +4701,8 @@ + const XML_Char *attVal; + enum XML_Error result = storeAttributeValue( + parser, enc, parser->m_declAttributeIsCdata, +- s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool); ++ s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool, ++ XML_ACCOUNT_NONE); + if (result) + return result; + attVal = poolStart(&dtd->pool); +@@ -4516,8 +4735,9 @@ + break; + case XML_ROLE_ENTITY_VALUE: + if (dtd->keepProcessing) { +- enum XML_Error result = storeEntityValue( +- parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); ++ enum XML_Error result ++ = storeEntityValue(parser, enc, s + enc->minBytesPerChar, ++ next - enc->minBytesPerChar, XML_ACCOUNT_NONE); + if (parser->m_declEntity) { + parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool); + parser->m_declEntity->textLen +@@ -4907,12 +5127,15 @@ + if (parser->m_externalEntityRefHandler) { + dtd->paramEntityRead = XML_FALSE; + entity->open = XML_TRUE; ++ entityTrackingOnOpen(parser, entity, __LINE__); + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) { ++ entityTrackingOnClose(parser, entity, __LINE__); + entity->open = XML_FALSE; + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + } ++ entityTrackingOnClose(parser, entity, __LINE__); + entity->open = XML_FALSE; + handleDefault = XML_FALSE; + if (! dtd->paramEntityRead) { +@@ -5110,6 +5333,13 @@ + for (;;) { + const char *next = NULL; + int tok = XmlPrologTok(parser->m_encoding, s, end, &next); ++#ifdef XML_DTD ++ if (! 
accountingDiffTolerated(parser, tok, s, next, __LINE__, ++ XML_ACCOUNT_DIRECT)) { ++ accountingOnAbort(parser); ++ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; ++ } ++#endif + parser->m_eventEndPtr = next; + switch (tok) { + /* report partial linebreak - it might be the last token */ +@@ -5183,6 +5413,9 @@ + return XML_ERROR_NO_MEMORY; + } + entity->open = XML_TRUE; ++#ifdef XML_DTD ++ entityTrackingOnOpen(parser, entity, __LINE__); ++#endif + entity->processed = 0; + openEntity->next = parser->m_openInternalEntities; + parser->m_openInternalEntities = openEntity; +@@ -5201,17 +5434,22 @@ + int tok + = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, +- tok, next, &next, XML_FALSE, XML_FALSE); ++ tok, next, &next, XML_FALSE, XML_FALSE, ++ XML_ACCOUNT_ENTITY_EXPANSION); + } else + #endif /* XML_DTD */ + result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, +- textStart, textEnd, &next, XML_FALSE); ++ textStart, textEnd, &next, XML_FALSE, ++ XML_ACCOUNT_ENTITY_EXPANSION); + + if (result == XML_ERROR_NONE) { + if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { + entity->processed = (int)(next - textStart); + parser->m_processor = internalEntityProcessor; + } else { ++#ifdef XML_DTD ++ entityTrackingOnClose(parser, entity, __LINE__); ++#endif /* XML_DTD */ + entity->open = XML_FALSE; + parser->m_openInternalEntities = openEntity->next; + /* put openEntity back in list of free instances */ +@@ -5244,12 +5482,13 @@ + int tok + = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, +- tok, next, &next, XML_FALSE, XML_TRUE); ++ tok, next, &next, XML_FALSE, XML_TRUE, ++ XML_ACCOUNT_ENTITY_EXPANSION); + } else + #endif /* XML_DTD */ + result = doContent(parser, openEntity->startTagLevel, + parser->m_internalEncoding, textStart, textEnd, &next, +- 
XML_FALSE); ++ XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); + + if (result != XML_ERROR_NONE) + return result; +@@ -5258,6 +5497,9 @@ + entity->processed = (int)(next - (const char *)entity->textPtr); + return result; + } else { ++#ifdef XML_DTD ++ entityTrackingOnClose(parser, entity, __LINE__); ++#endif + entity->open = XML_FALSE; + parser->m_openInternalEntities = openEntity->next; + /* put openEntity back in list of free instances */ +@@ -5271,7 +5513,8 @@ + parser->m_processor = prologProcessor; + tok = XmlPrologTok(parser->m_encoding, s, end, &next); + return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, +- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE); ++ (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, ++ XML_ACCOUNT_DIRECT); + } else + #endif /* XML_DTD */ + { +@@ -5279,7 +5522,8 @@ + /* see externalEntityContentProcessor vs contentProcessor */ + return doContent(parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, + s, end, nextPtr, +- (XML_Bool)! parser->m_parsingStatus.finalBuffer); ++ (XML_Bool)! parser->m_parsingStatus.finalBuffer, ++ XML_ACCOUNT_DIRECT); + } + } + +@@ -5294,9 +5538,10 @@ + + static enum XML_Error + storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, +- const char *ptr, const char *end, STRING_POOL *pool) { ++ const char *ptr, const char *end, STRING_POOL *pool, ++ enum XML_Account account) { + enum XML_Error result +- = appendAttributeValue(parser, enc, isCdata, ptr, end, pool); ++ = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account); + if (result) + return result; + if (! 
isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) +@@ -5308,11 +5553,23 @@ + + static enum XML_Error + appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, +- const char *ptr, const char *end, STRING_POOL *pool) { ++ const char *ptr, const char *end, STRING_POOL *pool, ++ enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ ++#ifndef XML_DTD ++ UNUSED_P(account); ++#endif ++ + for (;;) { +- const char *next; ++ const char *next ++ = ptr; /* XmlAttributeValueTok doesn't always set the last arg */ + int tok = XmlAttributeValueTok(enc, ptr, end, &next); ++#ifdef XML_DTD ++ if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) { ++ accountingOnAbort(parser); ++ return XML_ERROR_AMPLIFICATION_LIMIT_BREACH; ++ } ++#endif + switch (tok) { + case XML_TOK_NONE: + return XML_ERROR_NONE; +@@ -5372,6 +5629,14 @@ + XML_Char ch = (XML_Char)XmlPredefinedEntityName( + enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); + if (ch) { ++#ifdef XML_DTD ++ /* NOTE: We are replacing 4-6 characters original input for 1 character ++ * so there is no amplification and hence recording without ++ * protection. */ ++ accountingDiffTolerated(parser, tok, (char *)&ch, ++ ((char *)&ch) + sizeof(XML_Char), __LINE__, ++ XML_ACCOUNT_ENTITY_EXPANSION); ++#endif /* XML_DTD */ + if (! 
poolAppendChar(pool, ch)) + return XML_ERROR_NO_MEMORY; + break; +@@ -5449,9 +5714,16 @@ + enum XML_Error result; + const XML_Char *textEnd = entity->textPtr + entity->textLen; + entity->open = XML_TRUE; ++#ifdef XML_DTD ++ entityTrackingOnOpen(parser, entity, __LINE__); ++#endif + result = appendAttributeValue(parser, parser->m_internalEncoding, + isCdata, (const char *)entity->textPtr, +- (const char *)textEnd, pool); ++ (const char *)textEnd, pool, ++ XML_ACCOUNT_ENTITY_EXPANSION); ++#ifdef XML_DTD ++ entityTrackingOnClose(parser, entity, __LINE__); ++#endif + entity->open = XML_FALSE; + if (result) + return result; +@@ -5481,13 +5753,16 @@ + + static enum XML_Error + storeEntityValue(XML_Parser parser, const ENCODING *enc, +- const char *entityTextPtr, const char *entityTextEnd) { ++ const char *entityTextPtr, const char *entityTextEnd, ++ enum XML_Account account) { + DTD *const dtd = parser->m_dtd; /* save one level of indirection */ + STRING_POOL *pool = &(dtd->entityValuePool); + enum XML_Error result = XML_ERROR_NONE; + #ifdef XML_DTD + int oldInEntityValue = parser->m_prologState.inEntityValue; + parser->m_prologState.inEntityValue = 1; ++#else ++ UNUSED_P(account); + #endif /* XML_DTD */ + /* never return Null for the value argument in EntityDeclHandler, + since this would indicate an external entity; therefore we +@@ -5498,8 +5773,19 @@ + } + + for (;;) { +- const char *next; ++ const char *next ++ = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ + int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); ++ ++#ifdef XML_DTD ++ if (! 
accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__, ++ account)) { ++ accountingOnAbort(parser); ++ result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH; ++ goto endEntityValue; ++ } ++#endif ++ + switch (tok) { + case XML_TOK_PARAM_ENTITY_REF: + #ifdef XML_DTD +@@ -5535,13 +5821,16 @@ + if (parser->m_externalEntityRefHandler) { + dtd->paramEntityRead = XML_FALSE; + entity->open = XML_TRUE; ++ entityTrackingOnOpen(parser, entity, __LINE__); + if (! parser->m_externalEntityRefHandler( + parser->m_externalEntityRefHandlerArg, 0, entity->base, + entity->systemId, entity->publicId)) { ++ entityTrackingOnClose(parser, entity, __LINE__); + entity->open = XML_FALSE; + result = XML_ERROR_EXTERNAL_ENTITY_HANDLING; + goto endEntityValue; + } ++ entityTrackingOnClose(parser, entity, __LINE__); + entity->open = XML_FALSE; + if (! dtd->paramEntityRead) + dtd->keepProcessing = dtd->standalone; +@@ -5549,9 +5838,12 @@ + dtd->keepProcessing = dtd->standalone; + } else { + entity->open = XML_TRUE; ++ entityTrackingOnOpen(parser, entity, __LINE__); + result = storeEntityValue( + parser, parser->m_internalEncoding, (const char *)entity->textPtr, +- (const char *)(entity->textPtr + entity->textLen)); ++ (const char *)(entity->textPtr + entity->textLen), ++ XML_ACCOUNT_ENTITY_EXPANSION); ++ entityTrackingOnClose(parser, entity, __LINE__); + entity->open = XML_FALSE; + if (result) + goto endEntityValue; +@@ -6912,3 +7204,766 @@ + memcpy(result, s, charsRequired * sizeof(XML_Char)); + return result; + } ++#ifdef XML_DTD ++ ++static float ++accountingGetCurrentAmplification(XML_Parser rootParser) { ++ const XmlBigCount countBytesOutput ++ = rootParser->m_accounting.countBytesDirect ++ + rootParser->m_accounting.countBytesIndirect; ++ const float amplificationFactor ++ = rootParser->m_accounting.countBytesDirect ++ ? (countBytesOutput ++ / (float)(rootParser->m_accounting.countBytesDirect)) ++ : 1.0f; ++ assert(! 
rootParser->m_parentParser); ++ return amplificationFactor; ++} ++ ++static void ++accountingReportStats(XML_Parser originParser, const char *epilog) { ++ const XML_Parser rootParser = getRootParserOf(originParser, NULL); ++ float amplificationFactor; ++ assert(! rootParser->m_parentParser); ++ ++ if (rootParser->m_accounting.debugLevel < 1) { ++ return; ++ } ++ ++ amplificationFactor ++ = accountingGetCurrentAmplification(rootParser); ++ fprintf(stderr, ++ "expat: Accounting(%p): Direct " EXPAT_FMT_ULL( ++ "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s", ++ (void *)rootParser, rootParser->m_accounting.countBytesDirect, ++ rootParser->m_accounting.countBytesIndirect, ++ (double)amplificationFactor, epilog); ++} ++ ++static void ++accountingOnAbort(XML_Parser originParser) { ++ accountingReportStats(originParser, " ABORTING\n"); ++} ++ ++static void ++accountingReportDiff(XML_Parser rootParser, ++ unsigned int levelsAwayFromRootParser, const char *before, ++ const char *after, ptrdiff_t bytesMore, int source_line, ++ enum XML_Account account) { ++ const char ellipis[] = "[..]"; ++ const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1; ++ const unsigned int contextLength = 10; ++ const char *walker = before; ++ ++ assert(! rootParser->m_parentParser); ++ ++ fprintf(stderr, ++ " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"", ++ bytesMore, (account == XML_ACCOUNT_DIRECT) ? 
"DIR" : "EXP", ++ levelsAwayFromRootParser, source_line, 10, ""); ++ ++ /* Note: Performance is of no concern here */ ++ if ((rootParser->m_accounting.debugLevel >= 3) ++ || (after - before) ++ <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) { ++ for (; walker < after; walker++) { ++ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); ++ } ++ } else { ++ for (; walker < before + contextLength; walker++) { ++ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); ++ } ++ fprintf(stderr, ellipis); ++ walker = after - contextLength; ++ for (; walker < after; walker++) { ++ fprintf(stderr, "%s", unsignedCharToPrintable(walker[0])); ++ } ++ } ++ fprintf(stderr, "\"\n"); ++} ++ ++static XML_Bool ++accountingDiffTolerated(XML_Parser originParser, int tok, const char *before, ++ const char *after, int source_line, ++ enum XML_Account account) { ++ unsigned int levelsAwayFromRootParser; ++ XML_Parser rootParser; ++ int isDirect; ++ ptrdiff_t bytesMore; ++ XmlBigCount * additionTarget; ++ XmlBigCount countBytesOutput; ++ float amplificationFactor; ++ XML_Bool tolerated; ++ /* Note: We need to check the token type *first* to be sure that ++ * we can even access variable <after>, safely. ++ * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */ ++ switch (tok) { ++ case XML_TOK_INVALID: ++ case XML_TOK_PARTIAL: ++ case XML_TOK_PARTIAL_CHAR: ++ case XML_TOK_NONE: ++ return XML_TRUE; ++ } ++ ++ if (account == XML_ACCOUNT_NONE) ++ return XML_TRUE; /* because these bytes have been accounted for, already */ ++ ++ rootParser ++ = getRootParserOf(originParser, &levelsAwayFromRootParser); ++ assert(! rootParser->m_parentParser); ++ ++ isDirect ++ = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser); ++ bytesMore = after - before; ++ ++ additionTarget ++ = isDirect ? 
&rootParser->m_accounting.countBytesDirect ++ : &rootParser->m_accounting.countBytesIndirect; ++ ++ /* Detect and avoid integer overflow */ ++ if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore) ++ return XML_FALSE; ++ *additionTarget += bytesMore; ++ ++ countBytesOutput ++ = rootParser->m_accounting.countBytesDirect ++ + rootParser->m_accounting.countBytesIndirect; ++ amplificationFactor ++ = accountingGetCurrentAmplification(rootParser); ++ tolerated ++ = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes) ++ || (amplificationFactor ++ <= rootParser->m_accounting.maximumAmplificationFactor); ++ ++ if (rootParser->m_accounting.debugLevel >= 2) { ++ accountingReportStats(rootParser, ""); ++ accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after, ++ bytesMore, source_line, account); ++ } ++ ++ return tolerated; ++} ++ ++unsigned long long ++testingAccountingGetCountBytesDirect(XML_Parser parser) { ++ if (! parser) ++ return 0; ++ return parser->m_accounting.countBytesDirect; ++} ++ ++unsigned long long ++testingAccountingGetCountBytesIndirect(XML_Parser parser) { ++ if (! parser) ++ return 0; ++ return parser->m_accounting.countBytesIndirect; ++} ++ ++static void ++entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity, ++ const char *action, int sourceLine) { ++ const char * entityName; ++ assert(! rootParser->m_parentParser); ++ if (rootParser->m_entity_stats.debugLevel < 1) ++ return; ++ ++# if defined(XML_UNICODE) ++ entityName = "[..]"; ++# else ++ entityName = entity->name; ++# endif ++ ++ fprintf( ++ stderr, ++ "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n", ++ (void *)rootParser, rootParser->m_entity_stats.countEverOpened, ++ rootParser->m_entity_stats.currentDepth, ++ rootParser->m_entity_stats.maximumDepthSeen, ++ (rootParser->m_entity_stats.currentDepth - 1) * 2, "", ++ entity->is_param ? 
"%" : "&", entityName, action, entity->textLen, ++ sourceLine); ++} ++ ++static void ++entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) { ++ const XML_Parser rootParser = getRootParserOf(originParser, NULL); ++ assert(! rootParser->m_parentParser); ++ ++ rootParser->m_entity_stats.countEverOpened++; ++ rootParser->m_entity_stats.currentDepth++; ++ if (rootParser->m_entity_stats.currentDepth ++ > rootParser->m_entity_stats.maximumDepthSeen) { ++ rootParser->m_entity_stats.maximumDepthSeen++; ++ } ++ ++ entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine); ++} ++ ++static void ++entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) { ++ const XML_Parser rootParser = getRootParserOf(originParser, NULL); ++ assert(! rootParser->m_parentParser); ++ ++ entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine); ++ rootParser->m_entity_stats.currentDepth--; ++} ++ ++static XML_Parser ++getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) { ++ XML_Parser rootParser = parser; ++ unsigned int stepsTakenUpwards = 0; ++ while (rootParser->m_parentParser) { ++ rootParser = rootParser->m_parentParser; ++ stepsTakenUpwards++; ++ } ++ assert(! 
rootParser->m_parentParser); ++ if (outLevelDiff != NULL) { ++ *outLevelDiff = stepsTakenUpwards; ++ } ++ return rootParser; ++} ++ ++const char * ++unsignedCharToPrintable(unsigned char c) { ++ switch (c) { ++ case 0: ++ return "\\0"; ++ case 1: ++ return "\\x1"; ++ case 2: ++ return "\\x2"; ++ case 3: ++ return "\\x3"; ++ case 4: ++ return "\\x4"; ++ case 5: ++ return "\\x5"; ++ case 6: ++ return "\\x6"; ++ case 7: ++ return "\\x7"; ++ case 8: ++ return "\\x8"; ++ case 9: ++ return "\\t"; ++ case 10: ++ return "\\n"; ++ case 11: ++ return "\\xB"; ++ case 12: ++ return "\\xC"; ++ case 13: ++ return "\\r"; ++ case 14: ++ return "\\xE"; ++ case 15: ++ return "\\xF"; ++ case 16: ++ return "\\x10"; ++ case 17: ++ return "\\x11"; ++ case 18: ++ return "\\x12"; ++ case 19: ++ return "\\x13"; ++ case 20: ++ return "\\x14"; ++ case 21: ++ return "\\x15"; ++ case 22: ++ return "\\x16"; ++ case 23: ++ return "\\x17"; ++ case 24: ++ return "\\x18"; ++ case 25: ++ return "\\x19"; ++ case 26: ++ return "\\x1A"; ++ case 27: ++ return "\\x1B"; ++ case 28: ++ return "\\x1C"; ++ case 29: ++ return "\\x1D"; ++ case 30: ++ return "\\x1E"; ++ case 31: ++ return "\\x1F"; ++ case 32: ++ return " "; ++ case 33: ++ return "!"; ++ case 34: ++ return "\\\""; ++ case 35: ++ return "#"; ++ case 36: ++ return "$"; ++ case 37: ++ return "%"; ++ case 38: ++ return "&"; ++ case 39: ++ return "'"; ++ case 40: ++ return "("; ++ case 41: ++ return ")"; ++ case 42: ++ return "*"; ++ case 43: ++ return "+"; ++ case 44: ++ return ","; ++ case 45: ++ return "-"; ++ case 46: ++ return "."; ++ case 47: ++ return "/"; ++ case 48: ++ return "0"; ++ case 49: ++ return "1"; ++ case 50: ++ return "2"; ++ case 51: ++ return "3"; ++ case 52: ++ return "4"; ++ case 53: ++ return "5"; ++ case 54: ++ return "6"; ++ case 55: ++ return "7"; ++ case 56: ++ return "8"; ++ case 57: ++ return "9"; ++ case 58: ++ return ":"; ++ case 59: ++ return ";"; ++ case 60: ++ return "<"; ++ case 61: ++ return "="; ++ case 62: ++ 
return ">"; ++ case 63: ++ return "?"; ++ case 64: ++ return "@"; ++ case 65: ++ return "A"; ++ case 66: ++ return "B"; ++ case 67: ++ return "C"; ++ case 68: ++ return "D"; ++ case 69: ++ return "E"; ++ case 70: ++ return "F"; ++ case 71: ++ return "G"; ++ case 72: ++ return "H"; ++ case 73: ++ return "I"; ++ case 74: ++ return "J"; ++ case 75: ++ return "K"; ++ case 76: ++ return "L"; ++ case 77: ++ return "M"; ++ case 78: ++ return "N"; ++ case 79: ++ return "O"; ++ case 80: ++ return "P"; ++ case 81: ++ return "Q"; ++ case 82: ++ return "R"; ++ case 83: ++ return "S"; ++ case 84: ++ return "T"; ++ case 85: ++ return "U"; ++ case 86: ++ return "V"; ++ case 87: ++ return "W"; ++ case 88: ++ return "X"; ++ case 89: ++ return "Y"; ++ case 90: ++ return "Z"; ++ case 91: ++ return "["; ++ case 92: ++ return "\\\\"; ++ case 93: ++ return "]"; ++ case 94: ++ return "^"; ++ case 95: ++ return "_"; ++ case 96: ++ return "`"; ++ case 97: ++ return "a"; ++ case 98: ++ return "b"; ++ case 99: ++ return "c"; ++ case 100: ++ return "d"; ++ case 101: ++ return "e"; ++ case 102: ++ return "f"; ++ case 103: ++ return "g"; ++ case 104: ++ return "h"; ++ case 105: ++ return "i"; ++ case 106: ++ return "j"; ++ case 107: ++ return "k"; ++ case 108: ++ return "l"; ++ case 109: ++ return "m"; ++ case 110: ++ return "n"; ++ case 111: ++ return "o"; ++ case 112: ++ return "p"; ++ case 113: ++ return "q"; ++ case 114: ++ return "r"; ++ case 115: ++ return "s"; ++ case 116: ++ return "t"; ++ case 117: ++ return "u"; ++ case 118: ++ return "v"; ++ case 119: ++ return "w"; ++ case 120: ++ return "x"; ++ case 121: ++ return "y"; ++ case 122: ++ return "z"; ++ case 123: ++ return "{"; ++ case 124: ++ return "|"; ++ case 125: ++ return "}"; ++ case 126: ++ return "~"; ++ case 127: ++ return "\\x7F"; ++ case 128: ++ return "\\x80"; ++ case 129: ++ return "\\x81"; ++ case 130: ++ return "\\x82"; ++ case 131: ++ return "\\x83"; ++ case 132: ++ return "\\x84"; ++ case 133: ++ return "\\x85"; ++ 
case 134: ++ return "\\x86"; ++ case 135: ++ return "\\x87"; ++ case 136: ++ return "\\x88"; ++ case 137: ++ return "\\x89"; ++ case 138: ++ return "\\x8A"; ++ case 139: ++ return "\\x8B"; ++ case 140: ++ return "\\x8C"; ++ case 141: ++ return "\\x8D"; ++ case 142: ++ return "\\x8E"; ++ case 143: ++ return "\\x8F"; ++ case 144: ++ return "\\x90"; ++ case 145: ++ return "\\x91"; ++ case 146: ++ return "\\x92"; ++ case 147: ++ return "\\x93"; ++ case 148: ++ return "\\x94"; ++ case 149: ++ return "\\x95"; ++ case 150: ++ return "\\x96"; ++ case 151: ++ return "\\x97"; ++ case 152: ++ return "\\x98"; ++ case 153: ++ return "\\x99"; ++ case 154: ++ return "\\x9A"; ++ case 155: ++ return "\\x9B"; ++ case 156: ++ return "\\x9C"; ++ case 157: ++ return "\\x9D"; ++ case 158: ++ return "\\x9E"; ++ case 159: ++ return "\\x9F"; ++ case 160: ++ return "\\xA0"; ++ case 161: ++ return "\\xA1"; ++ case 162: ++ return "\\xA2"; ++ case 163: ++ return "\\xA3"; ++ case 164: ++ return "\\xA4"; ++ case 165: ++ return "\\xA5"; ++ case 166: ++ return "\\xA6"; ++ case 167: ++ return "\\xA7"; ++ case 168: ++ return "\\xA8"; ++ case 169: ++ return "\\xA9"; ++ case 170: ++ return "\\xAA"; ++ case 171: ++ return "\\xAB"; ++ case 172: ++ return "\\xAC"; ++ case 173: ++ return "\\xAD"; ++ case 174: ++ return "\\xAE"; ++ case 175: ++ return "\\xAF"; ++ case 176: ++ return "\\xB0"; ++ case 177: ++ return "\\xB1"; ++ case 178: ++ return "\\xB2"; ++ case 179: ++ return "\\xB3"; ++ case 180: ++ return "\\xB4"; ++ case 181: ++ return "\\xB5"; ++ case 182: ++ return "\\xB6"; ++ case 183: ++ return "\\xB7"; ++ case 184: ++ return "\\xB8"; ++ case 185: ++ return "\\xB9"; ++ case 186: ++ return "\\xBA"; ++ case 187: ++ return "\\xBB"; ++ case 188: ++ return "\\xBC"; ++ case 189: ++ return "\\xBD"; ++ case 190: ++ return "\\xBE"; ++ case 191: ++ return "\\xBF"; ++ case 192: ++ return "\\xC0"; ++ case 193: ++ return "\\xC1"; ++ case 194: ++ return "\\xC2"; ++ case 195: ++ return "\\xC3"; ++ case 196: ++ 
return "\\xC4"; ++ case 197: ++ return "\\xC5"; ++ case 198: ++ return "\\xC6"; ++ case 199: ++ return "\\xC7"; ++ case 200: ++ return "\\xC8"; ++ case 201: ++ return "\\xC9"; ++ case 202: ++ return "\\xCA"; ++ case 203: ++ return "\\xCB"; ++ case 204: ++ return "\\xCC"; ++ case 205: ++ return "\\xCD"; ++ case 206: ++ return "\\xCE"; ++ case 207: ++ return "\\xCF"; ++ case 208: ++ return "\\xD0"; ++ case 209: ++ return "\\xD1"; ++ case 210: ++ return "\\xD2"; ++ case 211: ++ return "\\xD3"; ++ case 212: ++ return "\\xD4"; ++ case 213: ++ return "\\xD5"; ++ case 214: ++ return "\\xD6"; ++ case 215: ++ return "\\xD7"; ++ case 216: ++ return "\\xD8"; ++ case 217: ++ return "\\xD9"; ++ case 218: ++ return "\\xDA"; ++ case 219: ++ return "\\xDB"; ++ case 220: ++ return "\\xDC"; ++ case 221: ++ return "\\xDD"; ++ case 222: ++ return "\\xDE"; ++ case 223: ++ return "\\xDF"; ++ case 224: ++ return "\\xE0"; ++ case 225: ++ return "\\xE1"; ++ case 226: ++ return "\\xE2"; ++ case 227: ++ return "\\xE3"; ++ case 228: ++ return "\\xE4"; ++ case 229: ++ return "\\xE5"; ++ case 230: ++ return "\\xE6"; ++ case 231: ++ return "\\xE7"; ++ case 232: ++ return "\\xE8"; ++ case 233: ++ return "\\xE9"; ++ case 234: ++ return "\\xEA"; ++ case 235: ++ return "\\xEB"; ++ case 236: ++ return "\\xEC"; ++ case 237: ++ return "\\xED"; ++ case 238: ++ return "\\xEE"; ++ case 239: ++ return "\\xEF"; ++ case 240: ++ return "\\xF0"; ++ case 241: ++ return "\\xF1"; ++ case 242: ++ return "\\xF2"; ++ case 243: ++ return "\\xF3"; ++ case 244: ++ return "\\xF4"; ++ case 245: ++ return "\\xF5"; ++ case 246: ++ return "\\xF6"; ++ case 247: ++ return "\\xF7"; ++ case 248: ++ return "\\xF8"; ++ case 249: ++ return "\\xF9"; ++ case 250: ++ return "\\xFA"; ++ case 251: ++ return "\\xFB"; ++ case 252: ++ return "\\xFC"; ++ case 253: ++ return "\\xFD"; ++ case 254: ++ return "\\xFE"; ++ case 255: ++ return "\\xFF"; ++ default: ++ assert(0); /* never gets here */ ++ return "dead code"; ++ } ++ assert(0); /* 
never gets here */ ++} ++ ++#endif /* XML_DTD */ ++ ++static unsigned long ++getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) { ++ const char *const valueOrNull = getenv(variableName); ++ const char * value; ++ char *afterValue; ++ unsigned long debugLevel; ++ if (valueOrNull == NULL) { ++ return defaultDebugLevel; ++ } ++ value = valueOrNull; ++ ++ errno = 0; ++ afterValue = (char *)value; ++ debugLevel = strtoul(value, &afterValue, 10); ++ if ((errno != 0) || (afterValue[0] != '\0')) { ++ errno = 0; ++ return defaultDebugLevel; ++ } ++ ++ return debugLevel; ++} +diff -ru misc/expat-2.2.10/tests/benchmark/Makefile.in misc/build/expat-2.2.10/tests/benchmark/Makefile.in +--- misc/expat-2.2.10/tests/benchmark/Makefile.in 2020-10-03 11:37:06.000000000 -0400 ++++ misc/build/expat-2.2.10/tests/benchmark/Makefile.in 2021-07-18 18:17:02.000000000 -0400 +@@ -1,7 +1,7 @@ +-# Makefile.in generated by automake 1.16.2 from Makefile.am. ++# Makefile.in generated by automake 1.16.1 from Makefile.am. + # @configure_input@ + +-# Copyright (C) 1994-2020 Free Software Foundation, Inc. ++# Copyright (C) 1994-2018 Free Software Foundation, Inc. + + # This Makefile.in is free software; the Free Software Foundation + # gives unlimited permission to copy and/or distribute it, +@@ -335,7 +335,6 @@ + prefix = @prefix@ + program_transform_name = @program_transform_name@ + psdir = @psdir@ +-runstatedir = @runstatedir@ + sbindir = @sbindir@ + sharedstatedir = @sharedstatedir@ + srcdir = @srcdir@ +diff -ru misc/expat-2.2.10/tests/Makefile.in misc/build/expat-2.2.10/tests/Makefile.in +--- misc/expat-2.2.10/tests/Makefile.in 2020-10-03 11:37:06.000000000 -0400 ++++ misc/build/expat-2.2.10/tests/Makefile.in 2021-07-18 18:17:02.000000000 -0400 +@@ -1,7 +1,7 @@ +-# Makefile.in generated by automake 1.16.2 from Makefile.am. ++# Makefile.in generated by automake 1.16.1 from Makefile.am. + # @configure_input@ + +-# Copyright (C) 1994-2020 Free Software Foundation, Inc. 
++# Copyright (C) 1994-2018 Free Software Foundation, Inc. + + # This Makefile.in is free software; the Free Software Foundation + # gives unlimited permission to copy and/or distribute it, +@@ -616,7 +616,6 @@ + prefix = @prefix@ + program_transform_name = @program_transform_name@ + psdir = @psdir@ +-runstatedir = @runstatedir@ + sbindir = @sbindir@ + sharedstatedir = @sharedstatedir@ + srcdir = @srcdir@ +diff -ru misc/expat-2.2.10/tests/runtests.c misc/build/expat-2.2.10/tests/runtests.c +--- misc/expat-2.2.10/tests/runtests.c 2020-10-03 11:14:57.000000000 -0400 ++++ misc/build/expat-2.2.10/tests/runtests.c 2021-07-18 17:27:08.000000000 -0400 +@@ -45,6 +45,7 @@ + #include <stddef.h> /* ptrdiff_t */ + #include <ctype.h> + #include <limits.h> ++#include <math.h> /* NAN, INFINITY, isnan */ + + #if defined(_WIN32) && defined(_MSC_VER) && (_MSC_VER < 1600) + /* For vs2003/7.1 up to vs2008/9.0; _MSC_VER 1600 is vs2010/10.0 */ +@@ -72,7 +73,7 @@ + #include "expat.h" + #include "chardata.h" + #include "structdata.h" +-#include "internal.h" /* for UNUSED_P only */ ++#include "internal.h" + #include "minicheck.h" + #include "memcheck.h" + #include "siphash.h" +@@ -11231,6 +11232,381 @@ + } + END_TEST + ++#if defined(XML_DTD) ++typedef enum XML_Status (*XmlParseFunction)(XML_Parser, const char *, int, int); ++ ++struct AccountingTestCase { ++ const char *primaryText; ++ const char *firstExternalText; /* often NULL */ ++ const char *secondExternalText; /* often NULL */ ++ const unsigned long long expectedCountBytesIndirectExtra; ++ XML_Bool singleBytesWanted; ++}; ++ ++static int ++accounting_external_entity_ref_handler(XML_Parser parser, ++ const XML_Char *context, ++ const XML_Char *base, ++ const XML_Char *systemId, ++ const XML_Char *publicId) { ++ UNUSED_P(context); ++ UNUSED_P(base); ++ UNUSED_P(publicId); ++ ++ const struct AccountingTestCase *const testCase ++ = (const struct AccountingTestCase *)XML_GetUserData(parser); ++ ++ const char *externalText = NULL; ++ if 
(xcstrcmp(systemId, XCS("first.ent")) == 0) { ++ externalText = testCase->firstExternalText; ++ } else if (xcstrcmp(systemId, XCS("second.ent")) == 0) { ++ externalText = testCase->secondExternalText; ++ } else { ++ assert(! "systemId is neither \"first.ent\" nor \"second.ent\""); ++ } ++ assert(externalText); ++ ++ XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); ++ assert(entParser); ++ ++ const XmlParseFunction xmlParseFunction ++ = testCase->singleBytesWanted ? _XML_Parse_SINGLE_BYTES : XML_Parse; ++ ++ const enum XML_Status status = xmlParseFunction( ++ entParser, externalText, (int)strlen(externalText), XML_TRUE); ++ ++ XML_ParserFree(entParser); ++ return status; ++} ++ ++START_TEST(test_accounting_precision) { ++ const XML_Bool filled_later = XML_TRUE; /* value is arbitrary */ ++ struct AccountingTestCase cases[] = { ++ {"<e/>", NULL, NULL, 0, 0}, ++ {"<e></e>", NULL, NULL, 0, 0}, ++ ++ /* Attributes */ ++ {"<e k1=\"v2\" k2=\"v2\"/>", NULL, NULL, 0, filled_later}, ++ {"<e k1=\"v2\" k2=\"v2\"></e>", NULL, NULL, 0, 0}, ++ {"<p:e xmlns:p=\"https://domain.invalid/\" />", NULL, NULL, 0, ++ filled_later}, ++ {"<e k=\"&amp;&apos;&gt;&lt;&quot;\" />", NULL, NULL, ++ sizeof(XML_Char) * 5 /* number of predefined entites */, filled_later}, ++ {"<e1 xmlns='https://example.org/'>\n" ++ " <e2 xmlns=''/>\n" ++ "</e1>", ++ NULL, NULL, 0, filled_later}, ++ ++ /* Text */ ++ {"<e>text</e>", NULL, NULL, 0, filled_later}, ++ {"<e1><e2>text1<e3/>text2</e2></e1>", NULL, NULL, 0, filled_later}, ++ {"<e>&amp;&apos;&gt;&lt;&quot;</e>", NULL, NULL, ++ sizeof(XML_Char) * 5 /* number of predefined entites */, filled_later}, ++ {"<e>&#65;&#41;</e>", NULL, NULL, 0, filled_later}, ++ ++ /* Prolog */ ++ {"<?xml version=\"1.0\"?><root/>", NULL, NULL, 0, filled_later}, ++ ++ /* Whitespace */ ++ {" <e1> <e2> </e2> </e1> ", NULL, NULL, 0, filled_later}, ++ {"<e1 ><e2 /></e1 >", NULL, NULL, 0, filled_later}, ++ {"<e1><e2 k = \"v\"/><e3 k = 'v'/></e1>", NULL, NULL, 0, filled_later}, ++ ++ /* Comments */ ++ {"<!-- 
Comment --><e><!-- Comment --></e>", NULL, NULL, 0, filled_later}, ++ ++ /* Processing instructions */ ++ {"<?xml-stylesheet type=\"text/xsl\" href=\"https://domain.invalid/\" media=\"all\"?><e/>", ++ NULL, NULL, 0, filled_later}, ++ {"<?pi0?><?pi1 ?><?pi2 ?><!DOCTYPE r SYSTEM 'first.ent'><r/>", ++ "<?pi3?><!ENTITY % e1 SYSTEM 'second.ent'><?pi4?>%e1;<?pi5?>", "<?pi6?>", ++ 0, filled_later}, ++ ++ /* CDATA */ ++ {"<e><![CDATA[one two three]]></e>", NULL, NULL, 0, filled_later}, ++ ++ /* Conditional sections */ ++ {"<!DOCTYPE r [\n" ++ "<!ENTITY % draft 'INCLUDE'>\n" ++ "<!ENTITY % final 'IGNORE'>\n" ++ "<!ENTITY % import SYSTEM \"first.ent\">\n" ++ "%import;\n" ++ "]>\n" ++ "<r/>\n", ++ "<![%draft;[<!--1-->]]>\n" ++ "<![%final;[<!--22-->]]>", ++ NULL, sizeof(XML_Char) * (strlen("INCLUDE") + strlen("IGNORE")), ++ filled_later}, ++ ++ /* General entities */ ++ {"<!DOCTYPE root [\n" ++ "<!ENTITY nine \"123456789\">\n" ++ "]>\n" ++ "<root>&nine;</root>", ++ NULL, NULL, sizeof(XML_Char) * strlen("123456789"), filled_later}, ++ {"<!DOCTYPE root [\n" ++ "<!ENTITY nine \"123456789\">\n" ++ "]>\n" ++ "<root k1=\"&nine;\"/>", ++ NULL, NULL, sizeof(XML_Char) * strlen("123456789"), filled_later}, ++ {"<!DOCTYPE root [\n" ++ "<!ENTITY nine \"123456789\">\n" ++ "<!ENTITY nine2 \"&nine;&nine;\">\n" ++ "]>\n" ++ "<root>&nine2;&nine2;&nine2;</root>", ++ NULL, NULL, ++ sizeof(XML_Char) * 3 /* calls to &nine2; */ * 2 /* calls to &nine; */ ++ * (strlen("&nine;") + strlen("123456789")), ++ filled_later}, ++ {"<!DOCTYPE r [\n" ++ " <!ENTITY five SYSTEM 'first.ent'>\n" ++ "]>\n" ++ "<r>&five;</r>", ++ "12345", NULL, 0, filled_later}, ++ ++ /* Parameter entities */ ++ {"<!DOCTYPE r [\n" ++ "<!ENTITY % comment \"<!---->\">\n" ++ "%comment;\n" ++ "]>\n" ++ "<r/>", ++ NULL, NULL, sizeof(XML_Char) * strlen("<!---->"), filled_later}, ++ {"<!DOCTYPE r [\n" ++ "<!ENTITY % ninedef \"&#60;!ENTITY nine &#34;123456789&#34;&#62;\">\n" ++ "%ninedef;\n" ++ "]>\n" ++ "<r>&nine;</r>", ++ NULL, NULL, ++ sizeof(XML_Char) 
++ * (strlen("<!ENTITY nine \"123456789\">") + strlen("123456789")), ++ filled_later}, ++ {"<!DOCTYPE r [\n" ++ "<!ENTITY % comment \"<!--1-->\">\n" ++ "<!ENTITY % comment2 \"&#37;comment;<!--22-->&#37;comment;\">\n" ++ "%comment2;\n" ++ "]>\n" ++ "<r/>\n", ++ NULL, NULL, ++ sizeof(XML_Char) ++ * (strlen("%comment;<!--22-->%comment;") + 2 * strlen("<!--1-->")), ++ filled_later}, ++ {"<!DOCTYPE r [\n" ++ " <!ENTITY % five \"12345\">\n" ++ " <!ENTITY % five2def \"&#60;!ENTITY five2 &#34;[&#37;five;][&#37;five;]]]]&#34;&#62;\">\n" ++ " %five2def;\n" ++ "]>\n" ++ "<r>&five2;</r>", ++ NULL, NULL, /* from "%five2def;": */ ++ sizeof(XML_Char) ++ * (strlen("<!ENTITY five2 \"[%five;][%five;]]]]\">") ++ + 2 /* calls to "%five;" */ * strlen("12345") ++ + /* from "&five2;": */ strlen("[12345][12345]]]]")), ++ filled_later}, ++ {"<!DOCTYPE r SYSTEM \"first.ent\">\n" ++ "<r/>", ++ "<!ENTITY % comment '<!--1-->'>\n" ++ "<!ENTITY % comment2 '<!--22-->%comment;<!--22-->%comment;<!--22-->'>\n" ++ "%comment2;", ++ NULL, ++ sizeof(XML_Char) ++ * (strlen("<!--22-->%comment;<!--22-->%comment;<!--22-->") ++ + 2 /* calls to "%comment;" */ * strlen("<!---->")), ++ filled_later}, ++ {"<!DOCTYPE r SYSTEM 'first.ent'>\n" ++ "<r/>", ++ "<!ENTITY % e1 PUBLIC 'foo' 'second.ent'>\n" ++ "<!ENTITY % e2 '<!--22-->%e1;<!--22-->'>\n" ++ "%e2;\n", ++ "<!--1-->", sizeof(XML_Char) * strlen("<!--22--><!--1--><!--22-->"), ++ filled_later}, ++ { ++ "<!DOCTYPE r SYSTEM 'first.ent'>\n" ++ "<r/>", ++ "<!ENTITY % e1 SYSTEM 'second.ent'>\n" ++ "<!ENTITY % e2 '%e1;'>", ++ "<?xml version='1.0' encoding='utf-8'?>\n" ++ "hello\n" ++ "xml" /* without trailing newline! */, ++ 0, ++ filled_later, ++ }, ++ { ++ "<!DOCTYPE r SYSTEM 'first.ent'>\n" ++ "<r/>", ++ "<!ENTITY % e1 SYSTEM 'second.ent'>\n" ++ "<!ENTITY % e2 '%e1;'>", ++ "<?xml version='1.0' encoding='utf-8'?>\n" ++ "hello\n" ++ "xml\n" /* with trailing newline! 
*/, ++ 0, ++ filled_later, ++ }, ++ {"<!DOCTYPE doc SYSTEM 'first.ent'>\n" ++ "<doc></doc>\n", ++ "<!ELEMENT doc EMPTY>\n" ++ "<!ENTITY % e1 SYSTEM 'second.ent'>\n" ++ "<!ENTITY % e2 '%e1;'>\n" ++ "%e1;\n", ++ "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>" /* UTF-8 BOM */, ++ strlen("\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"), filled_later}, ++ {"<!DOCTYPE r [\n" ++ " <!ENTITY five SYSTEM 'first.ent'>\n" ++ "]>\n" ++ "<r>&five;</r>", ++ "\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0, filled_later}, ++ }; ++ ++ const size_t countCases = sizeof(cases) / sizeof(cases[0]); ++ size_t u = 0; ++ for (; u < countCases; u++) { ++ size_t v = 0; ++ for (; v < 2; v++) { ++ const XML_Bool singleBytesWanted = (v == 0) ? XML_FALSE : XML_TRUE; ++ const unsigned long long expectedCountBytesDirect ++ = strlen(cases[u].primaryText); ++ const unsigned long long expectedCountBytesIndirect ++ = (cases[u].firstExternalText ? strlen(cases[u].firstExternalText) ++ : 0) ++ + (cases[u].secondExternalText ? strlen(cases[u].secondExternalText) ++ : 0) ++ + cases[u].expectedCountBytesIndirectExtra; ++ ++ XML_Parser parser = XML_ParserCreate(NULL); ++ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); ++ if (cases[u].firstExternalText) { ++ XML_SetExternalEntityRefHandler(parser, ++ accounting_external_entity_ref_handler); ++ XML_SetUserData(parser, (void *)&cases[u]); ++ cases[u].singleBytesWanted = singleBytesWanted; ++ } ++ ++ const XmlParseFunction xmlParseFunction ++ = singleBytesWanted ? 
_XML_Parse_SINGLE_BYTES : XML_Parse; ++ ++ enum XML_Status status ++ = xmlParseFunction(parser, cases[u].primaryText, ++ (int)strlen(cases[u].primaryText), XML_TRUE); ++ if (status != XML_STATUS_OK) { ++ _xml_failure(parser, __FILE__, __LINE__); ++ } ++ ++ const unsigned long long actualCountBytesDirect ++ = testingAccountingGetCountBytesDirect(parser); ++ const unsigned long long actualCountBytesIndirect ++ = testingAccountingGetCountBytesIndirect(parser); ++ ++ XML_ParserFree(parser); ++ ++ if (actualCountBytesDirect != expectedCountBytesDirect) { ++ fprintf( ++ stderr, ++ "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL( ++ "") " count direct bytes, got " EXPAT_FMT_ULL("") " instead.\n", ++ u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks", ++ expectedCountBytesDirect, actualCountBytesDirect); ++ fail("Count of direct bytes is off"); ++ } ++ ++ if (actualCountBytesIndirect != expectedCountBytesIndirect) { ++ fprintf( ++ stderr, ++ "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL( ++ "") " count indirect bytes, got " EXPAT_FMT_ULL("") " instead.\n", ++ u + 1, countCases, singleBytesWanted ? 
"single bytes" : "chunks", ++ expectedCountBytesIndirect, actualCountBytesIndirect); ++ fail("Count of indirect bytes is off"); ++ } ++ } ++ } ++} ++END_TEST ++ ++START_TEST(test_billion_laughs_attack_protection_api) { ++ XML_Parser parserWithoutParent = XML_ParserCreate(NULL); ++ XML_Parser parserWithParent ++ = XML_ExternalEntityParserCreate(parserWithoutParent, NULL, NULL); ++ if (parserWithoutParent == NULL) ++ fail("parserWithoutParent is NULL"); ++ if (parserWithParent == NULL) ++ fail("parserWithParent is NULL"); ++ ++ // XML_SetBillionLaughsAttackProtectionMaximumAmplification, error cases ++ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(NULL, 123.0f) ++ == XML_TRUE) ++ fail("Call with NULL parser is NOT supposed to succeed"); ++ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(parserWithParent, ++ 123.0f) ++ == XML_TRUE) ++ fail("Call with non-root parser is NOT supposed to succeed"); ++ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( ++ parserWithoutParent, NAN) ++ == XML_TRUE) ++ fail("Call with NaN limit is NOT supposed to succeed"); ++ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( ++ parserWithoutParent, -1.0f) ++ == XML_TRUE) ++ fail("Call with negative limit is NOT supposed to succeed"); ++ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( ++ parserWithoutParent, 0.9f) ++ == XML_TRUE) ++ fail("Call with positive limit <1.0 is NOT supposed to succeed"); ++ ++ // XML_SetBillionLaughsAttackProtectionMaximumAmplification, success cases ++ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( ++ parserWithoutParent, 1.0f) ++ == XML_FALSE) ++ fail("Call with positive limit >=1.0 is supposed to succeed"); ++ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( ++ parserWithoutParent, 123456.789f) ++ == XML_FALSE) ++ fail("Call with positive limit >=1.0 is supposed to succeed"); ++ if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( ++ parserWithoutParent, 
INFINITY) ++ == XML_FALSE) ++ fail("Call with positive limit >=1.0 is supposed to succeed"); ++ ++ // XML_SetBillionLaughsAttackProtectionActivationThreshold, error cases ++ if (XML_SetBillionLaughsAttackProtectionActivationThreshold(NULL, 123) ++ == XML_TRUE) ++ fail("Call with NULL parser is NOT supposed to succeed"); ++ if (XML_SetBillionLaughsAttackProtectionActivationThreshold(parserWithParent, ++ 123) ++ == XML_TRUE) ++ fail("Call with non-root parser is NOT supposed to succeed"); ++ ++ // XML_SetBillionLaughsAttackProtectionActivationThreshold, success cases ++ if (XML_SetBillionLaughsAttackProtectionActivationThreshold( ++ parserWithoutParent, 123) ++ == XML_FALSE) ++ fail("Call with non-NULL parentless parser is supposed to succeed"); ++ ++ XML_ParserFree(parserWithParent); ++ XML_ParserFree(parserWithoutParent); ++} ++END_TEST ++ ++START_TEST(test_helper_unsigned_char_to_printable) { ++ // Smoke test ++ unsigned char uc = 0; ++ for (; uc < (unsigned char)-1; uc++) { ++ const char *const printable = unsignedCharToPrintable(uc); ++ if (printable == NULL) ++ fail("unsignedCharToPrintable returned NULL"); ++ if (strlen(printable) < (size_t)1) ++ fail("unsignedCharToPrintable returned empty string"); ++ } ++ ++ // Two concrete samples ++ if (strcmp(unsignedCharToPrintable('A'), "A") != 0) ++ fail("unsignedCharToPrintable result mistaken"); ++ if (strcmp(unsignedCharToPrintable('\\'), "\\\\") != 0) ++ fail("unsignedCharToPrintable result mistaken"); ++} ++END_TEST ++#endif // defined(XML_DTD) ++ ++ ++ + static Suite * + make_suite(void) { + Suite *s = suite_create("basic"); +@@ -11239,6 +11615,9 @@ + TCase *tc_misc = tcase_create("miscellaneous tests"); + TCase *tc_alloc = tcase_create("allocation tests"); + TCase *tc_nsalloc = tcase_create("namespace allocation tests"); ++#if defined(XML_DTD) ++ TCase *tc_accounting = tcase_create("accounting tests"); ++#endif + + suite_add_tcase(s, tc_basic); + tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); 
+@@ -11603,6 +11982,13 @@ + tcase_add_test(tc_nsalloc, test_nsalloc_long_systemid_in_ext); + tcase_add_test(tc_nsalloc, test_nsalloc_prefixed_element); + ++#if defined(XML_DTD) ++ suite_add_tcase(s, tc_accounting); ++ tcase_add_test(tc_accounting, test_accounting_precision); ++ tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api); ++ tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable); ++#endif ++ + return s; + } + +diff -ru misc/expat-2.2.10/xmlwf/Makefile.in misc/build/expat-2.2.10/xmlwf/Makefile.in +--- misc/expat-2.2.10/xmlwf/Makefile.in 2020-10-03 11:37:06.000000000 -0400 ++++ misc/build/expat-2.2.10/xmlwf/Makefile.in 2021-07-18 18:17:02.000000000 -0400 +@@ -1,7 +1,7 @@ +-# Makefile.in generated by automake 1.16.2 from Makefile.am. ++# Makefile.in generated by automake 1.16.1 from Makefile.am. + # @configure_input@ + +-# Copyright (C) 1994-2020 Free Software Foundation, Inc. ++# Copyright (C) 1994-2018 Free Software Foundation, Inc. + + # This Makefile.in is free software; the Free Software Foundation + # gives unlimited permission to copy and/or distribute it, +@@ -344,7 +344,6 @@ + prefix = @prefix@ + program_transform_name = @program_transform_name@ + psdir = @psdir@ +-runstatedir = @runstatedir@ + sbindir = @sbindir@ + sharedstatedir = @sharedstatedir@ + srcdir = @srcdir@ +diff -ru misc/expat-2.2.10/xmlwf/xmltchar.h misc/build/expat-2.2.10/xmlwf/xmltchar.h +--- misc/expat-2.2.10/xmlwf/xmltchar.h 2020-09-25 13:47:39.000000000 -0400 ++++ misc/build/expat-2.2.10/xmlwf/xmltchar.h 2021-07-18 17:21:48.000000000 -0400 +@@ -54,6 +54,8 @@ + # define tmain wmain + # define tremove _wremove + # define tchar wchar_t ++# define tcstof wcstof ++# define tcstoull wcstoull + #else /* not XML_UNICODE */ + # define T(x) x + # define ftprintf fprintf +@@ -71,4 +73,6 @@ + # define tmain main + # define tremove remove + # define tchar char ++# define tcstof strtof ++# define tcstoull strtoull + #endif /* not XML_UNICODE */ +diff -ru 
misc/expat-2.2.10/xmlwf/xmlwf.c misc/build/expat-2.2.10/xmlwf/xmlwf.c +--- misc/expat-2.2.10/xmlwf/xmlwf.c 2020-09-25 13:47:39.000000000 -0400 ++++ misc/build/expat-2.2.10/xmlwf/xmlwf.c 2021-08-25 18:35:36.000000000 -0400 +@@ -30,11 +30,15 @@ + USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + ++#include <expat_config.h> ++ + #include <assert.h> + #include <stdio.h> + #include <stdlib.h> + #include <stddef.h> + #include <string.h> ++#include <math.h> /* for isnan */ ++#include <errno.h> + + #include "expat.h" + #include "codepage.h" +@@ -50,6 +54,14 @@ + # include <wchar.h> + #endif + ++enum ExitCode { ++ XMLWF_EXIT_SUCCESS = 0, ++ XMLWF_EXIT_INTERNAL_ERROR = 1, ++ XMLWF_EXIT_NOT_WELLFORMED = 2, ++ XMLWF_EXIT_OUTPUT_ERROR = 3, ++ XMLWF_EXIT_USAGE_ERROR = 4, ++}; ++ + /* Structures for handler user data */ + typedef struct NotationList { + struct NotationList *next; +@@ -875,6 +887,12 @@ + T(" -t write no XML output for [t]iming of plain parsing\n") + T(" -N enable adding doctype and [n]otation declarations\n") + T("\n") ++ T("billion laughs attack protection:\n") ++ T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n") ++ T("\n") ++ T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n") ++ T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n") ++ T("\n") + T("info arguments:\n") + T(" -h show this [h]elp message and exit\n") + T(" -v show program's [v]ersion number and exit\n") +@@ -891,6 +909,19 @@ + int wmain(int argc, XML_Char **argv); + #endif + ++#define XMLWF_SHIFT_ARG_INTO(constCharStarTarget, argc, argv, i, j) \ ++ { \ ++ if (argv[i][j + 1] == T('\0')) { \ ++ if (++i == argc) \ ++ usage(argv[0], XMLWF_EXIT_USAGE_ERROR); \ ++ constCharStarTarget = argv[i]; \ ++ } else { \ ++ constCharStarTarget = argv[i] + j + 1; \ ++ } \ ++ i++; \ ++ j = 0; \ ++ } ++ + int + tmain(int argc, XML_Char **argv) { + int i, j; +@@ -902,6 +933,11 @@ + int useNamespaces = 0; + int 
requireStandalone = 0; + int requiresNotations = 0; ++ ++ float attackMaximumAmplification = -1.0f; /* signaling "not set" */ ++ unsigned long long attackThresholdBytes; ++ XML_Bool attackThresholdGiven = XML_FALSE; ++ + enum XML_ParamEntityParsing paramEntityParsing + = XML_PARAM_ENTITY_PARSING_NEVER; + int useStdin = 0; +@@ -990,6 +1026,49 @@ + case T('v'): + showVersion(argv[0]); + return 0; ++ case T('a'): { ++ const XML_Char *valueText = NULL; ++ XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j); ++ ++ errno = 0; ++ XML_Char *afterValueText = (XML_Char *)valueText; ++ attackMaximumAmplification = tcstof(valueText, &afterValueText); ++ if ((errno != 0) || (afterValueText[0] != T('\0')) ++ || isnan(attackMaximumAmplification) ++ || (attackMaximumAmplification < 1.0f)) { ++ // This prevents tperror(..) from reporting misleading "[..]: Success" ++ errno = ERANGE; ++ tperror(T("invalid amplification limit") T( ++ " (needs a floating point number greater or equal than 1.0)")); ++ exit(XMLWF_EXIT_USAGE_ERROR); ++ } ++#ifndef XML_DTD ++ ftprintf(stderr, T("Warning: Given amplification limit ignored") T( ++ ", xmlwf has been compiled without DTD support.\n")); ++#endif ++ break; ++ } ++ case T('b'): { ++ const XML_Char *valueText = NULL; ++ XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j); ++ ++ errno = 0; ++ XML_Char *afterValueText = (XML_Char *)valueText; ++ attackThresholdBytes = tcstoull(valueText, &afterValueText, 10); ++ if ((errno != 0) || (afterValueText[0] != T('\0'))) { ++ // This prevents tperror(..) 
from reporting misleading "[..]: Success" ++ errno = ERANGE; ++ tperror(T("invalid ignore threshold") ++ T(" (needs an integer from 0 to 2^64-1)")); ++ exit(XMLWF_EXIT_USAGE_ERROR); ++ } ++ attackThresholdGiven = XML_TRUE; ++#ifndef XML_DTD ++ ftprintf(stderr, T("Warning: Given attack threshold ignored") T( ++ ", xmlwf has been compiled without DTD support.\n")); ++#endif ++ break; ++ } + case T('\0'): + if (j > 1) { + i++; +@@ -1020,6 +1099,19 @@ + exit(1); + } + ++ if (attackMaximumAmplification != -1.0f) { ++#ifdef XML_DTD ++ XML_SetBillionLaughsAttackProtectionMaximumAmplification( ++ parser, attackMaximumAmplification); ++#endif ++ } ++ if (attackThresholdGiven) { ++#ifdef XML_DTD ++ XML_SetBillionLaughsAttackProtectionActivationThreshold( ++ parser, attackThresholdBytes); ++#endif ++ } ++ + if (requireStandalone) + XML_SetNotStandaloneHandler(parser, notStandalone); + XML_SetParamEntityParsing(parser, paramEntityParsing); +diff -ru misc/expat-2.2.10/xmlwf/xmlwf_helpgen.py misc/build/expat-2.2.10/xmlwf/xmlwf_helpgen.py +--- misc/expat-2.2.10/xmlwf/xmlwf_helpgen.py 2020-09-25 13:47:39.000000000 -0400 ++++ misc/build/expat-2.2.10/xmlwf/xmlwf_helpgen.py 2021-07-18 17:21:48.000000000 -0400 +@@ -57,6 +57,14 @@ + output_mode.add_argument('-t', action='store_true', help='write no XML output for [t]iming of plain parsing') + output_related.add_argument('-N', action='store_true', help='enable adding doctype and [n]otation declarations') + ++billion_laughs = parser.add_argument_group('billion laughs attack protection', ++ description='NOTE: ' ++ 'If you ever need to increase these values ' ++ 'for non-attack payload, please file a bug report.') ++billion_laughs.add_argument('-a', metavar='FACTOR', ++ help='set maximum tolerated [a]mplification factor (default: 100.0)') ++billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB)') ++ + parser.add_argument('files', metavar='FILE', nargs='*', help='file to 
process (default: STDIN)') + + info = parser.add_argument_group('info arguments') diff --git a/main/expat/makefile.mk b/main/expat/makefile.mk index 20fd92b..901ee9a 100644 --- a/main/expat/makefile.mk +++ b/main/expat/makefile.mk @@ -41,7 +41,7 @@ all: TARFILE_NAME=expat-2.2.10 TARFILE_MD5=9d60de01cc0126dfd11121b04838e154 ADDITIONAL_FILES=lib$/makefile.mk -PATCH_FILES=$(TARFILE_NAME).patch +PATCH_FILES=$(TARFILE_NAME).patch expat-2.2.11.patch CONFIGURE_DIR= .IF "$(OS)"=="WNT"
