commit:     5480034ddf29a050671aa22effdd1cefb1a97f55
Author:     Arfrever Frehtes Taifersar Arahesis <Arfrever <AT> Apache <DOT> Org>
AuthorDate: Mon Oct 19 16:00:00 2020 +0000
Commit:     Patrice Clement <monsieurp <AT> gentoo <DOT> org>
CommitDate: Thu Nov  5 21:05:44 2020 +0000
URL:        https://gitweb.gentoo.org/proj/javatoolkit.git/commit/?id=5480034d

allow expanding external entities through SAX parser.

Since Python 3.6.7, 3.7.1 and 3.8.0, the xml.sax module no longer expands
external entities by default:
https://bugs.python.org/issue17239
3.6: https://github.com/python/cpython/commit/582d188e6e3487180891f1fc457a80dec8be26a8
3.7: https://github.com/python/cpython/commit/394e55a9279d17240ef6fe85d3b4ea3fe7b6dff5
3.8: https://github.com/python/cpython/commit/17b1d5d4e36aa57a9b25a0e694affbd1ee637e45

build.xml files may contain external entities that resolve to other .xml files
in the given package.

Closes: https://bugs.gentoo.org/698954
Signed-off-by: Arfrever Frehtes Taifersar Arahesis <Arfrever <AT> Apache.Org>
Signed-off-by: Patrice Clement <monsieurp <AT> gentoo.org>

 src/py/javatoolkit/xml/SaxRewriter.py | 10 ++++----
 src/py/javatoolkit/xml/sax.py         | 44 +++++++++++++++++++++++++++++++++++
 src/py/xml-rewrite-2.py               | 17 ++++++++------
 3 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/src/py/javatoolkit/xml/SaxRewriter.py b/src/py/javatoolkit/xml/SaxRewriter.py
index f9e224a..8d76de2 100644
--- a/src/py/javatoolkit/xml/SaxRewriter.py
+++ b/src/py/javatoolkit/xml/SaxRewriter.py
@@ -1,14 +1,17 @@
 # -*- coding: UTF-8 -*-
-# Copyright 2004-2005 Gentoo Foundation
+# Copyright 2004-2020 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
+import io
 import os
 import sys
-import io
 
+import xml.sax.handler
 from xml.sax.saxutils import XMLGenerator
 from xml.sax.saxutils import quoteattr
 
+import javatoolkit.xml.sax
+
 class SaxRewriter(XMLGenerator):
     """
     Using Sax gives us the support for writing back doctypes and all easily
@@ -124,8 +127,7 @@ class SaxRewriter(XMLGenerator):
 
     def process(self, in_stream, callback):
         self.startElement = callback
-        from xml.sax import parseString
-        parseString(in_stream.encode('UTF8'), self)
+        javatoolkit.xml.sax.parse_string(in_stream.encode('UTF8'), content_handler=self, features={xml.sax.handler.feature_external_ges: 1})
         self.p('\n')
 
 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 nowrap:

diff --git a/src/py/javatoolkit/xml/sax.py b/src/py/javatoolkit/xml/sax.py
new file mode 100644
index 0000000..d3d2a4c
--- /dev/null
+++ b/src/py/javatoolkit/xml/sax.py
@@ -0,0 +1,44 @@
+# Copyright 2020 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+import io
+import xml.sax
+import xml.sax.xmlreader
+
+def _make_parser(*, content_handler=None, dtd_handler=None, entity_resolver=None, error_handler=None, locale=None, features=None, properties=None):
+    parser = xml.sax.make_parser()
+
+    if content_handler is not None:
+        parser.setContentHandler(content_handler)
+    if dtd_handler is not None:
+        parser.setDTDHandler(dtd_handler)
+    if entity_resolver is not None:
+        parser.setEntityResolver(entity_resolver)
+    if error_handler is not None:
+        parser.setErrorHandler(error_handler)
+    if locale is not None:
+        parser.setLocale(locale)
+    if features is not None:
+        for feature, value in features.items():
+            parser.setFeature(feature, value)
+    if properties is not None:
+        for property, value in properties.items():
+            parser.setProperty(property, value)
+
+    return parser
+
+def parse(source, *, content_handler=None, dtd_handler=None, entity_resolver=None, error_handler=None, locale=None, features=None, properties=None):
+    parser = _make_parser(content_handler=content_handler, dtd_handler=dtd_handler, entity_resolver=entity_resolver, error_handler=error_handler, locale=locale, features=features, properties=properties)
+
+    parser.parse(source)
+
+def parse_string(string, *, content_handler=None, dtd_handler=None, entity_resolver=None, error_handler=None, locale=None, features=None, properties=None):
+    parser = _make_parser(content_handler=content_handler, dtd_handler=dtd_handler, entity_resolver=entity_resolver, error_handler=error_handler, locale=locale, features=features, properties=properties)
+
+    inputsource = xml.sax.xmlreader.InputSource()
+    if isinstance(string, str):
+        inputsource.setCharacterStream(io.StringIO(string))
+    else:
+        inputsource.setByteStream(io.BytesIO(string))
+
+    parser.parse(inputsource)

diff --git a/src/py/xml-rewrite-2.py b/src/py/xml-rewrite-2.py
index 4035119..ad0f12e 100755
--- a/src/py/xml-rewrite-2.py
+++ b/src/py/xml-rewrite-2.py
@@ -1,13 +1,17 @@
 #!/usr/bin/env python3
-# Copyright 2004-2006 Gentoo Foundation
-# Distributed under the terms of the GNU General Public Licence v2
+# Copyright 2004-2020 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
 
 
-import sys
 import io
-from xml.sax.saxutils import quoteattr, escape
+import sys
+
 from optparse import OptionParser, make_option
-from xml.sax.saxutils import XMLGenerator
+
+import xml.sax.handler
+from xml.sax.saxutils import XMLGenerator, escape, quoteattr
+
+import javatoolkit.xml.sax
 
 
 def add_gentoo_classpath(document):
@@ -190,8 +194,7 @@ class SaxRewriter(XMLGenerator, StreamRewriterBase):
         XMLGenerator.__init__(self, self.buffer, 'UTF-8')
 
     def process(self, in_stream):
-        from xml.sax import parse
-        parse(in_stream, self)
+        javatoolkit.xml.sax.parse(in_stream, content_handler=self, features={xml.sax.handler.feature_external_ges: 1})
         self.p('\n')
 
     def startElement(self, name, attrs):

Reply via email to