On Tue, 07 Feb 2017, Yaroslav Halchenko wrote:

> thanks for the very detailed report!!! I have adopted that patch for our
> now dated version of openpyxl  -- upload is coming shortly

Unfortunately, a blind adaptation of the patch wasn't sufficient: running
your PoC code now results in

$> python pyxl.py blank_passwd.xlsx          
Traceback (most recent call last):
  File "pyxl.py", line 5, in <module>
    wb = load_workbook(filename = sys.argv[1])
  File "/usr/lib/python2.7/dist-packages/openpyxl/reader/excel.py", line 179, in load_workbook
    wb.properties = read_properties(archive.read(ARC_CORE))
  File "/usr/lib/python2.7/dist-packages/openpyxl/workbook/properties.py", line 122, in read_properties
    tag = localname(node)
  File "/usr/lib/python2.7/dist-packages/openpyxl/xml/functions.py", line 122, in localname
    m = NS_REGEX.match(node.tag)
TypeError: expected string or buffer


while the most recent release, 2.4.2, works correctly.  Before I dive into it --
any clues as to what other changes should accompany the fix?  I am attaching my
adapted patch.

-- 
Yaroslav O. Halchenko
Center for Open Neuroscience     http://centerforopenneuroscience.org
Dartmouth College, 419 Moore Hall, Hinman Box 6207, Hanover, NH 03755
Phone: +1 (603) 646-9834                       Fax: +1 (603) 646-1419
WWW:   http://www.linkedin.com/in/yarik        
From: Yaroslav Halchenko <[email protected]>
Subject: do not resolve entities 

Adapted from upstream's commit, which was made on top of a more recent release

Origin: https://bitbucket.org/openpyxl/openpyxl/commits/3b4905f428e1
Bug-Debian: http://bugs.debian.org/854442
Applied-Upstream:  2017-01-17
Last-Update: 2017-02-07

--- a/openpyxl/conftest.py
+++ b/openpyxl/conftest.py
@@ -47,4 +47,8 @@ def pytest_runtest_setup(item):
             from lxml.etree import LIBXML_VERSION
             if LIBXML_VERSION < (3, 4, 0, 0):
                 pytest.skip("LXML >= 3.4 is required")
+        elif item.get_marker("no_lxml"):
+            from openpyxl import LXML
+            if LXML:
+                pytest.skip("LXML has a different interface")
 
--- a/openpyxl/xml/functions.py
+++ b/openpyxl/xml/functions.py
@@ -21,11 +21,14 @@ if LXML is True:
     fromstring,
     tostring,
     register_namespace,
-    iterparse,
     QName,
-    xmlfile
+    xmlfile,
+    XMLParser,
     )
     from xml.etree.cElementTree import iterparse
+    # do not resolve entities
+    safe_parser = XMLParser(resolve_entities=False)
+    fromstring = partial(fromstring, parser=safe_parser)
 else:
     try:
         from xml.etree.cElementTree import (
--- a/openpyxl/xml/tests/test_functions.py
+++ b/openpyxl/xml/tests/test_functions.py
@@ -2,6 +2,7 @@ import pytest
 
 from openpyxl.xml.functions import ConditionalElement
 
+import xml
 
 @pytest.fixture
 def root():
@@ -50,3 +51,26 @@ def test_localtag(xml, tag):
     from .. functions import fromstring
     node = fromstring(xml)
     assert localname(node) == tag
+
+
+@pytest.mark.lxml_required
+def test_dont_resolve():
+    from ..functions import fromstring
+    s = b"""<?xml version="1.0" encoding="ISO-8859-1"?>
+            <!DOCTYPE foo [
+            <!ELEMENT foo ANY >
+            <!ENTITY xxe SYSTEM "file:///dev/random" >]>
+            <foo>&xxe;</foo>"""
+    node = fromstring(s)
+
+
+@pytest.mark.no_lxml
+def test_dont_resolve():
+    from ..functions import fromstring
+    s = b"""<?xml version="1.0" encoding="ISO-8859-1"?>
+            <!DOCTYPE foo [
+            <!ELEMENT foo ANY >
+            <!ENTITY xxe SYSTEM "file:///dev/random" >]>
+            <foo>&xxe;</foo>"""
+    with pytest.raises(xml.etree.ElementTree.ParseError):
+        node = fromstring(s)
--- a/pytest.ini
+++ b/pytest.ini
@@ -9,3 +9,4 @@ markers =
     not_py33: Do not run test on Python 3.
     lxml_required: lxml required to run test
     lxml_buffering: lxml >= 3.4.0 required
+    no_lxml: do not use lxml

Attachment: signature.asc
Description: PGP signature

Reply via email to