Source: diffoscope Version: 84 Severity: wishlist This patch adds an XML comparator, as requested in our wishlist. It closes #866120 and shows XML some love.
Signed-off-by: Juliana Rodrigues <juliana.o...@gmail.com> --- diffoscope/comparators/__init__.py | 1 + diffoscope/comparators/xml.py | 101 +++++++++++++++++++++++++++++++++++++ tests/comparators/test_xml.py | 49 ++++++++++++++++++ tests/data/test1.xml | 9 ++++ tests/data/test2.xml | 9 ++++ tests/data/test_invalid.xml | 8 +++ tests/data/test_xml_expected_diff | 14 +++++ 7 files changed, 191 insertions(+) create mode 100644 diffoscope/comparators/xml.py create mode 100644 tests/comparators/test_xml.py create mode 100644 tests/data/test1.xml create mode 100644 tests/data/test2.xml create mode 100644 tests/data/test_invalid.xml create mode 100644 tests/data/test_xml_expected_diff diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py index d22aa79..7653741 100644 --- a/diffoscope/comparators/__init__.py +++ b/diffoscope/comparators/__init__.py @@ -40,6 +40,7 @@ class ComparatorManager(object): ('ps.PsFile',), ('javascript.JavaScriptFile',), ('json.JSONFile',), + ('xml.XMLFile',), ('text.TextFile',), ('bzip2.Bzip2File',), ('cpio.CpioFile',), diff --git a/diffoscope/comparators/xml.py b/diffoscope/comparators/xml.py new file mode 100644 index 0000000..d46f1bc --- /dev/null +++ b/diffoscope/comparators/xml.py @@ -0,0 +1,101 @@ +import re + +from xml.dom import minidom +from diffoscope.difference import Difference +from diffoscope.comparators.utils.file import File +from xml.parsers.expat import ExpatError + +def _format(node): + """ + Removes *inplace* spaces from minidom.Document + + Args: + node -- A xml.dom.minidom.Document object + + Returns: + void + """ + for n in node.childNodes: + if n.nodeType == minidom.Node.TEXT_NODE: + if n.nodeValue: n.nodeValue = n.nodeValue.strip() + elif n.nodeType == minidom.Node.ELEMENT_NODE: + _format(n) + +def _parse(file): + """ + Formats a minidom.Document file and returns XML as string. 
+ + Args: + file -- An io.TextIOWrapper object + + Returns: + str: formated string object + """ + xml = minidom.parse(file) + _format(xml) + xml.normalize() + return xml.toprettyxml(indent=2*' ') + + +class XMLFile(File): + """ + XML Files Comparison class + + Attributes: + RE_FILE_EXTENSION (SRE_Pattern): xml file extension pattern + """ + RE_FILE_EXTENSION = re.compile(r'\.xml$') + + @staticmethod + def recognizes(file): + """ + Identifies if a given file has XML extension + + Args: + file - a diffoscope.comparators.utils.file.File object + + Returns: + False if file is not a XML File, True otherwise + """ + if XMLFile.RE_FILE_EXTENSION.search(file.name) is None: + return False + + with open(file.path) as f: + try: + file.parsed = _parse(f) + except ExpatError: + return False + + return True + + def compare_details(self, other, source=None): + """ + Compares self.object with another, returning a Difference object + + Args: + other -- A XMLFile object + source + + Returns: + A diffoscope.difference.Difference object + """ + return [ Difference.from_text(self.dumps(self), self.dumps(other), + self.path, other.path)] + + def dumps(self, file): + """ + Opens a XMLFile and returns its parsed content + + Args: + file -- XMLFile object + + Returns: + str -- Formatted XML content from file + """ + if file.parsed: + return file.parsed + + with open(file.path) as f: + return _parse(f) + + diff --git a/tests/comparators/test_xml.py b/tests/comparators/test_xml.py new file mode 100644 index 0000000..e8e0aed --- /dev/null +++ b/tests/comparators/test_xml.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +# +# diffoscope: in-depth comparison of files, archives, and directories +# +# Copyright © 2016 Chris Lamb <la...@debian.org> +# +# diffoscope is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# diffoscope is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with diffoscope. If not, see <https://www.gnu.org/licenses/>. + +import pytest + +from diffoscope.comparators.xml import XMLFile + +from ..utils.data import load_fixture, get_data +from ..utils.nonexisting import assert_non_existing + + +xml_a = load_fixture('test1.xml') +xml_b = load_fixture('test2.xml') +invalid_xml = load_fixture('test_invalid.xml') + +def test_identification(xml_a): + assert isinstance(xml_a, XMLFile) + +def test_invalid(invalid_xml): + assert not isinstance(invalid_xml, XMLFile) + +def test_no_differences(xml_a): + assert xml_a.compare(xml_a) is None + +@pytest.fixture +def differences(xml_a, xml_b): + return xml_a.compare(xml_b).details + +def test_diff(differences): + expected_diff = get_data('test_xml_expected_diff') + assert differences[0].unified_diff == expected_diff + + diff --git a/tests/data/test1.xml b/tests/data/test1.xml new file mode 100644 index 0000000..b02bf09 --- /dev/null +++ b/tests/data/test1.xml @@ -0,0 +1,9 @@ +<note> + <style type="text/css" id="night-mode-pro-style" /> + <link type="text/css" rel="stylesheet" + id="night-mode-pro-link" /> + <to>Tove</to> + <from>Jani</from> + <heading>Reminder</heading> + <body>Don't forget me this weekend!</body> +</note> diff --git a/tests/data/test2.xml b/tests/data/test2.xml new file mode 100644 index 0000000..7e892f8 --- /dev/null +++ b/tests/data/test2.xml @@ -0,0 +1,9 @@ +<note> + <style type="text/css" id="night-mode-pro-style" /> + <link type="text/css" rel="stylesheet" + id="night-mode-pro-link" /> + <to>Jani</to> + <from>Toni</from> + <heading>Re: Reminder</heading> + <body>Pick me up on 5!</body> +</note> diff --git a/tests/data/test_invalid.xml 
b/tests/data/test_invalid.xml new file mode 100644 index 0000000..2a4cd51 --- /dev/null +++ b/tests/data/test_invalid.xml @@ -0,0 +1,8 @@ +<note> + style type="text/css" id="night-mode-pro-style" /> + <link type="text/css" rel="stylesheet" + id="night-mode-pro-link" /> + <to>Tove</to> + from>Jani</from> + <heading>Reminder</heading> + <body>Don't forget me this weekend!</body> diff --git a/tests/data/test_xml_expected_diff b/tests/data/test_xml_expected_diff new file mode 100644 index 0000000..0b450cb --- /dev/null +++ b/tests/data/test_xml_expected_diff @@ -0,0 +1,14 @@ +@@ -1,9 +1,9 @@ + <?xml version="1.0" ?> + <note> + <style id="night-mode-pro-style" type="text/css"/> + <link id="night-mode-pro-link" rel="stylesheet" type="text/css"/> +- <to>Tove</to> +- <from>Jani</from> +- <heading>Reminder</heading> +- <body>Don't forget me this weekend!</body> ++ <to>Jani</to> ++ <from>Toni</from> ++ <heading>Re: Reminder</heading> ++ <body>Pick me up on 5!</body> + </note> -- 2.13.2