[ 
https://issues.apache.org/jira/browse/HADOOP-18469?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17611635#comment-17611635
 ] 

ASF GitHub Bot commented on HADOOP-18469:
-----------------------------------------

steveloughran commented on code in PR #4940:
URL: https://github.com/apache/hadoop/pull/4940#discussion_r984671783


##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/CLITestHelper.java:
##########
@@ -24,6 +24,7 @@
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.XMLUtils;

Review Comment:
   it, add a newline



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java:
##########
@@ -101,6 +101,7 @@
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.StringInterner;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.XMLUtils;

Review Comment:
   can you add a newline



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java:
##########
@@ -61,4 +67,45 @@ public static void transform(
     // and send the output to a Result object.
     transformer.transform(new StreamSource(xml), new StreamResult(out));
   }
+
+  public static DocumentBuilderFactory newSecureDocumentBuilderFactory()
+          throws ParserConfigurationException {
+    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+    dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+    dbf.setFeature("http://apache.org/xml/features/disallow-doctype-decl";, 
true);

Review Comment:
   why not make these constant and share across all methods



##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java:
##########
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import org.junit.Test;
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+import org.xml.sax.helpers.DefaultHandler;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.SAXParser;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import java.io.StringReader;
+import java.io.StringWriter;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+public class TestXMLUtils {

Review Comment:
   1. prefer assertJ assertThat() asserts, which are much more informative. 
they save writing the assert failure messages which i would otherwise be asking 
for...
   
   2.  what about a test for security: that you cannot do entity expansion or 
references? as that is what we want from the production code



##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java:
##########
@@ -31,6 +31,7 @@
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 
+import org.apache.hadoop.util.XMLUtils;

Review Comment:
   put below L44



##########
hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java:
##########
@@ -17,36 +17,34 @@
  */
 package org.apache.hadoop.tools.rumen;
 
+import java.io.StringReader;
 import java.util.Properties;
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
 
-import java.io.InputStream;
-import java.io.ByteArrayInputStream;
 import java.io.IOException;
 
-import java.nio.charset.Charset;
-
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.ParserConfigurationException;
 
 import org.apache.hadoop.mapreduce.MRConfig;
 import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.util.XMLUtils;

Review Comment:
   add a newline



##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java:
##########
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import org.junit.Test;
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+import org.xml.sax.helpers.DefaultHandler;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.SAXParser;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import java.io.StringReader;
+import java.io.StringWriter;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+public class TestXMLUtils {
+
+  @Test
+  public void testSecureDocumentBuilderFactory() throws Exception {
+    DocumentBuilder db = 
XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder();
+    Document doc = db.parse(new InputSource(new StringReader("<root/>")));
+    assertNotNull(doc);
+  }
+
+  @Test
+  public void testSecureSAXParserFactory() throws Exception {
+    SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser();
+    parser.parse(new InputSource(new StringReader("<root/>")), new 
DefaultHandler());
+  }
+
+  @Test
+  public void testSecureTransformerFactory() throws Exception {
+    Transformer transformer = 
XMLUtils.newSecureTransformerFactory().newTransformer();
+    DocumentBuilder db = 
XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder();
+    Document doc = db.parse(new InputSource(new StringReader("<root/>")));
+    try (StringWriter stringWriter = new StringWriter()) {
+      transformer.transform(new DOMSource(doc), new 
StreamResult(stringWriter));
+      assertTrue(stringWriter.toString().contains("<root"));

Review Comment:
   use assertJ string asserts



##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java:
##########
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import org.junit.Test;

Review Comment:
   afraid your imports are out of sync with most of the hadoop stuff, which is 
   
   java.*
   javax
   
   non-asf
   
   asf
   
   static



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java:
##########
@@ -61,4 +67,45 @@ public static void transform(
     // and send the output to a Result object.
     transformer.transform(new StreamSource(xml), new StreamResult(out));
   }
+
+  public static DocumentBuilderFactory newSecureDocumentBuilderFactory()

Review Comment:
   javadocs, including SHOULD BE way to create new xml parsers



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java:
##########
@@ -18,11 +18,17 @@
 
 package org.apache.hadoop.util;
 
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
 import javax.xml.transform.*;
+import javax.xml.transform.sax.SAXTransformerFactory;
 import javax.xml.transform.stream.*;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.xml.sax.SAXException;

Review Comment:
   nit, separate group from apache imports





> Add XMLUtils methods to centralise code that creates secure XML parsers
> -----------------------------------------------------------------------
>
>                 Key: HADOOP-18469
>                 URL: https://issues.apache.org/jira/browse/HADOOP-18469
>             Project: Hadoop Common
>          Issue Type: Improvement
>            Reporter: PJ Fanning
>            Priority: Major
>              Labels: pull-request-available
>
> Relates to HDFS-16766
> There are other places in the code where DocumentBuilderFactory instances are 
> created that could benefit from the same changes as HDFS-16766



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to