This is an automated email from the ASF dual-hosted git repository. asf-gitbox-commits pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
commit 7d99b77b9b3fffb4fc1d5595cac36d31a01e1a1f Author: Martin Desruisseaux <[email protected]> AuthorDate: Thu May 7 12:08:43 2026 +0200 Add an option for allowing users to supply their own `XMLInputFactory`. Reuse the check done in `InputFactory` against unsupported properties. --- .../org/apache/sis/xml/PooledUnmarshaller.java | 8 ++--- .../sis/xml/internal/shared/InputFactory.java | 42 +++++++++++++--------- .../main/org/apache/sis/xml/package-info.java | 24 ++++++------- .../sis/xml/internal/shared/InputFactoryTest.java | 8 ++--- .../sis/storage/geotiff/reader/XMLMetadata.java | 3 +- .../sis/storage/xml/stream/StaxDataStore.java | 39 +++++++++++--------- .../main/org/apache/sis/storage/OptionKey.java | 12 +++++++ 7 files changed, 81 insertions(+), 55 deletions(-) diff --git a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/PooledUnmarshaller.java b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/PooledUnmarshaller.java index 84e86f0793..cbc6f82ba6 100644 --- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/PooledUnmarshaller.java +++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/PooledUnmarshaller.java @@ -56,10 +56,10 @@ import org.apache.sis.util.resources.Errors; * <ul> * <li>Save properties before modification, in order to restore them to their original values * when the unmarshaller is recycled.</li> - * <li>Constructs a SIS {@link Context} object on unmarshalling, in order to give - * additional information to the SIS object being unmarshalled.</li> - * <li>Wraps the input stream in a {@link TransformingReader} if the document GML version - * in not the SIS native GML version.</li> + * <li>Construct a <abbr>SIS</abbr> {@link Context} object on unmarshalling, + * in order to give additional information to the SIS object being unmarshalled.</li> + * <li>Wrap the input stream in a {@link TransformingReader} if the document <abbr>GML</abbr> + * version is not the <abbr>SIS</abbr> native <abbr>GML</abbr> version.</li> * </ul> * * @author Martin Desruisseaux (Geomatys) diff --git a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/internal/shared/InputFactory.java b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/internal/shared/InputFactory.java index facf5ee1f0..f2a35ad9e6 100644 --- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/internal/shared/InputFactory.java +++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/internal/shared/InputFactory.java @@ -37,14 +37,14 @@ import org.apache.sis.util.logging.Logging; /** - * Provides access to {@link XMLInputFactory} methods as static methods working on a SIS-wide instance. - * This convenience is provided in a separated class in order to allow the JVM to instantiate the factory - * only when first needed, when initializing this class. + * Provides access to {@link XMLInputFactory} methods as static methods working on a <abbr>SIS</abbr>-wide instance. + * This convenience class is provided in a separated class in order to allow the <abbr>JVM</abbr> to instantiate the + * factory only when first needed, when initializing this class. * * <h2>Security</h2> - * Unless the user has configured the {@code javax.xml.accessExternalDTD} property to something else - * than {@code "all"}, this class disallows external DTDs referenced by the {@code "file"} protocols. - * Allowed protocols are <abbr>HTTP</abbr> and <abbr>HTTPS</abbr> (list may be expanded if needed). + * Unless user has configured the {@code javax.xml.accessExternalDTD} property to something else than {@code "all"}, + * this class disallows external <abbr>DTD</abbr>s. While {@code "file"} is the most important protocol to disable + * for preventing leaks of sensitive data, other protocols are disabled too for avoiding recursive fetching. * * @author Martin Desruisseaux (Geomatys) * @@ -52,19 +52,26 @@ import org.apache.sis.util.logging.Logging; */ public final class InputFactory { /** - * The SIS-wide factory. This factory can be specified by the user, for example using the - * {@code javax.xml.stream.XMLInputFactory} system property or with {@code META-INF/services}. + * The <abbr>SIS</abbr>-wide factory. * - * @see org.apache.sis.storage.xml.stream.StaxDataStore#inputFactory() + * @todo Should be replaced by a field in the classes that need it. There are not many. */ - private static final XMLInputFactory FACTORY = XMLInputFactory.newInstance(); - static { + private static final XMLInputFactory FACTORY = newSecureFactory(); + + /** + * Creates a new <abbr>XML</abbr> factory with some security setting enabled. + * + * @return a new <abbr>XML</abbr> factory. + */ + public static XMLInputFactory newSecureFactory() { + final XMLInputFactory factory = XMLInputFactory.newFactory(); + if (factory.isPropertySupported(XMLConstants.FEATURE_SECURE_PROCESSING)) { + factory.setProperty(XMLConstants.FEATURE_SECURE_PROCESSING, Boolean.TRUE); + } try { - if (FACTORY.isPropertySupported(XMLConstants.FEATURE_SECURE_PROCESSING)) { - FACTORY.setProperty(XMLConstants.FEATURE_SECURE_PROCESSING, Boolean.TRUE); - } - if ("all".equals(FACTORY.getProperty(XMLConstants.ACCESS_EXTERNAL_DTD))) { - FACTORY.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + // No `isPropertySupported(…)` because support of this property is required. + if ("all".equals(factory.getProperty(XMLConstants.ACCESS_EXTERNAL_DTD))) { + factory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); } } catch (IllegalArgumentException e) { /* @@ -74,8 +81,9 @@ public final class InputFactory { */ final var record = new LogRecord(Level.CONFIG, e.getLocalizedMessage()); record.setThrown(e); - Logging.completeAndLog(Logger.getLogger(Loggers.XML), InputFactory.class, "createXMLEventReader", record); + Logging.completeAndLog(Logger.getLogger(Loggers.XML), InputFactory.class, "newSecureFactory", record); } + return factory; } /** diff --git a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/package-info.java b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/package-info.java index 0af032a3d2..eae744a91e 100644 --- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/package-info.java +++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/package-info.java @@ -16,9 +16,9 @@ */ /** - * Provides methods for marshalling and unmarshalling SIS objects in XML. - * The XML format is compliant with ISO 19115-3 specification for metadata, - * and compliant with GML for referencing objects. + * Provides methods for marshalling and unmarshalling SIS objects in <abbr>XML</abbr>. + * The <abbr>XML</abbr> format is compliant with <abbr>ISO</abbr> 19115-3 specification for metadata, + * and compliant with <abbr>GML</abbr> for referencing objects. * * <p>The main class in this package is {@link org.apache.sis.xml.XML}, which provides * property keys that can be used for configuring (un)marshallers and convenience static methods. @@ -41,16 +41,16 @@ * </cit:CI_Citation> * } * - * <h2>Customizing the XML</h2> - * In order to parse and format ISO 19115-3 compliant documents, SIS needs its own - * {@link jakarta.xml.bind.Marshaller} and {@link jakarta.xml.bind.Unmarshaller} instances - * (which are actually wrappers around standard instances). Those instances are created - * and cached by {@link org.apache.sis.xml.MarshallerPool}, which is used internally by - * the above-cited {@code XML} class. However, developers can instantiate their own - * {@code MarshallerPool} in order to get more control on the marshalling and unmarshalling - * processes, including the namespace URLs and the errors handling. + * <h2>Customizing the <abbr>XML</abbr></h2> + * In order to parse and format <abbr>ISO</abbr> 19115-3 compliant documents, + * Apache <abbr>SIS</abbr> needs its own {@link jakarta.xml.bind.Marshaller} + * and {@link jakarta.xml.bind.Unmarshaller} instances + * (which are actually wrappers around standard instances). + * Those instances are created and cached by {@link org.apache.sis.xml.MarshallerPool}. + * Developers can instantiate their own {@code MarshallerPool} if they need to configure, + * properties such as the namespace <abbr>URL</abbr>s and the errors handling. * - * <p>The most common namespace URLs are defined in the {@link org.apache.sis.xml.Namespaces} class. + * <p>The most common namespace <abbr>URL</abbr>s are defined in the {@link org.apache.sis.xml.Namespaces} class. * The parsing of some objects like {@link java.net.URL} and {@link java.util.UUID}, * together with the behavior in case of parsing error, can be specified by the * {@link org.apache.sis.xml.ValueConverter} class.</p> diff --git a/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/internal/shared/InputFactoryTest.java b/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/internal/shared/InputFactoryTest.java index fecdc86aa1..2a6a1f4be5 100644 --- a/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/internal/shared/InputFactoryTest.java +++ b/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/internal/shared/InputFactoryTest.java @@ -68,7 +68,7 @@ public final class InputFactoryTest extends TestCase { @Test @Disabled("JAXP09020006") // NullPointerException: argument 'catalog' cannot be NULL. TODO: test with Maven4. public void verifyExternalEntityAccess() throws IOException, XMLStreamException { - final XMLInputFactory factory = XMLInputFactory.newInstance(); + final XMLInputFactory factory = XMLInputFactory.newDefaultFactory(); assumeTrue("all".equals(factory.getProperty(XMLConstants.ACCESS_EXTERNAL_DTD))); assumeTrue(Boolean.TRUE.equals(factory.getProperty(XMLInputFactory.SUPPORT_DTD))); assumeTrue(Boolean.TRUE.equals(factory.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES))); @@ -84,7 +84,7 @@ public final class InputFactoryTest extends TestCase { */ @Test public void testDisableExternalEntities() throws IOException, XMLStreamException { - final XMLInputFactory factory = XMLInputFactory.newInstance(); + final XMLInputFactory factory = XMLInputFactory.newDefaultFactory(); factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE); readExternalEntity(factory, true); } @@ -97,7 +97,7 @@ public final class InputFactoryTest extends TestCase { */ @Test public void testDisableDTD() throws IOException { - final XMLInputFactory factory = XMLInputFactory.newInstance(); + final XMLInputFactory factory = XMLInputFactory.newDefaultFactory(); factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE); var ex = assertThrows(XMLStreamException.class, () -> readExternalEntity(factory, false)); assertMessageContains(ex, "xxe"); @@ -112,7 +112,7 @@ public final class InputFactoryTest extends TestCase { @Test @Disabled("JAXP09020006") // NullPointerException: argument 'catalog' cannot be NULL. TODO: test with Maven4. public void testDisableAccessExternalDTD() throws IOException { - final XMLInputFactory factory = XMLInputFactory.newInstance(); + final XMLInputFactory factory = XMLInputFactory.newDefaultFactory(); factory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "http,https"); var ex = assertThrows(XMLStreamException.class, () -> readExternalEntity(factory, false)); assertMessageContains(ex, "sis-"); // Prefix of the temporary file. diff --git a/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/reader/XMLMetadata.java b/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/reader/XMLMetadata.java index 8abcbb0e7b..fc8ae4e450 100644 --- a/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/reader/XMLMetadata.java +++ b/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/reader/XMLMetadata.java @@ -207,7 +207,8 @@ public final class XMLMetadata implements Filter { } /** - * Returns a reader for the XML document, or {@code null} if the document could not be read. + * Returns a reader for the <abbr>XML</abbr> document, + * or {@code null} if the document could not be read. */ private XMLEventReader toXML() throws XMLStreamException { if (bytes != null) { diff --git a/endorsed/src/org.apache.sis.storage.xml/main/org/apache/sis/storage/xml/stream/StaxDataStore.java b/endorsed/src/org.apache.sis.storage.xml/main/org/apache/sis/storage/xml/stream/StaxDataStore.java index e96f908262..0d4e5c5391 100644 --- a/endorsed/src/org.apache.sis.storage.xml/main/org/apache/sis/storage/xml/stream/StaxDataStore.java +++ b/endorsed/src/org.apache.sis.storage.xml/main/org/apache/sis/storage/xml/stream/StaxDataStore.java @@ -20,6 +20,7 @@ import java.util.Locale; import java.util.logging.Level; import java.util.logging.LogRecord; import java.util.logging.Filter; +import java.util.function.Supplier; import java.io.Closeable; import java.io.IOException; import java.io.InputStream; @@ -27,7 +28,6 @@ import java.io.OutputStream; import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.nio.charset.Charset; -import javax.xml.XMLConstants; import javax.xml.stream.Location; import javax.xml.stream.XMLReporter; import javax.xml.stream.XMLInputFactory; @@ -36,6 +36,7 @@ import javax.xml.stream.XMLStreamReader; import javax.xml.stream.XMLStreamWriter; import javax.xml.stream.XMLStreamException; import org.apache.sis.xml.XML; +import org.apache.sis.xml.internal.shared.InputFactory; import org.apache.sis.storage.OptionKey; import org.apache.sis.storage.DataStore; import org.apache.sis.storage.StorageConnector; @@ -177,11 +178,12 @@ public abstract class StaxDataStore extends URIDataStore { */ protected StaxDataStore(final StaxDataStoreProvider provider, final StorageConnector connector) throws DataStoreException { super(provider, connector); - final Integer indent; + final Supplier<XMLInputFactory> supplier = connector.getOption(OptionKey.XML_INPUT_FACTORY); + if (supplier != null) { + inputFactory = supplier.get(); + } storage = connector.getStorage(); - indent = connector.getOption(OptionKey.INDENTATION); - indentation = (indent == null) ? Constants.DEFAULT_INDENTATION - : (byte) Math.max(WKTFormat.SINGLE_LINE, Math.min(120, indent)); + indentation = indentation(connector.getOption(OptionKey.INDENTATION)); configuration = new Config(); storageToWriter = OutputType.forType(storage.getClass()); storageToReader = InputType.forType(storage.getClass()); @@ -218,6 +220,18 @@ public abstract class StaxDataStore extends URIDataStore { } } + /** + * Returns a valid indentation value from the given property value. + * Value -1 means to format everything on a single line. + * + * @param indentation value associated to {@link OptionKey#INDENTATION}. + * @return indentation to use, or -1 for formatting on a single line. + */ + private static byte indentation(final Integer indentation) { + if (indentation == null) return Constants.DEFAULT_INDENTATION; + return (byte) Math.max(WKTFormat.SINGLE_LINE, Math.min(120, indentation)); + } + /** * Marks the current stream position. This method shall be invoked either at construction time, * or after a new stream has been created. @@ -255,7 +269,7 @@ public abstract class StaxDataStore extends URIDataStore { * Holds information that can be used for (un)marshallers configuration, and opportunistically * implement various listeners used by JAXB (actually the SIS wrappers) or StAX. */ - private final class Config extends AbstractMap<String,Object> implements XMLReporter, Filter { + private final class Config extends AbstractMap<String, Object> implements XMLReporter, Filter { /** * Fetches configuration information from the given object. */ @@ -376,22 +390,13 @@ public abstract class StaxDataStore extends URIDataStore { * * <h4>Security</h4> * Unless the user has configured the {@code javax.xml.accessExternalDTD} property to something else - * than {@code "all"}, this class disallows external DTDs referenced by the {@code "file"} protocols. - * Allowed protocols are <abbr>HTTP</abbr> and <abbr>HTTPS</abbr> (list may be expanded if needed). - * - * @see org.apache.sis.xml.internal.shared.InputFactory#FACTORY + * than {@code "all"}, this class disallows external <abbr>DTD</abbr>s. */ final XMLInputFactory inputFactory() { assert Thread.holdsLock(this); XMLInputFactory factory = inputFactory; if (factory == null) { - factory = XMLInputFactory.newInstance(); - if (factory.isPropertySupported(XMLConstants.FEATURE_SECURE_PROCESSING)) { - factory.setProperty(XMLConstants.FEATURE_SECURE_PROCESSING, Boolean.TRUE); - } - if ("all".equals(factory.getProperty(XMLConstants.ACCESS_EXTERNAL_DTD))) { - factory.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); - } + factory = InputFactory.newSecureFactory(); factory.setXMLReporter(configuration); inputFactory = factory; // Set only on success. } diff --git a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/OptionKey.java b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/OptionKey.java index ca0dfd18a1..2ee060376f 100644 --- a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/OptionKey.java +++ b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/OptionKey.java @@ -26,6 +26,8 @@ import java.nio.file.Path; import java.nio.file.OpenOption; import java.nio.file.StandardOpenOption; import java.io.ObjectStreamException; +import javax.xml.XMLConstants; +import javax.xml.stream.XMLInputFactory; import org.opengis.referencing.crs.CoordinateReferenceSystem; import org.apache.sis.util.logging.Logging; import org.apache.sis.setup.GeometryLibrary; @@ -181,6 +183,16 @@ public class OptionKey<T> extends org.apache.sis.setup.OptionKey<T> { @SuppressWarnings({"rawtypes", "unchecked"}) public static final OptionKey<Supplier<ByteBuffer>> BYTE_BUFFER = new OptionKey("BYTE_BUFFER", Supplier.class); + /** + * Provider of a factory for creating <abbr>XML</abbr> stream readers or event readers. + * If this option is not specified or if the supplier returns {@code null}, Apache <abbr>SIS</abbr> uses a default + * factory with the {@link XMLConstants#ACCESS_EXTERNAL_DTD} property set to an empty string for security reasons. + * If a factory is supplied, the suppliers should {@linkplain XMLInputFactory#setProperty(String, Object) set the + * factory properties} itself for the desired security level. + */ + @SuppressWarnings({"rawtypes", "unchecked"}) + public static final OptionKey<Supplier<XMLInputFactory>> XML_INPUT_FACTORY = new OptionKey("XML_INPUT_FACTORY", Supplier.class); + /** * Path to an auxiliary file containing metadata encoded in an <abbr>ISO</abbr> 19115-3 <abbr>XML</abbr> document. * The given path, if not absolute, is relative to the path of the main storage file.
