This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
commit 6c7c2cdeacc9e1a176a969bd2b6de365a458c68e Author: Martin Desruisseaux <martin.desruisse...@geomatys.com> AuthorDate: Tue Dec 28 21:10:17 2021 +0100 Parse `GEO_METADATA` and `GDAL_METADATA` tags as XML. For now they are shown only in the native metadata view. --- .../apache/sis/storage/geotiff/GeoTiffStore.java | 2 +- .../apache/sis/storage/geotiff/NativeMetadata.java | 36 ++- .../java/org/apache/sis/storage/geotiff/Tags.java | 14 +- .../apache/sis/storage/geotiff/XMLMetadata.java | 289 +++++++++++++++++++++ 4 files changed, 330 insertions(+), 11 deletions(-) diff --git a/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/GeoTiffStore.java b/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/GeoTiffStore.java index 333560a..ce301a1 100644 --- a/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/GeoTiffStore.java +++ b/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/GeoTiffStore.java @@ -72,7 +72,7 @@ import org.apache.sis.util.resources.Errors; */ public class GeoTiffStore extends DataStore implements Aggregate { /** - * The encoding of strings in the metadata. The string specification said that is shall be US-ASCII, + * The encoding of strings in the metadata. The TIFF specification said that is shall be US-ASCII, * but Apache SIS nevertheless let the user specifies an alternative encoding if needed. */ final Charset encoding; diff --git a/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/NativeMetadata.java b/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/NativeMetadata.java index fbce05e..594a051 100644 --- a/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/NativeMetadata.java +++ b/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/NativeMetadata.java @@ -63,12 +63,12 @@ final class NativeMetadata extends GeoKeysLoader { * Column for the name associated to the tag. * Value may be null if the name is unknown. 
*/ - private static final TableColumn<CharSequence> NAME = TableColumn.NAME; + static final TableColumn<CharSequence> NAME = TableColumn.NAME; /** * Column for the value associated to the tag. */ - private static final TableColumn<Object> VALUE = TableColumn.VALUE; + static final TableColumn<Object> VALUE = TableColumn.VALUE; /** * The stream from which to read the data. @@ -137,8 +137,8 @@ final class NativeMetadata extends GeoKeysLoader { boolean visible; /* * Exclude the tags about location of tiles in the GeoTIFF files. - * Values of those tags are potentially large and rarely useful - * for human reading. + * Values of those tags are potentially large and rarely useful for human reading. + * This switch is only about tags to skip; special handlings of some tags are done later. */ switch (tag) { case Tags.TileOffsets: @@ -152,7 +152,12 @@ final class NativeMetadata extends GeoKeysLoader { final long offset = readInt(false); input.seek(Math.addExact(reader.origin, offset)); } + /* + * Some tags need to be handle in a special way. The main cases are GeoTIFF keys. + * But other cases exist (e.g. GEO_METADATA and GDAL_METADATA). + */ Object value = null; + XMLMetadata children = null; switch (tag) { case Tags.GeoKeyDirectory: { writeGeoKeys(); // Flush previous keys if any (should never happen). @@ -170,6 +175,15 @@ final class NativeMetadata extends GeoKeysLoader { visible = false; break; } + case Tags.GDAL_METADATA: + case Tags.GEO_METADATA: { + children = new XMLMetadata(reader, type, count, tag == Tags.GDAL_METADATA); + if (children.isEmpty()) { + // Fallback on showing array of numerical values. 
+ value = type.readVector(input, count); + } + break; + } default: { value = type.readObject(input, count); if (value instanceof Vector) { @@ -191,10 +205,16 @@ final class NativeMetadata extends GeoKeysLoader { } } if (visible) { - final TreeTable.Node node = image.newChild(); - node.setValue(CODE, Short.toUnsignedInt(tag)); - node.setValue(NAME, Tags.name(tag)); - node.setValue(VALUE, value); + final String name = Tags.name(tag); + final TreeTable.Node node; + if (children != null) { + node = new XMLMetadata.Root(children, (DefaultTreeTable.Node) image, name); + } else { + node = image.newChild(); + node.setValue(NAME, name); + node.setValue(VALUE, value); + } + node.setValue(CODE, Short.toUnsignedInt(tag)); if (tag == Tags.GeoKeyDirectory) { geoNode = node; } diff --git a/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/Tags.java b/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/Tags.java index c0d4d28..c0a47af 100644 --- a/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/Tags.java +++ b/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/Tags.java @@ -28,7 +28,7 @@ import java.lang.reflect.Field; * <a href="http://www.awaresystems.be/imaging/tiff/tifftags.html">TIFF Tag Reference</a> page.</p> * * @author Johann Sorel (Geomatys) - * @version 0.8 + * @version 1.2 * @since 0.8 * @module * @@ -119,11 +119,21 @@ final class Tags { ///////////////////////////////////////////////////////// + // OGC DGIWG EXTENSION TAGS // + ///////////////////////////////////////////////////////// + + /** + * Embedded XML-encoded instance documents prepared using 19139-based schema. + */ + public static final short GEO_METADATA = (short) 0xC6DD; + + + ///////////////////////////////////////////////////////// // GDAL EXTENSION TAGS // ///////////////////////////////////////////////////////// /** - * holds an XML list of name=value 'metadata' values about the image as a whole, and about specific samples. 
+ * Holds an XML list of name=value 'metadata' values about the image as a whole, and about specific samples. * * @see <a href="http://www.awaresystems.be/imaging/tiff/tifftags/gdal_metadata.html">TIFF Tag GDAL_METADATA</a> */ diff --git a/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/XMLMetadata.java b/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/XMLMetadata.java new file mode 100644 index 0000000..8e7465d --- /dev/null +++ b/storage/sis-geotiff/src/main/java/org/apache/sis/storage/geotiff/XMLMetadata.java @@ -0,0 +1,289 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.sis.storage.geotiff; + +import java.util.Iterator; +import java.util.StringJoiner; +import java.io.IOException; +import java.io.StringReader; +import java.io.ByteArrayInputStream; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import javax.xml.stream.XMLEventReader; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.events.XMLEvent; +import javax.xml.stream.events.Attribute; +import javax.xml.stream.events.Characters; +import javax.xml.stream.events.StartElement; +import org.apache.sis.util.collection.TreeTable; +import org.apache.sis.util.collection.DefaultTreeTable; +import org.apache.sis.util.collection.TableColumn; +import org.apache.sis.util.resources.Errors; + + +/** + * Support for metadata encoded in XML inside GeoTIFF tags. + * This is a temporary object used only at parsing time. + * Two TIFF tags are associated to XML data: + * + * <ul> + * <li>{@code GDAL_METADATA} (A480) stored as ASCII characters.</li> + * <li>{@code GEO_METADATA} (C6DD) stored as bytes with UTF-8 encoding.</li> + * </ul> + * + * {@code GEO_METADATA} is defined by the Defense Geospatial Information Working Group (DGIWG) + * in the <cite>GeoTIFF Profile for Georeferenced Imagery</cite> standard. + * + * @author Martin Desruisseaux (Geomatys) + * @version 1.2 + * + * @see <a href="https://www.dgiwg.org/dgiwg-standards">DGIWG Standards</a> + * + * @since 1.2 + * @module + */ +final class XMLMetadata { + /** + * The bytes to decode as an XML document. + * DGIWG specification mandates UTF-8 encoding. + */ + private byte[] bytes; + + /** + * The XML document as string. + */ + private String string; + + /** + * Name of the XML element being processed. Used for error message only: + * this field is left to a non-null value if an exception occurred during XML parsing. + */ + private String currentElement; + + /** + * {@code true} if the XML is GDAL metadata. 
Example: + * + * {@preformat xml + * <GDALMetadata> + * <Item name="acquisitionEndDate">2016-09-08T15:53:00+05:00</Item> + * <Item name="acquisitionStartDate">2016-09-08T15:56:00+05:00</Item> + * </GDALMetadata> + * } + */ + private final boolean isGDAL; + + /** + * Creates new metadata which will decode the given vector of bytes. + * + * @param reader the TIFF reader. + * @param type type of the metadata tag to read. + * @param count number of bytes or characters in the value to read. + * @param isGDAL {@code true} if the XML is GDAL metadata. + */ + XMLMetadata(final Reader reader, final Type type, final long count, final boolean isGDAL) throws IOException { + this.isGDAL = isGDAL; + switch (type) { + case ASCII: { + final String[] cs = type.readString(reader.input, count, reader.store.encoding); + switch (cs.length) { + case 0: break; + case 1: string = cs[0]; break; // Usual case. + default: string = String.join(System.lineSeparator(), cs); break; + } + break; + } + case BYTE: + case UBYTE: { + /* + * NoSuchElementException, ClassCastException and UnsupportedOperationException + * should never happen here because we verified that the vector type is byte. + */ + bytes = ((ByteBuffer) type.readVector(reader.input, count).buffer().get()).array(); + break; + } + } + } + + /** + * Returns {@code true} if the XML document could not be read. + */ + public boolean isEmpty() { + return bytes == null && string == null; + } + + /** + * Returns the XML document as a character string, or {@code null} if the document could not be read. + */ + public String toString() { + if (string == null) { + if (bytes == null) { + return null; + } + string = new String(bytes, StandardCharsets.UTF_8); + } + return string; + } + + /** + * Returns a reader for the XML document, or {@code null} if the document could not be read. 
+ */ + private XMLEventReader toXML() throws XMLStreamException { + final XMLInputFactory factory = XMLInputFactory.newFactory(); + if (bytes != null) { + return factory.createXMLEventReader(new ByteArrayInputStream(bytes), "UTF-8"); + } else if (string != null) { + return factory.createXMLEventReader(new StringReader(string)); + } else { + return null; + } + } + + /** + * A tree-table representation of the XML document contained in the enclosing {@link XMLMetadata}. + * The root node contains the XML document as a {@linkplain #getUserObject() user object}. + * It allows JavaFX application to support the "copy to clipboard" operation. + */ + static final class Root extends DefaultTreeTable.Node { + /** + * Column for the name associated to the element. + */ + private static final TableColumn<CharSequence> NAME = NativeMetadata.NAME; + + /** + * Column for the value associated to the element. + */ + private static final TableColumn<Object> VALUE = NativeMetadata.VALUE; + + /** + * A string representation of the XML document. + * + * @see #getUserObject() + */ + private final String xml; + + /** + * Returns the XML document as a user object. + * It allows JavaFX application to support the "copy to clipboard" operation. + */ + @Override + public Object getUserObject() { + return xml; + } + + /** + * Converts the XML document to a tree table. + * This method writes in the {@link NativeMetadata#NAME} and {@link NativeMetadata#VALUE} columns. + * If an exception occurs during XML parsing, then the node content will be set to the raw XML and + * the only child will be the {@link Throwable}. The error message will appear as a single line + * when the tree node values are formatted by {@link Object#toString()}, but the full stack trace + * is available if the user invokes {@code getValue(NativeMetadata.VALUE)}. + * It allows GUI applications to provide details if requested. + * + * @param source the XML document to represent as a tree table. 
+ * @param parent where to append this root node. + * @param name name of the TIFF tag, stored in the {@link NativeMetadata#NAME} column of this node. + */ + Root(final XMLMetadata source, final DefaultTreeTable.Node parent, final String name) { + super(parent); + xml = source.toString(); + source.currentElement = name; + setValue(NAME, name); + try { + final XMLEventReader reader = source.toXML(); + if (reader != null) { + while (reader.hasNext()) { + final XMLEvent event = reader.nextEvent(); + if (event.isStartElement()) { + source.append(reader, event.asStartElement(), newChild()); + } + } + reader.close(); + } + } catch (XMLStreamException e) { + getChildren().clear(); + setValue(VALUE, xml); + final TreeTable.Node child = newChild(); + child.setValue(NAME, Errors.format(Errors.Keys.CanNotRead_1, source.currentElement)); + child.setValue(VALUE, e); // We want the full throwable, not only its string representation. + } + source.currentElement = null; + } + } + + /** + * Converts an XML element and its children to a tree table node. + * This is used for {@link NativeMetadata} representation. + * + * @param reader the XML reader with its cursor set after the XML element. + * @param element the XML element to append. + * @param node an initially empty node which is added to the tree. 
+ */ + private void append(final XMLEventReader reader, final StartElement element, final TreeTable.Node node) + throws XMLStreamException + { + final String previous = currentElement; + currentElement = element.getName().getLocalPart(); + node.setValue(Root.NAME, currentElement); + final boolean isItem = isGDAL && currentElement.equals("Item"); + final Iterator<Attribute> attributes = element.getAttributes(); + while (attributes.hasNext()) { + final Attribute attribute = attributes.next(); + if (attribute.isSpecified()) { + final String name = attribute.getName().getLocalPart(); + final String value = attribute.getValue(); + if (isItem && name.equals("name")) { + /* + * GDAL metadata does not really make use of XML schema. + * Instead, it is a collection of lines like below: + * + * <Item name="acquisitionEndDate">2016-09-08T15:53:00+05:00</Item> + * + * For more natural tree, we rename the "Item" element using the name + * specified by the attribute ("acquisitionEndDate" in above example). + */ + node.setValue(Root.NAME, value); + } else { + final TreeTable.Node child = node.newChild(); + child.setValue(Root.NAME, name); + child.setValue(Root.VALUE, value); + } + } + } + final StringJoiner buffer = new StringJoiner(""); + while (reader.hasNext()) { + final XMLEvent event = reader.nextEvent(); + if (event.isStartElement()) { + append(reader, event.asStartElement(), node.newChild()); + } + if (event.isCharacters()) { + final Characters characters = event.asCharacters(); + if (!characters.isWhiteSpace()) { + buffer.add(characters.getData()); + } + } + if (event.isEndElement()) { + break; + } + } + final String value = buffer.toString(); + if (!value.isEmpty()) { + node.setValue(Root.VALUE, value); + } + currentElement = previous; + } +}