This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
The following commit(s) were added to refs/heads/geoapi-4.0 by this push: new f318d2564c Add parsing of statistics auxiliary file (*.stx) with BIL/BIP/BSQ and ASCII Grid reader. f318d2564c is described below commit f318d2564c466d7319373fe567391218b9e5fd3e Author: Martin Desruisseaux <martin.desruisse...@geomatys.com> AuthorDate: Mon Apr 25 12:02:39 2022 +0200 Add parsing of statistics auxiliary file (*.stx) with BIL/BIP/BSQ and ASCII Grid reader. --- .../main/java/org/apache/sis/math/Statistics.java | 68 ++++++++- .../java/org/apache/sis/math/StatisticsTest.java | 18 ++- .../sis/internal/storage/esri/RasterStore.java | 152 +++++++++++++++++---- .../sis/internal/storage/esri/RawRasterStore.java | 2 +- .../sis/internal/storage/esri/WritableStore.java | 1 - 5 files changed, 208 insertions(+), 33 deletions(-) diff --git a/core/sis-utility/src/main/java/org/apache/sis/math/Statistics.java b/core/sis-utility/src/main/java/org/apache/sis/math/Statistics.java index 92c77555ee..b89e6b50b1 100644 --- a/core/sis-utility/src/main/java/org/apache/sis/math/Statistics.java +++ b/core/sis-utility/src/main/java/org/apache/sis/math/Statistics.java @@ -23,6 +23,8 @@ import java.util.function.DoubleConsumer; import org.opengis.util.InternationalString; import org.apache.sis.util.SimpleInternationalString; import org.apache.sis.util.ArgumentChecks; +import org.apache.sis.util.resources.Errors; +import org.apache.sis.internal.util.DoubleDouble; import static java.lang.Math.*; import static java.lang.Double.NaN; @@ -85,7 +87,7 @@ import static java.lang.Double.doubleToLongBits; * } * * @author Martin Desruisseaux (MPO, IRD, Geomatys) - * @version 1.0 + * @version 1.2 * @since 0.3 * @module */ @@ -156,9 +158,8 @@ public class Statistics implements DoubleConsumer, LongConsumer, Cloneable, Seri * If differences or discrete derivatives are wanted, use the {@link #forSeries forSeries(…)} * method instead.</p> * - * @param name the phenomenon for which this object is collecting statistics, or 
{@code null} - * if none. If non-null, then this name will be shown as column header in the table - * formatted by {@link StatisticsFormat}. + * @param name the phenomenon for which this object is collecting statistics, or {@code null} if none. + * If non-null, it will be shown as column header in the table formatted by {@link StatisticsFormat}. */ public Statistics(final CharSequence name) { if (name == null || name instanceof InternationalString) { @@ -168,6 +169,64 @@ public class Statistics implements DoubleConsumer, LongConsumer, Cloneable, Seri } } + /** + * Constructs a set of statistics initialized to the given values. + * The {@code countNaN} and {@code count} arguments must be positive. + * If {@code count} is 0, all following {@code double} arguments are ignored. + * Otherwise the following restrictions apply: + * + * <ul> + * <li>{@code minimum} and {@code maximum} arguments are mandatory and can not be {@link Double#NaN NaN}.</li> + * <li>{@code mean} argument is mandatory (can not be NaN) if {@code standardDeviation} is not NaN.</li> + * <li>{@code mean} and {@code standardDeviation} arguments can be both {@link Double#NaN NaN} if unknown, + * but statistics initialized that way will always return NaN from {@link #sum()}, {@link #mean()}, + * {@link #rms()} and {@link #standardDeviation(boolean)} methods.</li> + * </ul> + * + * @param name the phenomenon for which this object is collecting statistics, or {@code null} if none. + * @param countNaN the number of {@link Double#NaN NaN} samples. + * @param count the number of samples, excluding {@link Double#NaN NaN} values. + * @param minimum the minimum sample value. Ignored if {@code count} is zero. + * @param maximum the maximum sample value. Ignored if {@code count} is zero. + * @param mean the mean value. Ignored if {@code count} is zero. + * @param standardDeviation the standard deviation. Ignored if {@code count} is zero. 
+ * @param allPopulation {@code true} if sample values were the totality of the population under study, + * or {@code false} if they were only a sampling. + * + * @since 1.2 + */ + public Statistics(final CharSequence name, final int countNaN, final int count, + final double minimum, final double maximum, final double mean, + final double standardDeviation, final boolean allPopulation) + { + this(name); + ArgumentChecks.ensurePositive("countNaN", this.countNaN = countNaN); + if (count != 0) { + ArgumentChecks.ensurePositive("count", this.count = count); + if (!((this.minimum = minimum) <= (this.maximum = maximum))) { // Use `!` for catching NaN. + throw new IllegalArgumentException(Errors.format(Errors.Keys.IllegalRange_2, minimum, maximum)); + } + if (!Double.isNaN(mean) || !Double.isNaN(standardDeviation)) { + ArgumentChecks.ensureBetween("mean", minimum, maximum, mean); + } + final DoubleDouble sd = DoubleDouble.createAndGuessError(mean); + sd.multiply(count); + sum = sd.value; + lowBits = sd.error; + /* + * squareSum = standardDeviation² × (allPopulation ? count : count-1) + sum²/count + */ + sd.square(); + sd.divide(count); + final DoubleDouble sq = DoubleDouble.createAndGuessError(standardDeviation); + sq.square(); + sq.multiply(allPopulation ? count : count-1); + sq.add(sd); + squareSum = sq.value; + squareLowBits = sq.error; + } + } + /** * Constructs a new {@code Statistics} object which will also compute finite differences * up to the given order. If the values to be given to the {@code accept(…)} methods are @@ -418,6 +477,7 @@ public class Statistics implements DoubleConsumer, LongConsumer, Cloneable, Seri /** * Returns the sum, or 0 if none. + * May also be NaN if that value was explicitly specified to the constructor. * * @return the sum, or 0 if none. 
*/ diff --git a/core/sis-utility/src/test/java/org/apache/sis/math/StatisticsTest.java b/core/sis-utility/src/test/java/org/apache/sis/math/StatisticsTest.java index 0cf2ab753b..62763ae667 100644 --- a/core/sis-utility/src/test/java/org/apache/sis/math/StatisticsTest.java +++ b/core/sis-utility/src/test/java/org/apache/sis/math/StatisticsTest.java @@ -41,7 +41,7 @@ import static org.apache.sis.test.Assert.*; * that are known to produce results inside the range expected by this test class.</p> * * @author Martin Desruisseaux (Geomatys) - * @version 0.3 + * @version 1.2 * @since 0.3 * @module */ @@ -187,6 +187,22 @@ public final strictfp class StatisticsTest extends TestCase { } } + /** + * Tests {@link Statistics#Statistics(CharSequence, int, int, double, double, double, double, boolean)} + * constructor. + */ + @Test + public void testInitializationToValues() { + final Statistics stats = new Statistics("Test", 20, 128, -75, 12, 8, 0.2, false); + assertEquals("Test", stats.name().toString()); + assertEquals( 20, stats.countNaN()); + assertEquals(128, stats.count()); + assertEquals(-75, stats.minimum(), STRICT); + assertEquals( 12, stats.maximum(), STRICT); + assertEquals( 8, stats.mean(), STRICT); // Comparison can be strict because `count` is a power of 2. + assertEquals(0.2, stats.standardDeviation(false), 1E-14); + } + /** * Tests the concatenation of many {@link Statistics} objects. 
*/ diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/RasterStore.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/RasterStore.java index 4db80b9a4b..6955c061b0 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/RasterStore.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/RasterStore.java @@ -20,6 +20,11 @@ import java.util.List; import java.util.Arrays; import java.util.Optional; import java.util.Hashtable; +import java.util.logging.Level; +import java.io.IOException; +import java.io.FileNotFoundException; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; import java.awt.image.ColorModel; import java.awt.image.SampleModel; import java.awt.image.BufferedImage; @@ -43,9 +48,11 @@ import org.apache.sis.internal.storage.MetadataBuilder; import org.apache.sis.internal.coverage.j2d.ColorModelFactory; import org.apache.sis.internal.coverage.j2d.ImageUtilities; import org.apache.sis.internal.storage.RangeArgument; +import org.apache.sis.internal.storage.Resources; import org.apache.sis.internal.util.UnmodifiableArrayList; import org.apache.sis.internal.util.Numerics; import org.apache.sis.util.resources.Vocabulary; +import org.apache.sis.util.CharSequences; import org.apache.sis.math.Statistics; @@ -81,6 +88,13 @@ abstract class RasterStore extends PRJDataStore implements GridCoverageResource */ static final String NCOLS = "NCOLS"; + /** + * The filename extension of {@code "*.stx"} and {@code "*.clr"} files. + * + * @see #getComponentFiles() + */ + private static final String STX = "stx", CLR = "clr"; + /** * The color model, created from the {@code "*.clr"} file content when first needed. 
* The color model and sample dimensions are created together because they depend on @@ -120,6 +134,17 @@ abstract class RasterStore extends PRJDataStore implements GridCoverageResource listeners.useWarningEventsOnly(); } + /** + * Returns the {@linkplain #location} as a {@code Path} component together with auxiliary files. + * + * @return the main file and auxiliary files as paths, or an empty array if unknown. + * @throws DataStoreException if the URI can not be converted to a {@link Path}. + */ + @Override + public Path[] getComponentFiles() throws DataStoreException { + return listComponentFiles(PRJ, STX, CLR); + } + /** * Returns the spatiotemporal extent of the raster file. * @@ -157,60 +182,135 @@ abstract class RasterStore extends PRJDataStore implements GridCoverageResource throw new DataStoreReferencingException(getLocale(), formatName, getDisplayName(), null).initCause(e); } /* - * Do not add the sample dimensions because in current version computing the sample dimensions without - * statistics requires loading the full image. Even if `GridCoverage.getSampleDimensions()` exists and - * could be used opportunistically, we do not use it in order to keep a deterministic behavior + * Do not invoke `getSampleDimensions()` because computing sample dimensions without statistics + * may cause the loading of the full image. Even if `GridCoverage.getSampleDimensions()` exists + * and could be used opportunistically, we do not use it in order to keep a deterministic behavior * (we do not want the metadata to vary depending on the order in which methods are invoked). */ + if (sampleDimensions != null) { + for (final SampleDimension band : sampleDimensions) { + builder.addNewBand(band); + } + } addTitleOrIdentifier(builder); builder.setISOStandards(false); metadata = builder.buildAndFreeze(); } + /** + * Reads the {@code "*.stx"} auxiliary file. Syntax is as below, with one line per band. 
+ * Values between {…} are optional and can be skipped with a # sign in place of the number. + * + * <pre>band minimum maximum {mean} {std_deviation} {linear_stretch_min} {linear_stretch_max}</pre> + * + * The specification says that lines that do not start with a number shall be ignored as comment. + * + * @todo Stretch values are not yet stored. + * + * @param numBands length of the array to return. + * @return statistics for each band. Some elements may be null if not specified in the file. + * @throws NoSuchFileException if the auxiliary file has not been found (when opened from path). + * @throws FileNotFoundException if the auxiliary file has not been found (when opened from URL). + * @throws IOException if another error occurred while opening the stream. + * @throws NumberFormatException if a number can not be parsed. + */ + private Statistics[] readStatistics(final String name, final SampleModel sm, final int numBands) + throws DataStoreException, IOException + { + final Statistics[] stats = new Statistics[numBands]; + for (final CharSequence line : CharSequences.splitOnEOL(readAuxiliaryFile(STX))) { + final int end = CharSequences.skipTrailingWhitespaces(line, 0, line.length()); + final int start = CharSequences.skipLeadingWhitespaces(line, 0, end); + if (start < end && Character.isDigit(Character.codePointAt(line, start))) { + int column = 0; + int band = 0; + double minimum = Double.NaN; + double maximum = Double.NaN; + double mean = Double.NaN; + double stdev = Double.NaN; + for (final CharSequence item : CharSequences.split(line.subSequence(start, end), ' ')) { + if (item.length() != 0) { + if (column == 0) { + band = Integer.parseInt(item.toString()); + } else if (item.charAt(0) != '#') { + final double value = Double.parseDouble(item.toString()); + switch (column) { + case 1: minimum = value; break; + case 2: maximum = value; break; + case 3: mean = value; break; + case 4: stdev = value; break; + } + } + column++; + } + } + if (band >= 1 && band <=
stats.length) { + final int count = Math.multiplyExact(sm.getWidth(), sm.getHeight()); + stats[band - 1] = new Statistics(name, 0, count, minimum, maximum, mean, stdev, true); + } + } + } + return stats; + } + /** * Loads {@code "*.stx"} and {@code "*.clr"} files if present then builds {@link #sampleDimensions} and * {@link #colorModel} from those information. If no color map is found, a grayscale color model is created. * * @param name name to use for the sample dimension, or {@code null} if untitled. * @param sm the sample model to use for creating a default color model if no {@code "*.clr"} file is found. - * @param stats if the caller collected statistics by itself, those statistics. Otherwise {@code null}. + * @param stats if the caller collected statistics by itself, those statistics for each band. Otherwise empty. + * @throws DataStoreException if an error occurred while loading an auxiliary file. */ - final void loadBandDescriptions(String name, final SampleModel sm, final Statistics stats) { + final void loadBandDescriptions(String name, final SampleModel sm, Statistics... stats) throws DataStoreException { final SampleDimension[] bands = new SampleDimension[sm.getNumBands()]; - final int dataType = sm.getDataType(); /* - * TODO: read color map and statistics. - * - * Fallback when no statistics auxiliary file was found. - * Try to infer the minimum and maximum from data type. + * If the "*.stx" file is found, the statistics read from that file will replace the specified one. + * Otherwise the `stats` parameter will be left unchanged. We read statistics even if a color map + * overwrite them because we need the minimum/maximum values for building the sample dimensions. 
+ */ + try { + stats = readStatistics(name, sm, bands.length); + } catch (NoSuchFileException | FileNotFoundException e) { + listeners.warning(Level.FINE, Resources.format(Resources.Keys.CanNotReadAuxiliaryFile_1, STX), e); + } catch (IOException | NumberFormatException e) { + throw new DataStoreReferencingException(Resources.format(Resources.Keys.CanNotReadAuxiliaryFile_1, STX), e); + } + /* + * Build the sample dimensions and the color model. + * Some minimum/maximum values will be used as fallback if no statistics were found. */ - double minimum = 0; - double maximum = 1; - boolean computeForEachBand = false; + final int dataType = sm.getDataType(); final boolean isInteger = ImageUtilities.isIntegerType(dataType); final boolean isUnsigned = isInteger && ImageUtilities.isUnsignedType(sm); final boolean isRGB = isInteger && (bands.length == 3 || bands.length == 4); - if (stats != null && stats.count() != 0) { - minimum = stats.minimum(); - maximum = stats.maximum(); - } else { - computeForEachBand = isInteger && !isRGB; - } final SampleDimension.Builder builder = new SampleDimension.Builder(); for (int band=0; band < bands.length; band++) { + double minimum = Double.NaN; + double maximum = Double.NaN; + if (band < stats.length) { + final Statistics s = stats[band]; + if (s != null) { // `readStatistics()` may have left some values to null. + minimum = s.minimum(); + maximum = s.maximum(); + } + } /* * If statistics were not specified and the sample type is integer, * the minimum and maximum values may change for each band because * the sample size (in bits) can vary. */ - if (computeForEachBand) { + if (!(minimum <= maximum)) { // Use `!` for catching NaN. minimum = 0; - long max = Numerics.bitmask(sm.getSampleSize(band)) - 1; - if (!isUnsigned) { - max >>>= 1; - minimum = ~max; // Tild operator, not minus. 
+ maximum = 1; + if (isInteger) { + long max = Numerics.bitmask(sm.getSampleSize(band)) - 1; + if (!isUnsigned) { + max >>>= 1; + minimum = ~max; // Tilde operator, not minus. + } + maximum = max; } - maximum = max; } /* * Create the sample dimension for this band. The same "no data" value is used for all bands. @@ -233,7 +333,7 @@ abstract class RasterStore extends PRJDataStore implements GridCoverageResource builder.clear(); /* * Create the color model using the statistics of the band that we choose to make visible, - * or using a RGB color model if the number of bands and the data type are compatible. + * or using a RGB color model if the number of bands or the data type is compatible. */ if (band == VISIBLE_BAND) { if (isRGB) { diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/RawRasterStore.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/RawRasterStore.java index 5bcb94a3c2..adef8032fa 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/RawRasterStore.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/RawRasterStore.java @@ -238,7 +238,7 @@ final class RawRasterStore extends RasterStore { if (reader == null) { readHeader(); } - loadBandDescriptions(input.filename, reader.layout, null); + loadBandDescriptions(input.filename, reader.layout); sampleDimensions = super.getSampleDimensions(); } catch (IOException e) { throw new DataStoreException(canNotRead(), e); diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/WritableStore.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/WritableStore.java index 2ee47ae26e..5e57c11003 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/WritableStore.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/esri/WritableStore.java @@ -147,7 +147,6 @@ final class WritableStore extends
AsciiGridStore implements WritableGridCoverage */ throw new IncompatibleResourceException(h.canNotWrite()); } - header.put(xll, x); header.put(yll, y); if (scaleX == scaleY) {