This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
commit a70874eb6e84a0da44148b85f95cb492234f6864 Author: Martin Desruisseaux <martin.desruisse...@geomatys.com> AuthorDate: Mon Dec 27 05:50:27 2021 +0100 Implement `FeatureSet.getEnvelope()` with an estimation computed from PostgreSQL statistics. This is derived from Alexis's work on the "fix/fast-envelope" branch. --- .../apache/sis/internal/sql/feature/Column.java | 2 + .../apache/sis/internal/sql/feature/Database.java | 40 +++++- .../sis/internal/sql/feature/InfoStatements.java | 16 +-- .../org/apache/sis/internal/sql/feature/Table.java | 26 ++++ .../sis/internal/sql/feature/TableReference.java | 4 +- .../sis/internal/sql/postgis/ExtentEstimator.java | 146 +++++++++++++++++++++ .../apache/sis/internal/sql/postgis/Postgres.java | 23 ++++ .../sis/internal/sql/postgis/PostgresTest.java | 23 ++-- .../main/java/org/apache/sis/storage/DataSet.java | 7 +- 9 files changed, 261 insertions(+), 26 deletions(-) diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Column.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Column.java index 480b10c..c2f75ab 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Column.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Column.java @@ -106,6 +106,8 @@ public final class Column { /** * If this column is a geometry column, the type of the geometry objects. Otherwise {@code null}. + * + * @see #getGeometryType() */ private GeometryType geometryType; diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Database.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Database.java index 6944bd7..b898efe 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Database.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Database.java @@ -34,11 +34,13 @@ import java.sql.SQLException; import java.sql.Types; import javax.sql.DataSource; import org.opengis.util.GenericName; +import org.opengis.geometry.Envelope; import org.opengis.referencing.crs.CoordinateReferenceSystem; import org.apache.sis.setup.GeometryLibrary; import org.apache.sis.internal.metadata.sql.Syntax; import org.apache.sis.internal.metadata.sql.Dialect; import org.apache.sis.internal.metadata.sql.Reflection; +import org.apache.sis.internal.metadata.sql.SQLBuilder; import org.apache.sis.internal.storage.MetadataBuilder; import org.apache.sis.internal.feature.Geometries; import org.apache.sis.internal.feature.GeometryType; @@ -188,7 +190,7 @@ public class Database<G> extends Syntax { * * @see #log(LogRecord) */ - final StoreListeners listeners; + public final StoreListeners listeners; /** * Cache of Coordinate Reference Systems created for a given SRID. @@ -478,6 +480,26 @@ public class Database<G> extends Syntax { } /** + * Appends a call to a function defined in the spatial schema. + * The function name will be prefixed by catalog and schema name if applicable. + * The function will not be quoted. + * + * @param sql the SQL builder where to add the spatial function name. + * @param function the function to append. + */ + public final void appendFunctionCall(final SQLBuilder sql, final String function) { + final String schema = schemaOfSpatialTables; + if (schema != null && !schema.isEmpty()) { + final String catalog = catalogOfSpatialTables; + if (catalog != null && !catalog.isEmpty()) { + sql.appendIdentifier(catalog).append('.'); + } + sql.appendIdentifier(schema).append('.'); + } + sql.append(function); + } + + /** * Returns {@code true} if this database is a spatial database. * Tables such as "SPATIAL_REF_SYS" are used as sentinel values. * @@ -574,6 +596,22 @@ public class Database<G> extends Syntax { } /** + * Computes an estimation of the envelope of all geometry columns in the given table. + * The returned envelope shall contain at least the two-dimensional spatial components. + * Whether other dimensions (vertical and temporal) and present or not depends on the implementation. + * This method is invoked only if the {@code columns} array contains at least one geometry column. + * + * @param table the table for which to compute an estimation of the envelope. + * @param columns all columns in the table. Implementation should ignore non-geometry columns. + * This is a reference to an internal array; <strong>do not modify</strong>. + * @return an estimation of the spatiotemporal resource extent, or {@code null} if none. + * @throws SQLException if an error occurred while fetching the envelope. + */ + protected Envelope getEstimatedExtent(TableReference table, Column[] columns) throws SQLException { + return null; + } + + /** * Returns a function for getting values from a geometry column. * This is a helper method for {@link #getMapping(Column)} implementations. * diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/InfoStatements.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/InfoStatements.java index 9ade14d..46a1e59 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/InfoStatements.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/InfoStatements.java @@ -182,16 +182,12 @@ public class InfoStatements implements Localized, AutoCloseable { * The table name will be prefixed by catalog and schema name if applicable. */ private void appendFrom(final SQLBuilder sql, final String table) { - sql.append(" FROM "); - final String schema = database.schemaOfSpatialTables; - if (schema != null && !schema.isEmpty()) { - final String catalog = database.catalogOfSpatialTables; - if (catalog != null && !catalog.isEmpty()) { - sql.appendIdentifier(catalog).append('.'); - } - sql.appendIdentifier(schema).append('.'); - } - sql.append(table).append(" WHERE "); // Intentionally no quotes for table name. + /* + * Despite its name, `appendFunctionCall(…)` can also be used for formatting + * table names provided that we want unquoted names (which is the case here). + */ + database.appendFunctionCall(sql.append(" FROM "), table); + sql.append(" WHERE "); } /** diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Table.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Table.java index 836d92e..9c18aed 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Table.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Table.java @@ -25,6 +25,7 @@ import java.sql.DatabaseMetaData; import java.sql.ResultSet; import java.sql.SQLException; import org.opengis.util.GenericName; +import org.opengis.geometry.Envelope; import org.apache.sis.storage.DataStoreException; import org.apache.sis.storage.InternalDataStoreException; import org.apache.sis.internal.metadata.sql.Reflection; @@ -312,6 +313,31 @@ final class Table extends AbstractFeatureSet { } /** + * Returns an estimation of the envelope of all geometry columns in this table. + * The returned envelope shall contain at least the two-dimensional spatial components. + * Whether other dimensions (vertical and temporal) and present or not depends on the implementation. + * + * <h2>Departure from interface contract</h2> + * {@link org.apache.sis.storage.DataSet#getEnvelope()} contract allows estimated envelope to be larger than + * actual envelope (similar to Java2D {@link java.awt.Shape#getBounds()} contract), but smaller envelope are + * discouraged. Despite that, this method may return smaller envelopes because the computation is done using + * a subset of all data. + * + * @return an estimation of the spatiotemporal resource extent. + * @throws DataStoreException if an error occurred while reading or computing the envelope. + */ + @Override + public Optional<Envelope> getEnvelope() throws DataStoreException { + if (hasGeometry) try { + return Optional.ofNullable(database.getEstimatedExtent(name, attributes)); + } catch (SQLException e) { + throw new DataStoreException(e); + } else { + return Optional.empty(); + } + } + + /** * Returns the column from an attribute name specified as XPath. * Current implementation interprets the {@code xpath} value only as the attribute name, * but a future implementation may parse something like a {@code "table/column"} syntax. diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/TableReference.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/TableReference.java index 9d73cf0..23aaf47 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/TableReference.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/TableReference.java @@ -35,7 +35,7 @@ import static org.apache.sis.internal.util.Strings.trimOrNull; * * @author Johann Sorel (Geomatys) * @author Martin Desruisseaux (Geomatys) - * @version 1.1 + * @version 1.2 * @since 1.0 * @module */ @@ -53,7 +53,7 @@ public class TableReference { * Names are stored here as they were given by JDBC because those names are sometime * compared with other JDBC metadata or used for fetching more table metadata. */ - final String catalog, schema, table; + public final String catalog, schema, table; /** * Ignored by this class; reserved for caller and subclasses usage. diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/postgis/ExtentEstimator.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/postgis/ExtentEstimator.java new file mode 100644 index 0000000..2fa86da --- /dev/null +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/postgis/ExtentEstimator.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sis.internal.sql.postgis; + +import java.sql.Statement; +import java.sql.ResultSet; +import java.sql.SQLException; +import org.opengis.referencing.operation.TransformException; +import org.apache.sis.geometry.Envelopes; +import org.apache.sis.geometry.GeneralEnvelope; +import org.apache.sis.internal.sql.feature.Database; +import org.apache.sis.internal.sql.feature.Column; +import org.apache.sis.internal.sql.feature.TableReference; +import org.apache.sis.internal.metadata.sql.SQLBuilder; + + +/** + * Estimation of the extent of geometries in a given table or column using statistics if available. + * Uses the PostGIS {@code ST_EstimatedExtent(…)} function to get a rough estimation of column extent. + * + * @author Alexis Manin (Geomatys) + * @author Martin Desruisseaux (Geomatys) + * @version 1.2 + * + * @see <a href="https://postgis.net/docs/ST_EstimatedExtent.html">ST_EstimatedExtent</a> + * + * @since 1.2 + * @module + */ +final class ExtentEstimator { + /** + * The database containing the table for which to estimate the extent. + */ + private final Database database; + + /** + * The table for which to get the extent. + */ + private final TableReference table; + + /** + * All columns in the table (including non-geometry columns). + * This is a reference to an internal array; <strong>do not modify</strong>. + */ + private final Column[] columns; + + /** + * A temporary buffer with helper methods for building the SQL statement. + */ + private final SQLBuilder builder; + + /** + * The union of all extents found, or {@code null} if none. + */ + private GeneralEnvelope envelope; + + /** + * Errors that occurred during envelope transformations, or {@code null} if none. + */ + private TransformException error; + + /** + * Creates a new extent estimator for the specified table. + */ + ExtentEstimator(final Database database, final TableReference table, final Column[] columns) { + this.database = database; + this.table = table; + this.columns = columns; + this.builder = new SQLBuilder(database); + } + + /** + * Estimates the extent in the specified columns using PostgreSQL statistics. + * If there is no statistics available, then this method executes {@code ANALYZE} + * and tries again. + * + * @param statement statement to use for executing queries. Shall be closed by caller. + * @return an estimation of the union of extents in given columns, or {@code null} if unknown. + */ + GeneralEnvelope estimate(final Statement statement) throws SQLException { + query(statement); + if (envelope == null) { + builder.append("ANALYZE ").appendIdentifier(table.catalog, table.schema, table.table); + final String sql = builder.toString(); + builder.clear(); + statement.execute(sql); + query(statement); + } + if (error != null) { + database.listeners.warning(error); + } + return envelope; + } + + /** + * Estimates the extent in the specified columns using current statistics. + * If there is no statistics available, then this method returns {@code null}. + * + * @param statement statement to use for executing queries. Shall be closed by caller. + * @return an estimation of the union of extents in given columns, or {@code null} if unknown. + */ + private void query(final Statement statement) throws SQLException { + for (final Column column : columns) { + if (column.getGeometryType() != null) { + database.appendFunctionCall(builder.append("SELECT "), "ST_EstimatedExtent"); + builder.append('('); + if (table.schema != null) { + builder.appendValue(table.schema).append(", "); + } + final String sql = builder.appendValue(table.table).append(", ").appendValue(column.name).append(')').toString(); + builder.clear(); + try (ResultSet result = statement.executeQuery(sql)) { + while (result.next()) { + final String wkt = result.getString(1); + if (wkt != null) { + final GeneralEnvelope env = new GeneralEnvelope(wkt); + env.setCoordinateReferenceSystem(column.getDefaultCRS()); + if (envelope == null) { + envelope = env; + } else try { + envelope.add(Envelopes.transform(env, envelope.getCoordinateReferenceSystem())); + } catch (TransformException e) { + if (error == null) error = e; + else error.addSuppressed(e); + } + } + } + } + } + } + } +} diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/postgis/Postgres.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/postgis/Postgres.java index 87122c2..06925c8 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/postgis/Postgres.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/postgis/Postgres.java @@ -25,9 +25,11 @@ import java.sql.DatabaseMetaData; import java.sql.SQLException; import javax.sql.DataSource; import java.util.logging.Level; +import org.opengis.geometry.Envelope; import org.apache.sis.internal.feature.Geometries; import org.apache.sis.internal.sql.feature.BinaryEncoding; import org.apache.sis.internal.sql.feature.InfoStatements; +import org.apache.sis.internal.sql.feature.TableReference; import org.apache.sis.internal.sql.feature.Column; import org.apache.sis.internal.sql.feature.Database; import org.apache.sis.internal.sql.feature.ValueGetter; @@ -166,4 +168,25 @@ public final class Postgres<G> extends Database<G> { protected SelectionClauseWriter getFilterToSQL() { return ExtendedClauseWriter.INSTANCE; } + + /** + * Computes an estimation of the envelope of all geometry columns using PostgreSQL statistics if available. + * Uses the PostGIS {@code ST_EstimatedExtent(…)} function to get a rough estimation of column extent. + * This method is invoked only if the {@code columns} array contains at least one geometry column. + * + * @param table the table for which to compute an estimation of the envelope. + * @param columns all columns in the table (including non-geometry columns). + * This is a reference to an internal array; <strong>do not modify</strong>. + * @return an estimation of the spatiotemporal resource extent, or {@code null} if none. + * @throws SQLException if an error occurred while fetching the envelope. + * + * @see <a href="https://postgis.net/docs/ST_EstimatedExtent.html">ST_EstimatedExtent</a> + */ + @Override + protected Envelope getEstimatedExtent(final TableReference table, final Column[] columns) throws SQLException { + final ExtentEstimator ex = new ExtentEstimator(this, table, columns); + try (Connection c = source.getConnection(); Statement statement = c.createStatement()) { + return ex.estimate(statement); + } + } } diff --git a/storage/sis-sqlstore/src/test/java/org/apache/sis/internal/sql/postgis/PostgresTest.java b/storage/sis-sqlstore/src/test/java/org/apache/sis/internal/sql/postgis/PostgresTest.java index 63af275..abcf98d 100644 --- a/storage/sis-sqlstore/src/test/java/org/apache/sis/internal/sql/postgis/PostgresTest.java +++ b/storage/sis-sqlstore/src/test/java/org/apache/sis/internal/sql/postgis/PostgresTest.java @@ -24,10 +24,11 @@ import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.lang.reflect.Method; import java.util.stream.Stream; +import org.opengis.geometry.Envelope; import org.opengis.util.FactoryException; import org.apache.sis.setup.OptionKey; import org.apache.sis.setup.GeometryLibrary; -import org.apache.sis.storage.DataStoreException; +import org.apache.sis.storage.FeatureSet; import org.apache.sis.storage.sql.SQLStore; import org.apache.sis.storage.sql.SQLStoreProvider; import org.apache.sis.storage.sql.ResourceDefinition; @@ -105,8 +106,17 @@ public final strictfp class PostgresTest extends TestCase { testInfoStatements(info); testGeometryGetter(info, connection); testRasterReader(TestRaster.USHORT, info, connection); - testFeatures(store); } + /* + * Tests through public API. + */ + final FeatureSet resource = store.findResource("SpatialData"); + try (Stream<Feature> features = resource.features(false)) { + features.forEach(PostgresTest::validate); + } + final Envelope envelope = resource.getEnvelope().get(); + assertEquals(envelope.getMinimum(0), -72, 1); + assertEquals(envelope.getMaximum(1), 43, 1); } } } @@ -152,15 +162,6 @@ public final strictfp class PostgresTest extends TestCase { } /** - * Tests the construction of feature instances. - */ - private static void testFeatures(final SQLStore store) throws DataStoreException { - try (Stream<Feature> features = store.findResource("SpatialData").features(false)) { - features.forEach(PostgresTest::validate); - } - } - - /** * Invoked for each feature instances for performing some checks on the feature. * This method performs only a superficial verification of geometries. */ diff --git a/storage/sis-storage/src/main/java/org/apache/sis/storage/DataSet.java b/storage/sis-storage/src/main/java/org/apache/sis/storage/DataSet.java index 3a136a8..d6cd241 100644 --- a/storage/sis-storage/src/main/java/org/apache/sis/storage/DataSet.java +++ b/storage/sis-storage/src/main/java/org/apache/sis/storage/DataSet.java @@ -68,8 +68,11 @@ public interface DataSet extends Resource { * If this resource uses many different CRS with none of them covering all data, then the envelope should use a * global system (typically a {@linkplain org.apache.sis.referencing.crs.DefaultGeocentricCRS geographic CRS}). * - * <p>The returned envelope is not necessarily the smallest bounding box encompassing all data. - * If the smallest envelope is too costly to compute, this method may conservatively return a larger envelope.</p> + * <h4>Estimated envelopes</h4> + * The returned envelope is not necessarily the smallest bounding box encompassing all data. + * If the smallest envelope is too costly to compute, this method may conservatively return a larger envelope. + * The converse (returning a smaller envelope) should be avoided, but is not strictly forbidden + * because some resources may compute the envelope using only a subset of all the resource data. * * @return the spatiotemporal resource extent. May be absent if none or too costly to compute. * @throws DataStoreException if an error occurred while reading or computing the envelope.