deniskuzZ commented on code in PR #12629: URL: https://github.com/apache/iceberg/pull/12629#discussion_r2013847624
########## core/src/main/java/org/apache/iceberg/PartitionStatsUtil.java: ########## @@ -53,14 +56,43 @@ public static Collection<PartitionStats> computeStats(Table table, Snapshot snap StructType partitionType = Partitioning.partitionType(table); List<ManifestFile> manifests = snapshot.allManifests(table.io()); - Queue<PartitionMap<PartitionStats>> statsByManifest = Queues.newConcurrentLinkedQueue(); - Tasks.foreach(manifests) - .stopOnFailure() - .throwFailureWhenFinished() - .executeWith(ThreadPools.getWorkerPool()) - .run(manifest -> statsByManifest.add(collectStats(table, manifest, partitionType))); + return collectStats(table, manifests, partitionType).values(); + } + + /** + * Computes the partition stats incrementally after the given snapshot to current snapshot. + * + * @param table the table for which partition stats to be computed. + * @param afterSnapshot the snapshot after which partition stats is computed (exclusive). + * @param currentSnapshot the snapshot till which partition stats is computed (inclusive). + * @return the {@link PartitionMap} of {@link PartitionStats} + */ + public static PartitionMap<PartitionStats> computeStatsIncremental( Review Comment: Agreed, `analyze table <tbl> compute statistics` could be exercised for that. However, why not just add an extra parameter such as `forceRecompute`/`recompute` instead of introducing different method signatures? 
########## core/src/main/java/org/apache/iceberg/PartitionStatsUtil.java: ########## @@ -53,14 +56,43 @@ public static Collection<PartitionStats> computeStats(Table table, Snapshot snap StructType partitionType = Partitioning.partitionType(table); List<ManifestFile> manifests = snapshot.allManifests(table.io()); - Queue<PartitionMap<PartitionStats>> statsByManifest = Queues.newConcurrentLinkedQueue(); - Tasks.foreach(manifests) - .stopOnFailure() - .throwFailureWhenFinished() - .executeWith(ThreadPools.getWorkerPool()) - .run(manifest -> statsByManifest.add(collectStats(table, manifest, partitionType))); + return collectStats(table, manifests, partitionType).values(); + } + + /** + * Computes the partition stats incrementally after the given snapshot to current snapshot. + * + * @param table the table for which partition stats to be computed. + * @param afterSnapshot the snapshot after which partition stats is computed (exclusive). + * @param currentSnapshot the snapshot till which partition stats is computed (inclusive). + * @return the {@link PartitionMap} of {@link PartitionStats} + */ + public static PartitionMap<PartitionStats> computeStatsIncremental( Review Comment: Agreed, `analyze table <tbl> compute statistics` could be exercised for that. However, why not just add an extra parameter such as `forceRecompute`/`recompute` instead of introducing different method signatures? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org