zachdisc commented on code in PR #9731: URL: https://github.com/apache/iceberg/pull/9731#discussion_r1498182627
########## spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteManifestsSparkAction.java: ########## @@ -90,13 +93,45 @@ public class RewriteManifestsSparkAction public static final String USE_CACHING = "use-caching"; public static final boolean USE_CACHING_DEFAULT = false; + private List<String> partitionSortColumns = null; private static final Logger LOG = LoggerFactory.getLogger(RewriteManifestsSparkAction.class); private static final RewriteManifests.Result EMPTY_RESULT = ImmutableRewriteManifests.Result.builder() .rewrittenManifests(ImmutableList.of()) .addedManifests(ImmutableList.of()) .build(); + /** + * Supply an optional set of partition columns to sort the rewritten manifests by. Expects real + * column names used for partitioning; will resolve to the proper hidden partition names. + * + * @param partitionSortOrder - Order of partitions to sort manifests by + * @return this action + */ + @Override + public RewriteManifestsSparkAction sort(List<String> partitionSortOrder) { + // Build up a mapping of input column -> partition column name (AKA x -> x_bucket_1000) + Map<String, String> partitionFieldMap = + spec.fields().stream() + .map( + partitionField -> + Map.entry( + spec.schema().findField(partitionField.sourceId()).name(), + partitionField.name())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + // Check if these partitions are included in the spec + Preconditions.checkArgument( Review Comment: Say there are 2 partition specs for this table: Spec1 partitions by (`a_bucket, b`) and Spec2 partitions by (`a_bucket_100, c_days`). If you first pass a valid `sort(b, a)` and then change the `.specId` to 2, the previously validated custom column order will no longer be accurate, because it was validated against Spec1 rather than Spec2. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org