aokolnychyi commented on code in PR #6371: URL: https://github.com/apache/iceberg/pull/6371#discussion_r1042627234
########## spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/Spark3Util.java: ########## @@ -255,74 +256,90 @@ public static org.apache.iceberg.Table toIcebergTable(Table table) { return sparkTable.table(); } + public static Transform[] toTransforms(Schema schema, List<PartitionField> fields) { + SpecTransformToSparkTransform visitor = new SpecTransformToSparkTransform(schema); + + List<Transform> transforms = Lists.newArrayList(); + + for (PartitionField field : fields) { + Transform transform = PartitionSpecVisitor.visit(schema, field, visitor); + if (transform != null) { + transforms.add(transform); + } + } + + return transforms.toArray(new Transform[0]); + } + /** * Converts a PartitionSpec to Spark transforms. * * @param spec a PartitionSpec * @return an array of Transforms */ public static Transform[] toTransforms(PartitionSpec spec) { - Map<Integer, String> quotedNameById = SparkSchemaUtil.indexQuotedNameById(spec.schema()); - List<Transform> transforms = - PartitionSpecVisitor.visit( - spec, - new PartitionSpecVisitor<Transform>() { - @Override - public Transform identity(String sourceName, int sourceId) { - return Expressions.identity(quotedName(sourceId)); - } + SpecTransformToSparkTransform visitor = new SpecTransformToSparkTransform(spec.schema()); + List<Transform> transforms = PartitionSpecVisitor.visit(spec, visitor); + return transforms.stream().filter(Objects::nonNull).toArray(Transform[]::new); + } - @Override - public Transform bucket(String sourceName, int sourceId, int numBuckets) { - return Expressions.bucket(numBuckets, quotedName(sourceId)); - } + private static class SpecTransformToSparkTransform implements PartitionSpecVisitor<Transform> { + private final Map<Integer, String> quotedNameById; - @Override - public Transform truncate(String sourceName, int sourceId, int width) { - return Expressions.apply( - "truncate", - Expressions.column(quotedName(sourceId)), - Expressions.literal(width)); - } + SpecTransformToSparkTransform(Schema 
schema) { + this.quotedNameById = SparkSchemaUtil.indexQuotedNameById(schema); + } - @Override - public Transform year(String sourceName, int sourceId) { - return Expressions.years(quotedName(sourceId)); - } + @Override + public Transform identity(String sourceName, int sourceId) { + return Expressions.identity(quotedName(sourceId)); + } - @Override - public Transform month(String sourceName, int sourceId) { - return Expressions.months(quotedName(sourceId)); - } + @Override + public Transform bucket(String sourceName, int sourceId, int numBuckets) { + return Expressions.bucket(numBuckets, quotedName(sourceId)); + } - @Override - public Transform day(String sourceName, int sourceId) { - return Expressions.days(quotedName(sourceId)); - } + @Override + public Transform truncate(String sourceName, int sourceId, int width) { + NamedReference column = Expressions.column(quotedName(sourceId)); + return Expressions.apply("truncate", Expressions.literal(width), column); Review Comment: I think the only place that would change is the string output of the partitioning in `SparkTable`. Otherwise, we handle both argument orders (column first or literal first) in the `TruncateTransform` extractor. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org