amogh-jahagirdar commented on code in PR #12496:
URL: https://github.com/apache/iceberg/pull/12496#discussion_r2007868196
########## parquet/src/main/java/org/apache/iceberg/parquet/ParquetVariantUtil.java:
##########
@@ -0,0 +1,476 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.parquet;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Comparator;
+import java.util.Deque;
+import java.util.List;
+import java.util.Optional;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.types.Comparators;
+import org.apache.iceberg.util.BinaryUtil;
+import org.apache.iceberg.util.UnicodeUtil;
+import org.apache.iceberg.variants.PhysicalType;
+import org.apache.iceberg.variants.VariantArray;
+import org.apache.iceberg.variants.VariantMetadata;
+import org.apache.iceberg.variants.VariantObject;
+import org.apache.iceberg.variants.VariantPrimitive;
+import org.apache.iceberg.variants.VariantValue;
+import org.apache.iceberg.variants.VariantVisitor;
+import org.apache.iceberg.variants.Variants;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.DateLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.IntLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor;
+import org.apache.parquet.schema.LogicalTypeAnnotation.StringLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.TimeLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+import org.apache.parquet.schema.Types;
+
+class ParquetVariantUtil {
+  private ParquetVariantUtil() {}
+
+  /**
+   * Convert a Parquet {@link PrimitiveType} to the equivalent Variant {@link PhysicalType}.
+   *
+   * @param primitive a Parquet {@link PrimitiveType}
+   * @return a Variant {@link PhysicalType}
+   * @throws UnsupportedOperationException if the Parquet type is not equivalent to a variant type
+   */
+  static PhysicalType convert(PrimitiveType primitive) {
+    LogicalTypeAnnotation annotation = primitive.getLogicalTypeAnnotation();
+    if (annotation != null) {
+      return annotation.accept(PhysicalTypeConverter.INSTANCE).orElse(null);
+    }
+
+    switch (primitive.getPrimitiveTypeName()) {
+      case BOOLEAN:
+        return PhysicalType.BOOLEAN_TRUE;
+      case INT32:
+        return PhysicalType.INT32;
+      case INT64:
+        return PhysicalType.INT64;
+      case FLOAT:
+        return PhysicalType.FLOAT;
+      case DOUBLE:
+        return PhysicalType.DOUBLE;
+      case BINARY:
+        return PhysicalType.BINARY;
+      default:
+        return null;
+    }
+  }
+
+  /**
+   * Serialize Variant metadata and value in a single concatenated buffer.
+   *
+   * @param metadata a {VariantMetadata}
+   * @param value a {VariantValue}
+   * @return a buffer containing the metadata and value serialized and concatenated
+   */
+  static ByteBuffer toByteBuffer(VariantMetadata metadata, VariantValue value) {
+    ByteBuffer buffer =
+        ByteBuffer.allocate(metadata.sizeInBytes() + value.sizeInBytes())
+            .order(ByteOrder.LITTLE_ENDIAN);
+    metadata.writeTo(buffer, 0);
+    value.writeTo(buffer, metadata.sizeInBytes());
+    return buffer;
+  }
+
+  /**
+   * Converts a Parquet primitive value to a Variant in-memory value for the given {@link
+   * PhysicalType type}.
+   *
+   * @param primitive a primitive variant {@link PhysicalType}
+   * @param scale decimal scale used when the value is a decimal
+   * @param value a value from Parquet value to convert
+   * @param <T> Java type used for values of the given Variant physical type
+   * @return the primitive value
+   */
+  @SuppressWarnings("unchecked")
+  static <T> T convertValue(PhysicalType primitive, int scale, Object value) {
+    switch (primitive) {
+      case BOOLEAN_FALSE:
+      case BOOLEAN_TRUE:
+      case INT32:
+      case INT64:
+      case DATE:
+      case TIMESTAMPTZ:
+      case TIMESTAMPNTZ:
+      case FLOAT:
+      case DOUBLE:
+        return (T) value;
+      case INT8:
+        return (T) (Byte) ((Number) value).byteValue();
+      case INT16:
+        return (T) (Short) ((Number) value).shortValue();
+      case DECIMAL4:
+      case DECIMAL8:
+        return (T) BigDecimal.valueOf(((Number) value).longValue(), scale);
+      case DECIMAL16:
+        return (T) new BigDecimal(new BigInteger(((Binary) value).getBytes()), scale);
+      case BINARY:
+        return (T) ((Binary) value).toByteBuffer();
+      case STRING:
+        return (T) ((Binary) value).toStringUsingUTF8();
+      default:
+        throw new IllegalStateException("Invalid bound type: " + primitive);
+    }
+  }
+
+  /**
+   * Returns a comparator for values of the given primitive {@link PhysicalType type}.
+   *
+   * @param primitive a primitive variant {@link PhysicalType}
+   * @param <T> Java type used for values of the given Variant physical type
+   * @return a comparator for in-memory values of the given type
+   */
+  @SuppressWarnings("unchecked")
+  static <T> Comparator<T> comparator(PhysicalType primitive) {
+    if (primitive == PhysicalType.BINARY) {
+      return (Comparator<T>) Comparators.unsignedBytes();
+    } else {
+      return (Comparator<T>) Comparator.naturalOrder();
+    }
+  }
+
+  /**
+   * Returns a decimal scale if the primitive is a decimal, or 0 otherwise.
+   *
+   * @param primitive a primitive type
+   * @return decimal scale if the primitive is a decimal, 0 otherwise
+   */
+  static int scale(PrimitiveType primitive) {
+    LogicalTypeAnnotation annotation = primitive.getLogicalTypeAnnotation();
+    if (annotation instanceof DecimalLogicalTypeAnnotation) {
+      return ((DecimalLogicalTypeAnnotation) annotation).getScale();
+    }
+
+    return 0;
+  }
+
+  /**
+   * Creates a Parquet schema to fully shred the {@link VariantValue}.
+   *
+   * @param value a variant value
+   * @return a Parquet schema that can fully shred the value
+   */
+  static Type toParquetSchema(VariantValue value) {
+    return VariantVisitor.visit(value, new ParquetSchemaProducer());
+  }
+
+  private static class PhysicalTypeConverter implements LogicalTypeAnnotationVisitor<PhysicalType> {
+    private static final PhysicalTypeConverter INSTANCE = new PhysicalTypeConverter();
+
+    @Override
+    public Optional<PhysicalType> visit(StringLogicalTypeAnnotation ignored) {
+      return Optional.of(PhysicalType.STRING);
+    }
+
+    @Override
+    public Optional<PhysicalType> visit(DecimalLogicalTypeAnnotation decimal) {
+      if (decimal.getPrecision() <= 9) {
+        return Optional.of(PhysicalType.DECIMAL4);
+      } else if (decimal.getPrecision() <= 18) {
+        return Optional.of(PhysicalType.DECIMAL8);
+      } else {
+        return Optional.of(PhysicalType.DECIMAL16);
+      }
+    }
+
+    @Override
+    public Optional<PhysicalType> visit(DateLogicalTypeAnnotation ignored) {
+      return Optional.of(PhysicalType.DATE);
+    }
+
+    @Override
+    public Optional<PhysicalType> visit(TimeLogicalTypeAnnotation ignored) {
+      return Optional.empty();
+    }
+
+    @Override
+    public Optional<PhysicalType> visit(TimestampLogicalTypeAnnotation timestamps) {
+      switch (timestamps.getUnit()) {
+        case MICROS:
+          if (timestamps.isAdjustedToUTC()) {
+            return Optional.of(PhysicalType.TIMESTAMPTZ);
+          } else {
+            return Optional.of(PhysicalType.TIMESTAMPNTZ);
+          }
+        case NANOS:
+        default:
+          return Optional.empty();
+      }
+    }
+
+    @Override
+    public Optional<PhysicalType> visit(IntLogicalTypeAnnotation ints) {
+      if (!ints.isSigned()) {
+        return Optional.empty();
+      }
+
+      switch (ints.getBitWidth()) {
+        case 8:
+          return Optional.of(PhysicalType.INT8);
+        case 16:
+          return Optional.of(PhysicalType.INT16);
+        case 32:
+          return Optional.of(PhysicalType.INT32);
+        case 64:
+          return Optional.of(PhysicalType.INT64);
+        default:
+          return Optional.empty();
+      }
+    }
+
+    @Override
+    public Optional<PhysicalType> visit(UUIDLogicalTypeAnnotation uuidLogicalType) {
+      return Optional.empty();
+    }
+  }
+
+  static class VariantMetrics {
+    private final long valueCount;
+    private final long nullCount;
+    private final VariantValue lowerBound;
+    private final VariantValue upperBound;
+    private final Deque<String> fieldNames = Lists.newLinkedList();
+    private String lazyFieldName = null;
+
+    VariantMetrics(long valueCount, long nullCount) {
+      this.valueCount = valueCount;
+      this.nullCount = nullCount;
+      this.lowerBound = null;
+      this.upperBound = null;
+    }
+
+    VariantMetrics(
+        long valueCount, long nullCount, VariantValue lowerBound, VariantValue upperBound) {
+      this.valueCount = valueCount;
+      this.nullCount = nullCount;
+      this.lowerBound = truncateLowerBound(lowerBound);
+      this.upperBound = truncateUpperBound(upperBound);
+    }
+
+    VariantMetrics prependFieldName(String name) {
+      this.lazyFieldName = null;
+      fieldNames.addFirst(name);
+      return this;
+    }
+
+    public String fieldName() {
+      if (null == lazyFieldName) {
+        this.lazyFieldName = String.join(".", fieldNames);
+      }
+
+      return lazyFieldName;
+    }
+
+    public long valueCount() {
+      return valueCount;
+    }
+
+    public long nullCount() {
+      return nullCount;
+    }
+
+    public VariantValue lowerBound() {
+      return lowerBound;
+    }
+
+    public VariantValue upperBound() {
+      return upperBound;
+    }
+
+    private static VariantValue truncateLowerBound(VariantValue value) {
+      switch (value.type()) {
+        case STRING:
+          return Variants.of(
+              PhysicalType.STRING,
+              UnicodeUtil.truncateStringMin((String) value.asPrimitive().get(), 16));
+        case BINARY:
+          return Variants.of(
+              PhysicalType.BINARY,
+              BinaryUtil.truncateBinaryMin((ByteBuffer) value.asPrimitive().get(), 16));
+        default:
+          return value;
+      }
+    }
+
+    private static VariantValue truncateUpperBound(VariantValue value) {
+      switch (value.type()) {
+        case STRING:
+          String truncatedString =
+              UnicodeUtil.truncateStringMax((String) value.asPrimitive().get(), 16);
+          return truncatedString != null ? Variants.of(PhysicalType.STRING, truncatedString) : null;
+        case BINARY:
+          ByteBuffer truncatedBuffer =
+              BinaryUtil.truncateBinaryMin((ByteBuffer) value.asPrimitive().get(), 16);

Review Comment:
   Nit: Do we want to move 16 to an internal constant?
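For illustration, the refactor this nit suggests might look like the sketch below. The constant name `BOUND_TRUNCATE_LENGTH` and the standalone class are hypothetical, not part of the PR; the truncation calls mirror the ones in the diff above.

```java
import java.nio.ByteBuffer;
import org.apache.iceberg.util.BinaryUtil;
import org.apache.iceberg.util.UnicodeUtil;
import org.apache.iceberg.variants.PhysicalType;
import org.apache.iceberg.variants.VariantValue;
import org.apache.iceberg.variants.Variants;

// Sketch: hoist the repeated literal 16 into a named constant shared by the
// truncation paths. Names are illustrative; the calls mirror the diff above.
class VariantBounds {
  private static final int BOUND_TRUNCATE_LENGTH = 16;

  static VariantValue truncateLowerBound(VariantValue value) {
    switch (value.type()) {
      case STRING:
        return Variants.of(
            PhysicalType.STRING,
            UnicodeUtil.truncateStringMin(
                (String) value.asPrimitive().get(), BOUND_TRUNCATE_LENGTH));
      case BINARY:
        return Variants.of(
            PhysicalType.BINARY,
            BinaryUtil.truncateBinaryMin(
                (ByteBuffer) value.asPrimitive().get(), BOUND_TRUNCATE_LENGTH));
      default:
        return value;
    }
  }
}
```

Either placement (on `VariantMetrics` or at the top of `ParquetVariantUtil`) would keep the lower- and upper-bound paths in sync.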
########## parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetrics.java:
##########
@@ -0,0 +1,613 @@
+/*
+ *
+ * * Licensed to the Apache Software Foundation (ASF) under one
+ * * or more contributor license agreements. See the NOTICE file
+ * * distributed with this work for additional information
+ * * regarding copyright ownership. The ASF licenses this file
+ * * to you under the Apache License, Version 2.0 (the
+ * * "License"); you may not use this file except in compliance
+ * * with the License. You may obtain a copy of the License at
+ * *
+ * *   http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing,
+ * * software distributed under the License is distributed on an
+ * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * * KIND, either express or implied. See the License for the
+ * * specific language governing permissions and limitations
+ * * under the License.
+ *
+ */
+
+package org.apache.iceberg.parquet;
+
+import java.nio.ByteBuffer;
+import java.util.Comparator;
+import java.util.Deque;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.iceberg.FieldMetrics;
+import org.apache.iceberg.Metrics;
+import org.apache.iceberg.MetricsConfig;
+import org.apache.iceberg.MetricsModes;
+import org.apache.iceberg.MetricsUtil;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.relocated.com.google.common.collect.Multimap;
+import org.apache.iceberg.relocated.com.google.common.collect.Multimaps;
+import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.relocated.com.google.common.collect.Streams;
+import org.apache.iceberg.types.Comparators;
+import org.apache.iceberg.types.Conversions;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.BinaryUtil;
+import org.apache.iceberg.util.NaNUtil;
+import org.apache.iceberg.util.UnicodeUtil;
+import org.apache.iceberg.variants.PhysicalType;
+import org.apache.iceberg.variants.ShreddedObject;
+import org.apache.iceberg.variants.VariantMetadata;
+import org.apache.iceberg.variants.VariantValue;
+import org.apache.iceberg.variants.Variants;
+import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnPath;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+
+class ParquetMetrics {
+  private ParquetMetrics() {}
+
+  static Metrics metrics(
+      Schema schema,
+      MessageType type,
+      MetricsConfig metricsConfig,
+      ParquetMetadata metadata,
+      Stream<FieldMetrics<?>> fields) {
+    long rowCount = 0L;
+    Map<Integer, Long> columnSizes = Maps.newHashMap();
+    Multimap<ColumnPath, ColumnChunkMetaData> columns =
+        Multimaps.newMultimap(Maps.newHashMap(), Lists::newArrayList);
+    for (BlockMetaData block : metadata.getBlocks()) {
+      rowCount += block.getRowCount();
+      for (ColumnChunkMetaData column : block.getColumns()) {
+        Type.ID id =
+            type.getColumnDescription(column.getPath().toArray()).getPrimitiveType().getId();
+        if (null == id) {
+          continue;
+        }
+
+        int fieldId = id.intValue();
+        MetricsModes.MetricsMode mode = MetricsUtil.metricsMode(schema, metricsConfig, fieldId);
+        if (mode != MetricsModes.None.get()) {
+          columns.put(column.getPath(), column);
+          columnSizes.put(fieldId, columnSizes.getOrDefault(fieldId, 0L) + column.getTotalSize());
+        }
+      }
+    }
+
+    Map<Integer, FieldMetrics<?>> metricsById =
+        fields.collect(Collectors.toMap(FieldMetrics::id, Function.identity()));
+
+    Iterable<FieldMetrics<ByteBuffer>> results =
+        TypeWithSchemaVisitor.visit(
+            schema.asStruct(),
+            type,
+            new MetricsVisitor(schema, metricsConfig, metricsById, columns));
+
+    Map<Integer, Long> valueCounts = Maps.newHashMap();
+    Map<Integer, Long> nullValueCounts = Maps.newHashMap();
+    Map<Integer, Long> nanValueCounts = Maps.newHashMap();
+    Map<Integer, ByteBuffer> lowerBounds = Maps.newHashMap();
+    Map<Integer, ByteBuffer> upperBounds = Maps.newHashMap();
+
+    for (FieldMetrics<ByteBuffer> metrics : results) {
+      int id = metrics.id();
+      if (metrics.valueCount() >= 0) {
+        valueCounts.put(id, metrics.valueCount());
+      }
+
+      if (metrics.nullValueCount() >= 0) {
+        nullValueCounts.put(id, metrics.nullValueCount());
+      }
+
+      if (metrics.nanValueCount() >= 0) {
+        nanValueCounts.put(id, metrics.nanValueCount());
+      }
+
+      if (metrics.hasBounds()) {
+        lowerBounds.put(id, metrics.lowerBound());
+        upperBounds.put(id, metrics.upperBound());
+      }
+    }
+
+    return new Metrics(
+        rowCount,
+        columnSizes,
+        valueCounts,
+        nullValueCounts,
+        nanValueCounts,
+        lowerBounds,
+        upperBounds);
+  }
+
+  private static class MetricsVisitor
+      extends TypeWithSchemaVisitor<Iterable<FieldMetrics<ByteBuffer>>> {
+    private final Schema schema;
+    private final MetricsConfig metricsConfig;
+    private final Map<Integer, FieldMetrics<?>> metricsById;
+    private final Multimap<ColumnPath, ColumnChunkMetaData> columns;
+
+    private MetricsVisitor(
+        Schema schema,
+        MetricsConfig metricsConfig,
+        Map<Integer, FieldMetrics<?>> metricsById,
+        Multimap<ColumnPath, ColumnChunkMetaData> columns) {
+      this.schema = schema;
+      this.metricsConfig = metricsConfig;
+      this.metricsById = metricsById;
+      this.columns = columns;
+    }
+
+    @Override
+    public Iterable<FieldMetrics<ByteBuffer>> message(
+        Types.StructType iStruct,
+        MessageType message,
+        List<Iterable<FieldMetrics<ByteBuffer>>> fieldResults) {
+      return Iterables.concat(fieldResults);
+    }
+
+    @Override
+    public Iterable<FieldMetrics<ByteBuffer>> struct(
+        Types.StructType iStruct,
+        GroupType struct,
+        List<Iterable<FieldMetrics<ByteBuffer>>> fieldResults) {
+      return Iterables.concat(fieldResults);
+    }
+
+    @Override
+    public Iterable<FieldMetrics<ByteBuffer>> list(
+        Types.ListType iList, GroupType array, Iterable<FieldMetrics<ByteBuffer>> elementResults) {
+      // remove lower and upper bounds for repeated fields
+      return ImmutableList.of();
+    }
+
+    @Override
+    public Iterable<FieldMetrics<ByteBuffer>> map(
+        Types.MapType iMap,
+        GroupType map,
+        Iterable<FieldMetrics<ByteBuffer>> keyResults,
+        Iterable<FieldMetrics<ByteBuffer>> valueResults) {
+      // repeated fields are not currently supported
+      return ImmutableList.of();
+    }
+
+    @Override
+    public Iterable<FieldMetrics<ByteBuffer>> primitive(
+        org.apache.iceberg.types.Type.PrimitiveType iPrimitive, PrimitiveType primitive) {
+      Type.ID id = primitive.getId();
+      if (null == id) {
+        return ImmutableList.of();
+      }
+      int fieldId = id.intValue();
+
+      MetricsModes.MetricsMode mode = MetricsUtil.metricsMode(schema, metricsConfig, fieldId);
+      if (mode == MetricsModes.None.get()) {
+        return ImmutableList.of();
+      }
+
+      int length = truncateLength(mode);
+
+      FieldMetrics<ByteBuffer> metrics = metricsFromFieldMetrics(fieldId, iPrimitive, length);
+      if (metrics != null) {
+        return ImmutableList.of(metrics);
+      }
+
+      metrics = metricsFromFooter(fieldId, iPrimitive, primitive, length);
+      if (metrics != null) {
+        return ImmutableList.of(metrics);
+      }
+
+      return ImmutableList.of();
+    }
+
+    private FieldMetrics<ByteBuffer> metricsFromFieldMetrics(
+        int fieldId, org.apache.iceberg.types.Type.PrimitiveType icebergType, int truncateLength) {
+      FieldMetrics<?> fieldMetrics = metricsById.get(fieldId);
+      if (null == fieldMetrics) {
+        return null;
+      } else if (truncateLength <= 0) {
+        return new FieldMetrics<>(
+            fieldMetrics.id(),
+            fieldMetrics.valueCount(),
+            fieldMetrics.nullValueCount(),
+            fieldMetrics.nanValueCount());
+      } else {
+        Object lowerBound =
+            truncateLowerBound(icebergType, fieldMetrics.lowerBound(), truncateLength);
+        Object upperBound =
+            truncateUpperBound(icebergType, fieldMetrics.upperBound(), truncateLength);
+        ByteBuffer lower = Conversions.toByteBuffer(icebergType, lowerBound);
+        ByteBuffer upper = Conversions.toByteBuffer(icebergType, upperBound);
+        return new FieldMetrics<>(
+            fieldMetrics.id(),
+            fieldMetrics.valueCount(),
+            fieldMetrics.nullValueCount(),
+            fieldMetrics.nanValueCount(),
+            lower,
+            upper);
+      }
+    }
+
+    private FieldMetrics<ByteBuffer> metricsFromFooter(
+        int fieldId,
+        org.apache.iceberg.types.Type.PrimitiveType icebergType,
+        PrimitiveType primitive,
+        int truncateLength) {
+      if (primitive.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT96) {
+        return null;
+      } else if (truncateLength <= 0) {
+        return counts(fieldId);
+      } else {
+        return bounds(fieldId, icebergType, primitive, truncateLength);
+      }
+    }
+
+    private FieldMetrics<ByteBuffer> counts(int fieldId) {
+      ColumnPath path = ColumnPath.get(currentPath());
+      long valueCount = 0;
+      long nullCount = 0;
+
+      for (ColumnChunkMetaData column : columns.get(path)) {
+        Statistics<?> stats = column.getStatistics();
+        if (stats == null || stats.isEmpty()) {
+          return null;
+        }
+
+        nullCount += stats.getNumNulls();
+        valueCount += column.getValueCount();
+      }
+
+      return new FieldMetrics<>(fieldId, valueCount, nullCount);
+    }
+
+    private <T> FieldMetrics<ByteBuffer> bounds(
+        int fieldId,
+        org.apache.iceberg.types.Type.PrimitiveType icebergType,
+        PrimitiveType primitive,
+        int truncateLength) {
+      if (icebergType == null) {
+        return null;
+      }
+
+      ColumnPath path = ColumnPath.get(currentPath());
+      Comparator<T> comparator = Comparators.forType(icebergType);
+      long valueCount = 0;
+      long nullCount = 0;
+      T lowerBound = null;
+      T upperBound = null;
+
+      for (ColumnChunkMetaData column : columns.get(path)) {
+        Statistics<?> stats = column.getStatistics();
+        if (stats == null || stats.isEmpty()) {
+          return null;
+        }
+
+        nullCount += stats.getNumNulls();
+        valueCount += column.getValueCount();
+
+        if (stats.hasNonNullValue()) {
+          T chunkMin =
+              ParquetConversions.convertValue(icebergType, primitive, stats.genericGetMin());
+          if (lowerBound == null || comparator.compare(chunkMin, lowerBound) < 0) {
+            lowerBound = chunkMin;
+          }
+
+          T chunkMax =
+              ParquetConversions.convertValue(icebergType, primitive, stats.genericGetMax());
+          if (upperBound == null || comparator.compare(chunkMax, upperBound) > 0) {
+            upperBound = chunkMax;
+          }
+        }
+      }
+
+      if (NaNUtil.isNaN(lowerBound) || NaNUtil.isNaN(upperBound)) {
+        return new FieldMetrics<>(fieldId, valueCount, nullCount);
+      }
+
+      lowerBound = truncateLowerBound(icebergType, lowerBound, truncateLength);
+      upperBound = truncateUpperBound(icebergType, upperBound, truncateLength);
+
+      ByteBuffer lower = Conversions.toByteBuffer(icebergType, lowerBound);
+      ByteBuffer upper = Conversions.toByteBuffer(icebergType, upperBound);
+
+      return new FieldMetrics<>(fieldId, valueCount, nullCount, lower, upper);
+    }
+
+    @Override
+    public Iterable<FieldMetrics<ByteBuffer>> variant(
+        Types.VariantType iVariant, GroupType variant, Iterable<FieldMetrics<ByteBuffer>> ignored) {
+      Type.ID id = variant.getId();
+      if (null == id) {
+        return ImmutableList.of();
+      }
+      int fieldId = id.intValue();
+
+      MetricsModes.MetricsMode mode = MetricsUtil.metricsMode(schema, metricsConfig, fieldId);
+      if (mode == MetricsModes.None.get()) {
+        return ImmutableList.of();
+      }
+
+      List<ParquetVariantUtil.VariantMetrics> results =
+          Lists.newArrayList(
+              ParquetVariantVisitor.visit(variant, new MetricsVariantVisitor(currentPath())));
+
+      if (results.size() <= 1) {
+        return ImmutableList.of();
+      }
+
+      ParquetVariantUtil.VariantMetrics metadataCounts = results.get(0);
+      if (mode == MetricsModes.Counts.get()) {
+        return ImmutableList.of(
+            new FieldMetrics<>(fieldId, metadataCounts.valueCount(), metadataCounts.nullCount()));
+      }
+
+      Set<String> fieldNames = Sets.newTreeSet();
+      for (ParquetVariantUtil.VariantMetrics result : results.subList(1, results.size())) {
+        fieldNames.add(result.fieldName());
+      }
+
+      VariantMetadata metadata = Variants.metadata(fieldNames);
+      ShreddedObject lowerBounds = Variants.object(metadata);
+      ShreddedObject upperBounds = Variants.object(metadata);
+      for (ParquetVariantUtil.VariantMetrics result : results.subList(1, results.size())) {
+        String fieldName = result.fieldName();
+        lowerBounds.put(fieldName, result.lowerBound());
+        upperBounds.put(fieldName, result.upperBound());
+      }
+
+      return ImmutableList.of(
+          new FieldMetrics<>(

Review Comment:
   I feel like truncate(16) for all the shredded fields is the most intuitive of the options. That said, I was wondering: would it make more sense to pass the `MetricsMode` to `MetricsVariantVisitor`? That way, if it's a truncate mode, we could use the user-set property instead of ignoring it. I don't think this is a hard blocker though, since the default is already truncate(16), and it's probably a sane truncation for the shredded fields as well.
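A minimal sketch of that wiring, assuming the visitor gains access to the column's mode; the class name, method placement, and `DEFAULT_TRUNCATE_LENGTH` are hypothetical, while `MetricsModes.Truncate.length()` is the existing accessor for a user-configured truncate(n) mode.

```java
import org.apache.iceberg.MetricsModes;
import org.apache.iceberg.MetricsModes.MetricsMode;

// Sketch: derive the shredded-field truncation length from the MetricsMode
// instead of hardcoding 16, mirroring how primitive() above already resolves
// its length via a truncateLength(mode) helper. Names are illustrative.
class VariantTruncateLength {
  // default used by the PR when no user truncate(n) is configured
  private static final int DEFAULT_TRUNCATE_LENGTH = 16;

  static int truncateLength(MetricsMode mode) {
    if (mode instanceof MetricsModes.Truncate) {
      // honor a user-configured truncate(n) for shredded variant bounds
      return ((MetricsModes.Truncate) mode).length();
    }

    return DEFAULT_TRUNCATE_LENGTH;
  }
}
```

Since `None` and `Counts` are already handled before bounds are produced in `variant()`, only the `Truncate`/`Full` distinction would need to reach the visitor.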