aihuaxu commented on code in PR #11415: URL: https://github.com/apache/iceberg/pull/11415#discussion_r1866700545
########## core/src/main/java/org/apache/iceberg/variants/VariantArray.java: ########## @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +/** An variant array value. */ +public interface VariantArray extends VariantValue { + /** Returns the {@link VariantValue} at {@code index} in this array. */ + VariantValue get(int index); + + @Override + default Variants.PhysicalType type() { + return Variants.PhysicalType.ARRAY; + } + + @Override + default VariantArray asArray() { Review Comment: Can we have a code coverage for this? ########## core/src/main/java/org/apache/iceberg/variants/Variant.java: ########## @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +/** A variant metadata and value pair. */ +public interface Variant { Review Comment: Currently this is not used. We include for the future use? ########## core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java: ########## @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.variants.Variants.Primitives; + +class PrimitiveWrapper<T> implements VariantPrimitive<T> { + private static final byte NULL_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_NULL); + private static final byte TRUE_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_TRUE); + private static final byte FALSE_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_FALSE); + private static final byte INT8_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_INT8); + private static final byte INT16_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_INT16); + private static final byte INT32_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_INT32); + private static final byte INT64_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_INT64); + private static final byte FLOAT_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_FLOAT); + private static final byte DOUBLE_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_DOUBLE); + private static final byte DATE_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_DATE); + private static final byte TIMESTAMPTZ_HEADER = + VariantUtil.primitiveHeader(Primitives.TYPE_TIMESTAMPTZ); + private static final byte TIMESTAMPNTZ_HEADER = + VariantUtil.primitiveHeader(Primitives.TYPE_TIMESTAMPNTZ); + private static final byte DECIMAL4_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_DECIMAL4); + private static final byte DECIMAL8_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_DECIMAL8); + private static final byte DECIMAL16_HEADER = + VariantUtil.primitiveHeader(Primitives.TYPE_DECIMAL16); + private static final byte BINARY_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_BINARY); + private static final byte STRING_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_STRING); + + private final Variants.PhysicalType type; + private final T value; + private ByteBuffer buffer = null; + + PrimitiveWrapper(Variants.PhysicalType type, T value) { + this.type = type; + this.value = value; + } + + @Override + public Variants.PhysicalType type() { + return type; + } + + @Override + public T get() { + return value; + } + + @Override + public int sizeInBytes() { + switch (type()) { + case NULL: + case BOOLEAN_TRUE: + case BOOLEAN_FALSE: + return 1; // 1 header only + case INT8: + return 2; // 1 header + 1 value + case INT16: + return 3; // 1 header + 2 value + case INT32: + case DATE: + case FLOAT: + return 5; // 1 header + 4 value + case INT64: + case DOUBLE: + case TIMESTAMPTZ: + case TIMESTAMPNTZ: + return 9; // 1 header + 8 value + case DECIMAL4: + return 6; // 1 header + 1 scale + 4 unscaled value + case DECIMAL8: + return 10; // 1 header + 1 scale + 8 unscaled value + case DECIMAL16: + return 18; // 1 header + 1 scale + 16 unscaled value + case BINARY: + return 5 + ((ByteBuffer) value).remaining(); // 1 header + 4 length + value length Review Comment: Not very sure about remaining method but as I read the doc, would it be inaccurate if part of the buffer is read? ########## core/src/main/java/org/apache/iceberg/variants/Variants.java: ########## @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.util.DateTimeUtil; + +public class Variants { + enum LogicalType { + NULL, + BOOLEAN, + EXACT_NUMERIC, + FLOAT, + DOUBLE, + DATE, + TIMESTAMPTZ, + TIMESTAMPNTZ, + BINARY, + STRING, + ARRAY, + OBJECT + } + + public enum PhysicalType { + NULL(LogicalType.NULL, Void.class), + BOOLEAN_TRUE(LogicalType.BOOLEAN, Boolean.class), + BOOLEAN_FALSE(LogicalType.BOOLEAN, Boolean.class), + INT8(LogicalType.EXACT_NUMERIC, Integer.class), + INT16(LogicalType.EXACT_NUMERIC, Integer.class), + INT32(LogicalType.EXACT_NUMERIC, Integer.class), + INT64(LogicalType.EXACT_NUMERIC, Long.class), + DOUBLE(LogicalType.DOUBLE, Double.class), + DECIMAL4(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DECIMAL8(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DECIMAL16(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DATE(LogicalType.DATE, Integer.class), + TIMESTAMPTZ(LogicalType.TIMESTAMPTZ, Long.class), + TIMESTAMPNTZ(LogicalType.TIMESTAMPNTZ, Long.class), + FLOAT(LogicalType.FLOAT, Float.class), + BINARY(LogicalType.BINARY, ByteBuffer.class), + STRING(LogicalType.STRING, String.class), + ARRAY(LogicalType.ARRAY, List.class), + OBJECT(LogicalType.OBJECT, Map.class); + + private final LogicalType logicalType; + private final Class<?> javaClass; + + PhysicalType(LogicalType logicalType, Class<?> javaClass) { + this.logicalType = logicalType; + this.javaClass = javaClass; + } + + LogicalType toLogicalType() { + return logicalType; + } + + public Class<?> javaClass() { + return javaClass; + } + + public static PhysicalType from(int primitiveType) { + switch (primitiveType) { + case Primitives.TYPE_NULL: + return NULL; + case Primitives.TYPE_TRUE: + return BOOLEAN_TRUE; + case Primitives.TYPE_FALSE: + return BOOLEAN_FALSE; + case Primitives.TYPE_INT8: + return INT8; + case Primitives.TYPE_INT16: + return INT16; + case Primitives.TYPE_INT32: + return INT32; + case Primitives.TYPE_INT64: + return INT64; + case Primitives.TYPE_DATE: + return DATE; + case Primitives.TYPE_TIMESTAMPTZ: + return TIMESTAMPTZ; + case Primitives.TYPE_TIMESTAMPNTZ: + return TIMESTAMPNTZ; + case Primitives.TYPE_FLOAT: + return FLOAT; + case Primitives.TYPE_DOUBLE: + return DOUBLE; + case Primitives.TYPE_DECIMAL4: + return DECIMAL4; + case Primitives.TYPE_DECIMAL8: + return DECIMAL8; + case Primitives.TYPE_DECIMAL16: + return DECIMAL16; + case Primitives.TYPE_BINARY: + return BINARY; + case Primitives.TYPE_STRING: + return STRING; + } + + throw new UnsupportedOperationException("Unknown primitive physical type: " + primitiveType); + } + } + + interface Serialized { + ByteBuffer buffer(); + } + + abstract static class SerializedValue implements VariantValue, Serialized { + @Override + public int sizeInBytes() { + return buffer().remaining(); + } + + @Override + public int writeTo(ByteBuffer buffer, int offset) { + ByteBuffer value = buffer(); + VariantUtil.writeBufferAbsolute(buffer, offset, value); + return value.remaining(); + } + } + + static class Primitives { + static final int TYPE_NULL = 0; + static final int TYPE_TRUE = 1; + static final int TYPE_FALSE = 2; + static final int TYPE_INT8 = 3; + static final int TYPE_INT16 = 4; + static final int TYPE_INT32 = 5; + static final int TYPE_INT64 = 6; + static final int TYPE_DOUBLE = 7; + static final int TYPE_DECIMAL4 = 8; + static final int TYPE_DECIMAL8 = 9; + static final int TYPE_DECIMAL16 = 10; + static final int TYPE_DATE = 11; + static final int TYPE_TIMESTAMPTZ = 12; // equivalent to timestamptz + static final int TYPE_TIMESTAMPNTZ = 13; // equivalent to timestamp + static final int TYPE_FLOAT = 14; + static final int TYPE_BINARY = 15; + static final int TYPE_STRING = 16; + + static final int PRIMITIVE_TYPE_SHIFT = 2; + + private Primitives() {} + } + + static final int HEADER_SIZE = 1; + + enum BasicType { + PRIMITIVE, + SHORT_STRING, + OBJECT, + ARRAY + } + + public static VariantValue from(ByteBuffer metadata, ByteBuffer value) { Review Comment: Personally I would like to refactor those static methods into their own interface. 1. ``` public static VariantValue from(ByteBuffer metadata, ByteBuffer value) static VariantValue from(SerializedMetadata metadata, ByteBuffer value) ``` 2. Move of<Primitive> to VariantPrimitive class. But it's personal preference. ########## core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java: ########## @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.util.Pair; +import org.apache.iceberg.util.SortedMerge; + +/** + * A variant Object that handles full or partial shredding. + * + * <p>Metadata stored for an object must be the same regardless of whether the object is shredded. + * This class assumes that the metadata from the unshredded object can be used for the shredded + * fields. This also does not allow updating or replacing the metadata for the unshredded object, + * which could require recursively rewriting field IDs. + */ +class ShreddedObject implements VariantObject { Review Comment: Are you intentionally including shredded implementation in this PR? We probably should focus on the basic Variant core read in this PR. ########## core/src/main/java/org/apache/iceberg/variants/VariantObject.java: ########## @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +/** An variant object value. */ +public interface VariantObject extends VariantValue { + /** Returns the {@link VariantValue} for the field named {@code name} in this object. */ + VariantValue get(String name); + + default Variants.PhysicalType type() { Review Comment: Add `@Overrride` for this method. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org