aihuaxu commented on code in PR #11415: URL: https://github.com/apache/iceberg/pull/11415#discussion_r1821861473
########## core/src/main/java/org/apache/iceberg/Variants.java: ########## @@ -0,0 +1,207 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; + +public class Variants { + enum LogicalType { + NULL, + BOOLEAN, + EXACT_NUMERIC, + FLOAT, + DOUBLE, + DATE, + TIMESTAMPTZ, + TIMESTAMPNTZ, + BINARY, + STRING, + ARRAY, + OBJECT + } + + public enum PhysicalType { + NULL(LogicalType.NULL, Void.class), + BOOLEAN_TRUE(LogicalType.BOOLEAN, Boolean.class), + BOOLEAN_FALSE(LogicalType.BOOLEAN, Boolean.class), + INT8(LogicalType.EXACT_NUMERIC, Integer.class), + INT16(LogicalType.EXACT_NUMERIC, Integer.class), + INT32(LogicalType.EXACT_NUMERIC, Integer.class), + INT64(LogicalType.EXACT_NUMERIC, Long.class), + DOUBLE(LogicalType.DOUBLE, Double.class), + DECIMAL4(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DECIMAL8(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DECIMAL16(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DATE(LogicalType.DATE, Integer.class), + TIMESTAMPTZ(LogicalType.TIMESTAMPTZ, Long.class), + TIMESTAMPNTZ(LogicalType.TIMESTAMPNTZ, 
Long.class), + FLOAT(LogicalType.FLOAT, Float.class), + BINARY(LogicalType.BINARY, ByteBuffer.class), + STRING(LogicalType.STRING, String.class), + ARRAY(LogicalType.ARRAY, List.class), + OBJECT(LogicalType.OBJECT, Map.class); + + private final LogicalType logicalType; + private final Class<?> javaClass; + + PhysicalType(LogicalType logicalType, Class<?> javaClass) { + this.logicalType = logicalType; + this.javaClass = javaClass; + } + + LogicalType toLogicalType() { + return logicalType; + } + + public Class<?> javaClass() { + return javaClass; + } + + public static PhysicalType from(int primitiveType) { + switch (primitiveType) { + case Primitives.TYPE_NULL: + return NULL; + case Primitives.TYPE_TRUE: + return BOOLEAN_TRUE; + case Primitives.TYPE_FALSE: + return BOOLEAN_FALSE; + case Primitives.TYPE_INT8: + return INT8; + case Primitives.TYPE_INT16: + return INT16; + case Primitives.TYPE_INT32: + return INT32; + case Primitives.TYPE_INT64: + return INT64; + case Primitives.TYPE_DATE: + return DATE; + case Primitives.TYPE_TIMESTAMPTZ: + return TIMESTAMPTZ; + case Primitives.TYPE_TIMESTAMPNTZ: + return TIMESTAMPNTZ; + case Primitives.TYPE_FLOAT: + return FLOAT; + case Primitives.TYPE_DOUBLE: + return DOUBLE; + case Primitives.TYPE_DECIMAL4: + return DECIMAL4; + case Primitives.TYPE_DECIMAL8: + return DECIMAL8; + case Primitives.TYPE_DECIMAL16: + return DECIMAL16; + case Primitives.TYPE_BINARY: + return BINARY; + case Primitives.TYPE_STRING: + return STRING; + } + + throw new UnsupportedOperationException("Unknown primitive physical type: " + primitiveType); + } + } + + public interface Serialized { + ByteBuffer buffer(); + } + + public interface Metadata extends Serialized { Review Comment: I think it makes sense to have VariantPrimitive, VariantArray, VariantObject classes which implement VariantAccessor (or similar name). We probably shouldn't expose Metadata or Value directly since those are more of an internal representation of a Variant. 
Similarly, `buffer()` may not need to be exposed either, if possible? ########## core/src/main/java/org/apache/iceberg/VariantPrimitive.java: ########## @@ -0,0 +1,126 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. 
+ * + */ + +package org.apache.iceberg; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +class VariantPrimitive implements Variants.Primitive<Object>, Variants.Serialized { + private static final int PRIMITIVE_TYPE_SHIFT = 2; + private static final int PRIMITIVE_OFFSET = Variants.HEADER_SIZE; + + static VariantPrimitive from(byte[] bytes) { + return from(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN), bytes[0]); + } + + static VariantPrimitive from(ByteBuffer value, int header) { + Preconditions.checkArgument( + value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); + int basicType = header & Variants.BASIC_TYPE_MASK; + Preconditions.checkArgument( + basicType == Variants.BASIC_TYPE_PRIMITIVE, + "Invalid primitive, basic type != 0: " + basicType); + return new VariantPrimitive(value, header); + } + + private final ByteBuffer value; + private final Variants.PhysicalType type; + private Object primitive = null; + + private VariantPrimitive(ByteBuffer value, int header) { + this.value = value; + this.type = Variants.PhysicalType.from(header >> PRIMITIVE_TYPE_SHIFT); + } + + private Object read() { + switch (type) { + case NULL: + return null; + case BOOLEAN_TRUE: + return true; + case BOOLEAN_FALSE: + return false; + case INT8: + return VariantUtil.readLittleEndianInt8(value, PRIMITIVE_OFFSET); + case INT16: + return VariantUtil.readLittleEndianInt16(value, PRIMITIVE_OFFSET); + case INT32: + case DATE: + return VariantUtil.readLittleEndianInt32(value, PRIMITIVE_OFFSET); + case INT64: + case TIMESTAMPTZ: + case TIMESTAMPNTZ: + return VariantUtil.readLittleEndianInt64(value, PRIMITIVE_OFFSET); + case FLOAT: + return VariantUtil.readFloat(value, PRIMITIVE_OFFSET); + case DOUBLE: + return VariantUtil.readDouble(value, PRIMITIVE_OFFSET); + case DECIMAL4: + { + int scale = 
VariantUtil.readByte(value, PRIMITIVE_OFFSET); + int unscaled = VariantUtil.readLittleEndianInt32(value, PRIMITIVE_OFFSET + 1); + return new BigDecimal(BigInteger.valueOf(unscaled), scale); + } + case DECIMAL8: + { + int scale = VariantUtil.readByte(value, PRIMITIVE_OFFSET); + long unscaled = VariantUtil.readLittleEndianInt64(value, PRIMITIVE_OFFSET + 1); + return new BigDecimal(BigInteger.valueOf(unscaled), scale); + } + case DECIMAL16: + throw new UnsupportedOperationException("unsupported"); Review Comment: Are we implementing this separately? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org