[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table
hililiwei commented on code in PR #6222: URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051360946 ## flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/data/StructRowData.java: ## @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.flink.data; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDate; +import java.time.OffsetDateTime; +import java.util.List; +import java.util.Map; +import org.apache.flink.annotation.Internal; +import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.DecimalData; +import org.apache.flink.table.data.GenericArrayData; +import org.apache.flink.table.data.GenericMapData; +import org.apache.flink.table.data.MapData; +import org.apache.flink.table.data.RawValueData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.types.RowKind; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.ByteBuffers; + +@Internal +public class StructRowData implements RowData { + private final Types.StructType type; + private RowKind kind; + private StructLike struct; + + public StructRowData(Types.StructType type) { +this(type, RowKind.INSERT); + } + + public StructRowData(Types.StructType type, RowKind kind) { +this(type, null, kind); + } + + private StructRowData(Types.StructType type, StructLike struct) { +this(type, struct, RowKind.INSERT); + } + + private StructRowData(Types.StructType type, StructLike struct, RowKind kind) { +this.type = type; +this.struct = struct; +this.kind = kind; + } + + public StructRowData setStruct(StructLike newStruct) { +this.struct = newStruct; +return this; + } + + @Override + public int getArity() { +return struct.size(); + } + + @Override + public RowKind getRowKind() { +return kind; + } + + @Override + public void setRowKind(RowKind newKind) { +Preconditions.checkNotNull(newKind, "kind can not be null"); +this.kind = newKind; + } + + @Override + public boolean isNullAt(int pos) { +return struct.get(pos, Object.class) == null; + } + + @Override + public boolean getBoolean(int pos) { +return struct.get(pos, Boolean.class); + } + + @Override + public byte getByte(int pos) { +return (byte) (int) struct.get(pos, Integer.class); + } + + @Override + public short getShort(int pos) { +return (short) (int) struct.get(pos, Integer.class); + } + + @Override + public int getInt(int pos) { +Object integer = struct.get(pos, Object.class); + +if (integer instanceof Integer) { + return (int) integer; +} else if (integer instanceof LocalDate) { + return (int) ((LocalDate) integer).toEpochDay(); +} else { + throw new IllegalStateException( + "Unknown type for int field. Type name: " + integer.getClass().getName()); +} + } + + @Override + public long getLong(int pos) { +Object longVal = struct.get(pos, Object.class); + +if (longVal instanceof Long) { + return (long) longVal; +} else if (longVal instanceof OffsetDateTime) { + return Duration.between(Instant.EPOCH, (OffsetDateTime) longVal).toNanos() / 1000; +} else if (longVal instanceof LocalDate) { + return ((LocalDate) longVal).toEpochDay(); +} else { + throw new IllegalStateException( + "Unknown type for long field. Type name: " + longVal.getClass().getName()); +} + } + + @Override + public float getFloat(int pos) { +return struct.get(pos, Float.class); + } + + @Override + public double getDouble(int pos) { +return struct.get(pos, Double.class); + } + + @Override + public StringData getString(int pos) { +return isNullAt(pos) ?
[GitHub] [iceberg] Fokko commented on a diff in pull request #6445: Python: Mock home and root folder when running `test_missing_uri`
Fokko commented on code in PR #6445: URL: https://github.com/apache/iceberg/pull/6445#discussion_r1051363319 ## python/tests/cli/test_console.py: ## @@ -134,14 +134,17 @@ def update_namespace_properties( MOCK_ENVIRONMENT = {"PYICEBERG_CATALOG__PRODUCTION__URI": "test://doesnotexist"} -def test_missing_uri() -> None: -runner = CliRunner() -result = runner.invoke(run, ["list"]) -assert result.exit_code == 1 -assert ( -result.output -== "URI missing, please provide using --uri, the config or environment variable \nPYICEBERG_CATALOG__DEFAULT__URI\n" -) +def test_missing_uri(empty_home_dir_path: str) -> None: Review Comment: But then we also should invoke the `make_temporary_home_folder`, I prefer the current solution. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] Fokko merged pull request #6445: Python: Mock home and root folder when running `test_missing_uri`
Fokko merged PR #6445: URL: https://github.com/apache/iceberg/pull/6445 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table
hililiwei commented on code in PR #6222: URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051368174 ## flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestHelpers.java: ## @@ -295,6 +299,161 @@ private static void assertEquals( } } + public static void assertEqualsSafe( + Schema schema, List recs, List rows) { +Streams.forEachPair( +recs.stream(), rows.stream(), (rec, row) -> assertEqualsSafe(schema, rec, row)); + } + + public static void assertEqualsSafe(Schema schema, GenericData.Record rec, Row row) { +List fields = schema.asStruct().fields(); +RowType rowType = FlinkSchemaUtil.convert(schema); +for (int i = 0; i < fields.size(); i += 1) { + Type fieldType = fields.get(i).type(); + Object expectedValue = rec.get(i); + Object actualValue = row.getField(i); + LogicalType logicalType = rowType.getTypeAt(i); + assertAvroEquals(fieldType, logicalType, expectedValue, actualValue); +} + } + + private static void assertEqualsSafe(Types.StructType struct, GenericData.Record rec, Row row) { +List fields = struct.fields(); +for (int i = 0; i < fields.size(); i += 1) { + Type fieldType = fields.get(i).type(); + Object expectedValue = rec.get(i); + Object actualValue = row.getField(i); + assertAvroEquals(fieldType, null, expectedValue, actualValue); +} + } + + private static void assertAvroEquals( + Type type, LogicalType logicalType, Object expected, Object actual) { + +if (expected == null && actual == null) { + return; +} + +Assert.assertTrue( +"expected and actual should be both null or not null", expected != null && actual != null); + +switch (type.typeId()) { + case BOOLEAN: Review Comment: but string is a primitive type. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] pvary commented on issue #6370: What is the purpose of Hive Lock ?
pvary commented on issue #6370: URL: https://github.com/apache/iceberg/issues/6370#issuecomment-1356124377 Maybe something like this: ``` diff --git a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift index 179a4960b9..6a3f9e40fa 100644 --- a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift +++ b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift @@ -22,7 +22,7 @@ # Thrift Service that the MetaStore is built on # -include "share/fb303/if/fb303.thrift" +include "/Users/petervary/tmp/fb303.thrift" namespace java org.apache.hadoop.hive.metastore.api namespace php metastore @@ -2165,7 +2165,9 @@ struct AlterTableRequest { 6: optional i64 writeId=-1, 7: optional string validWriteIdList 8: optional list processorCapabilities, - 9: optional string processorIdentifier + 9: optional string processorIdentifier, + 10: optional string expectedPropertyKey, + 12: optional string expectedPropertyValue // TODO: also add cascade here, out of envCtx } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index 1226cd1a1a..0b12e68401 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -102,7 +102,7 @@ public void setConf(Configuration conf) { @Override public void alterTable(RawStore msdb, Warehouse wh, String catName, String dbname, String name, Table newt, EnvironmentContext environmentContext, - IHMSHandler handler, String writeIdList) + IHMSHandler handler, String writeIdList, String expectedKey, String expectedValue) throws InvalidOperationException, MetaException { catName = normalizeIdentifier(catName); name = name.toLowerCase(); @@ -187,6 +187,11 @@ public void alterTable(RawStore msdb, Warehouse wh, String catName, String dbnam TableName.getQualified(catName, dbname, name) + " doesn't exist"); } + if (expectedKey != null && !oldt.getParameters().get(expectedKey).equals(expectedValue)) { +throw new MetaException("The table already modified. The parameter value for key: " + expectedKey + " is " ++ oldt.getParameters().get(expectedKey) + ". The expected was value was " + expectedValue); + } + validateTableChangesOnReplSource(olddb, oldt, newt, environmentContext); // On a replica this alter table will be executed only if old and new both the databases are ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table
hililiwei commented on code in PR #6222: URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051380948 ## core/src/main/java/org/apache/iceberg/BaseFilesTable.java: ## @@ -223,34 +225,28 @@ ManifestFile manifest() { static class ContentFileStructWithMetrics implements StructLike { private final StructLike fileAsStruct; private final MetricsUtil.ReadableMetricsStruct readableMetrics; -private final int expectedSize; +private final int position; ContentFileStructWithMetrics( -int expectedSize, -StructLike fileAsStruct, -MetricsUtil.ReadableMetricsStruct readableMetrics) { +int position, StructLike fileAsStruct, MetricsUtil.ReadableMetricsStruct readableMetrics) { this.fileAsStruct = fileAsStruct; this.readableMetrics = readableMetrics; - this.expectedSize = expectedSize; + this.position = position; } @Override public int size() { - return expectedSize; + return position; } @Override public T get(int pos, Class javaClass) { - int lastExpectedIndex = expectedSize - 1; - if (pos < lastExpectedIndex) { + if (pos < position) { return fileAsStruct.get(pos, javaClass); - } else if (pos == lastExpectedIndex) { + } else if (pos == position) { return javaClass.cast(readableMetrics); } else { -throw new IllegalArgumentException( Review Comment: metrics occupies a position. `fileAsStruct` size + `Metrics` size(is 1) is equal to the size of `projection`. When pos is greater than metricsPosition, the actual position of the field in `fileAsStruct` should be subtracted by 1. For instance: ``` fileAsStruct :[0->c1, 1->c2] metrics :[0->c3] project:[c1,c3,c2] ``` When we want to get `c2` (its pos is 2), but actually 1 in `fileAsStruct`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table
hililiwei commented on code in PR #6222: URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051382070 ## flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/data/StructRowData.java: ## @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.flink.data; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDate; +import java.time.OffsetDateTime; +import java.util.List; +import java.util.Map; +import org.apache.flink.annotation.Internal; +import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.DecimalData; +import org.apache.flink.table.data.GenericArrayData; +import org.apache.flink.table.data.GenericMapData; +import org.apache.flink.table.data.MapData; +import org.apache.flink.table.data.RawValueData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.types.RowKind; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.ByteBuffers; + +@Internal +public class StructRowData implements RowData { + private final Types.StructType type; + private RowKind kind; + private StructLike struct; + + public StructRowData(Types.StructType type) { +this(type, RowKind.INSERT); + } + + public StructRowData(Types.StructType type, RowKind kind) { +this(type, null, kind); + } + + private StructRowData(Types.StructType type, StructLike struct) { +this(type, struct, RowKind.INSERT); + } + + private StructRowData(Types.StructType type, StructLike struct, RowKind kind) { +this.type = type; +this.struct = struct; +this.kind = kind; + } + + public StructRowData setStruct(StructLike newStruct) { +this.struct = newStruct; +return this; + } + + @Override + public int getArity() { +return struct.size(); + } + + @Override + public RowKind getRowKind() { +return kind; + } + + @Override + public void setRowKind(RowKind newKind) { +Preconditions.checkNotNull(newKind, "kind can not be null"); +this.kind = newKind; + } + + @Override + public boolean isNullAt(int pos) { +return struct.get(pos, Object.class) == null; + } + + @Override + public boolean getBoolean(int pos) { +return struct.get(pos, Boolean.class); + } + + @Override + public byte getByte(int pos) { +return (byte) (int) struct.get(pos, Integer.class); + } + + @Override + public short getShort(int pos) { +return (short) (int) struct.get(pos, Integer.class); + } + + @Override + public int getInt(int pos) { +Object integer = struct.get(pos, Object.class); + +if (integer instanceof Integer) { + return (int) integer; +} else if (integer instanceof LocalDate) { + return (int) ((LocalDate) integer).toEpochDay(); +} else { + throw new IllegalStateException( + "Unknown type for int field. Type name: " + integer.getClass().getName()); +} + } + + @Override + public long getLong(int pos) { +Object longVal = struct.get(pos, Object.class); + +if (longVal instanceof Long) { + return (long) longVal; +} else if (longVal instanceof OffsetDateTime) { + return Duration.between(Instant.EPOCH, (OffsetDateTime) longVal).toNanos() / 1000; +} else if (longVal instanceof LocalDate) { + return ((LocalDate) longVal).toEpochDay(); +} else { + throw new IllegalStateException( + "Unknown type for long field. Type name: " + longVal.getClass().getName()); +} + } + + @Override + public float getFloat(int pos) { +return struct.get(pos, Float.class); + } + + @Override + public double getDouble(int pos) { +return struct.get(pos, Double.class); + } + + @Override + public StringData getString(int pos) { +return isNullAt(pos) ?
[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table
hililiwei commented on code in PR #6222: URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051382070 ## flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/data/StructRowData.java: ## @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.flink.data; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDate; +import java.time.OffsetDateTime; +import java.util.List; +import java.util.Map; +import org.apache.flink.annotation.Internal; +import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.DecimalData; +import org.apache.flink.table.data.GenericArrayData; +import org.apache.flink.table.data.GenericMapData; +import org.apache.flink.table.data.MapData; +import org.apache.flink.table.data.RawValueData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.types.RowKind; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.ByteBuffers; + +@Internal +public class StructRowData implements RowData { + private final Types.StructType type; + private RowKind kind; + private StructLike struct; + + public StructRowData(Types.StructType type) { +this(type, RowKind.INSERT); + } + + public StructRowData(Types.StructType type, RowKind kind) { +this(type, null, kind); + } + + private StructRowData(Types.StructType type, StructLike struct) { +this(type, struct, RowKind.INSERT); + } + + private StructRowData(Types.StructType type, StructLike struct, RowKind kind) { +this.type = type; +this.struct = struct; +this.kind = kind; + } + + public StructRowData setStruct(StructLike newStruct) { +this.struct = newStruct; +return this; + } + + @Override + public int getArity() { +return struct.size(); + } + + @Override + public RowKind getRowKind() { +return kind; + } + + @Override + public void setRowKind(RowKind newKind) { +Preconditions.checkNotNull(newKind, "kind can not be null"); +this.kind = newKind; + } + + @Override + public boolean isNullAt(int pos) { +return struct.get(pos, Object.class) == null; + } + + @Override + public boolean getBoolean(int pos) { +return struct.get(pos, Boolean.class); + } + + @Override + public byte getByte(int pos) { +return (byte) (int) struct.get(pos, Integer.class); + } + + @Override + public short getShort(int pos) { +return (short) (int) struct.get(pos, Integer.class); + } + + @Override + public int getInt(int pos) { +Object integer = struct.get(pos, Object.class); + +if (integer instanceof Integer) { + return (int) integer; +} else if (integer instanceof LocalDate) { + return (int) ((LocalDate) integer).toEpochDay(); +} else { + throw new IllegalStateException( + "Unknown type for int field. Type name: " + integer.getClass().getName()); +} + } + + @Override + public long getLong(int pos) { +Object longVal = struct.get(pos, Object.class); + +if (longVal instanceof Long) { + return (long) longVal; +} else if (longVal instanceof OffsetDateTime) { + return Duration.between(Instant.EPOCH, (OffsetDateTime) longVal).toNanos() / 1000; +} else if (longVal instanceof LocalDate) { + return ((LocalDate) longVal).toEpochDay(); +} else { + throw new IllegalStateException( + "Unknown type for long field. Type name: " + longVal.getClass().getName()); +} + } + + @Override + public float getFloat(int pos) { +return struct.get(pos, Float.class); + } + + @Override + public double getDouble(int pos) { +return struct.get(pos, Double.class); + } + + @Override + public StringData getString(int pos) { +return isNullAt(pos) ?
[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table
hililiwei commented on code in PR #6222: URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051382070 ## flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/data/StructRowData.java: ## @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.flink.data; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDate; +import java.time.OffsetDateTime; +import java.util.List; +import java.util.Map; +import org.apache.flink.annotation.Internal; +import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.DecimalData; +import org.apache.flink.table.data.GenericArrayData; +import org.apache.flink.table.data.GenericMapData; +import org.apache.flink.table.data.MapData; +import org.apache.flink.table.data.RawValueData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.types.RowKind; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.ByteBuffers; + +@Internal +public class StructRowData implements RowData { + private final Types.StructType type; + private RowKind kind; + private StructLike struct; + + public StructRowData(Types.StructType type) { +this(type, RowKind.INSERT); + } + + public StructRowData(Types.StructType type, RowKind kind) { +this(type, null, kind); + } + + private StructRowData(Types.StructType type, StructLike struct) { +this(type, struct, RowKind.INSERT); + } + + private StructRowData(Types.StructType type, StructLike struct, RowKind kind) { +this.type = type; +this.struct = struct; +this.kind = kind; + } + + public StructRowData setStruct(StructLike newStruct) { +this.struct = newStruct; +return this; + } + + @Override + public int getArity() { +return struct.size(); + } + + @Override + public RowKind getRowKind() { +return kind; + } + + @Override + public void setRowKind(RowKind newKind) { +Preconditions.checkNotNull(newKind, "kind can not be null"); +this.kind = newKind; + } + + @Override + public boolean isNullAt(int pos) { +return struct.get(pos, Object.class) == null; + } + + @Override + public boolean getBoolean(int pos) { +return struct.get(pos, Boolean.class); + } + + @Override + public byte getByte(int pos) { +return (byte) (int) struct.get(pos, Integer.class); + } + + @Override + public short getShort(int pos) { +return (short) (int) struct.get(pos, Integer.class); + } + + @Override + public int getInt(int pos) { +Object integer = struct.get(pos, Object.class); + +if (integer instanceof Integer) { + return (int) integer; +} else if (integer instanceof LocalDate) { + return (int) ((LocalDate) integer).toEpochDay(); +} else { + throw new IllegalStateException( + "Unknown type for int field. Type name: " + integer.getClass().getName()); +} + } + + @Override + public long getLong(int pos) { +Object longVal = struct.get(pos, Object.class); + +if (longVal instanceof Long) { + return (long) longVal; +} else if (longVal instanceof OffsetDateTime) { + return Duration.between(Instant.EPOCH, (OffsetDateTime) longVal).toNanos() / 1000; +} else if (longVal instanceof LocalDate) { + return ((LocalDate) longVal).toEpochDay(); +} else { + throw new IllegalStateException( + "Unknown type for long field. Type name: " + longVal.getClass().getName()); +} + } + + @Override + public float getFloat(int pos) { +return struct.get(pos, Float.class); + } + + @Override + public double getDouble(int pos) { +return struct.get(pos, Double.class); + } + + @Override + public StringData getString(int pos) { +return isNullAt(pos) ?
[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table
hililiwei commented on code in PR #6222: URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051383730 ## flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/data/StructRowData.java: ## @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.flink.data; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDate; +import java.time.OffsetDateTime; +import java.util.List; +import java.util.Map; +import org.apache.flink.annotation.Internal; +import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.DecimalData; +import org.apache.flink.table.data.GenericArrayData; +import org.apache.flink.table.data.GenericMapData; +import org.apache.flink.table.data.MapData; +import org.apache.flink.table.data.RawValueData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.types.RowKind; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.ByteBuffers; + +@Internal +public class StructRowData implements RowData { + private final Types.StructType type; + private RowKind kind; + private StructLike struct; + + public StructRowData(Types.StructType type) { +this(type, RowKind.INSERT); + } + + public StructRowData(Types.StructType type, RowKind kind) { +this(type, null, kind); + } + + private StructRowData(Types.StructType type, StructLike struct) { +this(type, struct, RowKind.INSERT); + } + + private StructRowData(Types.StructType type, StructLike struct, RowKind kind) { +this.type = type; +this.struct = struct; +this.kind = kind; + } + + public StructRowData setStruct(StructLike newStruct) { +this.struct = newStruct; +return this; + } + + @Override + public int getArity() { +return struct.size(); + } + + @Override + public RowKind getRowKind() { +return kind; + } + + @Override + public void setRowKind(RowKind newKind) { +Preconditions.checkNotNull(newKind, "kind can not be null"); +this.kind = newKind; + } + + @Override + public boolean isNullAt(int pos) { +return struct.get(pos, Object.class) == null; + } + + @Override + public boolean getBoolean(int pos) { +return struct.get(pos, Boolean.class); + } + + @Override + public byte getByte(int pos) { +return (byte) (int) struct.get(pos, Integer.class); + } + + @Override + public short getShort(int pos) { +return (short) (int) struct.get(pos, Integer.class); + } + + @Override + public int getInt(int pos) { +Object integer = struct.get(pos, Object.class); + +if (integer instanceof Integer) { + return (int) integer; +} else if (integer instanceof LocalDate) { + return (int) ((LocalDate) integer).toEpochDay(); +} else { + throw new IllegalStateException( + "Unknown type for int field. Type name: " + integer.getClass().getName()); +} + } + + @Override + public long getLong(int pos) { +Object longVal = struct.get(pos, Object.class); + +if (longVal instanceof Long) { + return (long) longVal; +} else if (longVal instanceof OffsetDateTime) { + return Duration.between(Instant.EPOCH, (OffsetDateTime) longVal).toNanos() / 1000; +} else if (longVal instanceof LocalDate) { + return ((LocalDate) longVal).toEpochDay(); +} else { + throw new IllegalStateException( + "Unknown type for long field. Type name: " + longVal.getClass().getName()); +} + } + + @Override + public float getFloat(int pos) { +return struct.get(pos, Float.class); + } + + @Override + public double getDouble(int pos) { +return struct.get(pos, Double.class); + } + + @Override + public StringData getString(int pos) { +return isNullAt(pos) ?
[GitHub] [iceberg] blcksrx commented on pull request #4465: Add FileIO implementation for Azure Blob Storage
blcksrx commented on PR #4465: URL: https://github.com/apache/iceberg/pull/4465#issuecomment-1356227315 Any plan to merge this PR? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg-docs] InvisibleProgrammer opened a new pull request, #191: Fix sidebar
InvisibleProgrammer opened a new pull request, #191: URL: https://github.com/apache/iceberg-docs/pull/191 Reason and sample screenshots: https://github.com/apache/iceberg/issues/6381 This provides a fix for an issue in that the sidebar menu completely disappears from the documentation side on a resolution that is lower than 1280px. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] InvisibleProgrammer commented on pull request #6379: Docs: Update Iceberg Hive documentation - 1.0.x (#6337)
InvisibleProgrammer commented on PR #6379: URL: https://github.com/apache/iceberg/pull/6379#issuecomment-1356327476 Hey there! With the help of @pvary , all updates for the older releases are merged. I wonder, do you know what is the process of releasing the merged changes? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] maximethebault commented on issue #6224: Spark: regression / query failure with Iceberg 1.0.0 and UNION
maximethebault commented on issue #6224: URL: https://github.com/apache/iceberg/issues/6224#issuecomment-1356353314 Thanks for investigating this issue further! I'll go ahead and close this issue since it isn't Iceberg-related. I'll make sure to keep an eye on the Spark issue you created. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] maximethebault closed issue #6224: Spark: regression / query failure with Iceberg 1.0.0 and UNION
maximethebault closed issue #6224: Spark: regression / query failure with Iceberg 1.0.0 and UNION URL: https://github.com/apache/iceberg/issues/6224 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] dennishuo commented on pull request #6428: Add new SnowflakeCatalog implementation to enable directly using Snowflake-managed Iceberg tables
dennishuo commented on PR #6428: URL: https://github.com/apache/iceberg/pull/6428#issuecomment-1356445009 Interesting, not sure why it removed @danielcweeks when I re-requested review from @nastra (definitely wasn't intentional -- as far as I can tell my repository permissions don't even provide the ability to remove reviewer requests). I wonder if this is possibly another manifestation of https://github.com/community/community/discussions/8939 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] github-actions[bot] commented on issue #5086: Add a connector for Python Flink DataStream API
github-actions[bot] commented on issue #5086: URL: https://github.com/apache/iceberg/issues/5086#issuecomment-1356511133 This issue has been automatically marked as stale because it has been open for 180 days with no activity. It will be closed in next 14 days if no further activity occurs. To permanently prevent this issue from being considered stale, add the label 'not-stale', but commenting on the issue is preferred when possible. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] github-actions[bot] commented on issue #4971: new snapshot file(../metadata/snap-xxx.avro) will be generated as checkpoint commit, even though there is no data from cdc
github-actions[bot] commented on issue #4971: URL: https://github.com/apache/iceberg/issues/4971#issuecomment-1356511150 This issue has been automatically marked as stale because it has been open for 180 days with no activity. It will be closed in next 14 days if no further activity occurs. To permanently prevent this issue from being considered stale, add the label 'not-stale', but commenting on the issue is preferred when possible. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] amogh-jahagirdar commented on a diff in pull request #3024: Arrow: Convert dict encoded vectors to their expected Arrow vector types
amogh-jahagirdar commented on code in PR #3024: URL: https://github.com/apache/iceberg/pull/3024#discussion_r1051508734 ## arrow/src/main/java/org/apache/iceberg/arrow/vectorized/ArrowVectorAccessors.java: ## @@ -31,7 +33,7 @@ final class ArrowVectorAccessors { static { factory = new GenericArrowVectorAccessorFactory<>( -throwingSupplier("Decimal type is not supported"), +JavaDecimalFactory::new, JavaStringFactory::new, throwingSupplier("Struct type is not supported"), throwingSupplier("List type is not supported")); Review Comment: Curious is there an open issue or existing PR on support for struct/list types? I wasn't able to find any ## arrow/src/main/java/org/apache/iceberg/arrow/DictEncodedArrowConverter.java: ## @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.arrow; + +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; +import java.util.function.IntConsumer; +import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.TimeMicroVector; +import org.apache.arrow.vector.TimeStampMicroTZVector; +import org.apache.arrow.vector.TimeStampMicroVector; +import org.apache.arrow.vector.TimeStampVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.iceberg.arrow.vectorized.ArrowVectorAccessor; +import org.apache.iceberg.arrow.vectorized.VectorHolder; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; + +/** This converts dictionary encoded arrow vectors to a correctly typed arrow vector. */ +public class DictEncodedArrowConverter { + + private DictEncodedArrowConverter() {} + + @SuppressWarnings("checkstyle:CyclomaticComplexity") + public static FieldVector toArrowVector( + VectorHolder vectorHolder, ArrowVectorAccessor accessor) { +Preconditions.checkArgument(null != vectorHolder, "VectorHolder cannot be null"); +Preconditions.checkArgument(null != accessor, "ArrowVectorAccessor cannot be null"); +if (vectorHolder.isDictionaryEncoded()) { + if (Type.TypeID.DECIMAL.equals(vectorHolder.icebergType().typeId())) { +return toDecimalVector(vectorHolder, accessor); + } else if (Type.TypeID.TIMESTAMP.equals(vectorHolder.icebergType().typeId())) { +return toTimestampVector(vectorHolder, accessor); + } else if (Type.TypeID.LONG.equals(vectorHolder.icebergType().typeId())) { +return toBigIntVector(vectorHolder, accessor); + } else if (Type.TypeID.FLOAT.equals(vectorHolder.icebergType().typeId())) { +return toFloat4Vector(vectorHolder, accessor); + } else if (Type.TypeID.DOUBLE.equals(vectorHolder.icebergType().typeId())) { +return toFloat8Vector(vectorHolder, accessor); + } else if (Type.TypeID.STRING.equals(vectorHolder.icebergType().typeId())) { +return toVarCharVector(vectorHolder, accessor); + } else if (Type.TypeID.BINARY.equals(vectorHolder.icebergType().typeId())) { +return toVarBinaryVector(vectorHolder, accessor); + } else if (Type.TypeID.TIME.equals(vectorHolder.icebergType().typeId())) { +return toTimeMicroVector(vectorHolder, accessor); + } + + throw new IllegalArgumentException( + String.format( + "Cannot convert dict encoded field '%s' of type '%s' to Arrow " + + "vector as it is currently not supported", + vectorHolder.icebergField().name(), vectorHolder.icebergType().typeId())); Review Comment: I think this case would get hit even in the case the vector is not dictionary encoded and we'd be surfacing a misleading error message, would it make sense to throw a different exception to
[GitHub] [iceberg] amogh-jahagirdar commented on a diff in pull request #3024: Arrow: Convert dict encoded vectors to their expected Arrow vector types
amogh-jahagirdar commented on code in PR #3024: URL: https://github.com/apache/iceberg/pull/3024#discussion_r1051508734 ## arrow/src/main/java/org/apache/iceberg/arrow/vectorized/ArrowVectorAccessors.java: ## @@ -31,7 +33,7 @@ final class ArrowVectorAccessors { static { factory = new GenericArrowVectorAccessorFactory<>( -throwingSupplier("Decimal type is not supported"), +JavaDecimalFactory::new, JavaStringFactory::new, throwingSupplier("Struct type is not supported"), throwingSupplier("List type is not supported")); Review Comment: Curious would you happen to know if there is an open issue or existing PR on support for struct/list types? I wasn't able to find any -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] amogh-jahagirdar commented on a diff in pull request #3024: Arrow: Convert dict encoded vectors to their expected Arrow vector types
amogh-jahagirdar commented on code in PR #3024: URL: https://github.com/apache/iceberg/pull/3024#discussion_r1051509275 ## arrow/src/main/java/org/apache/iceberg/arrow/DictEncodedArrowConverter.java: ## @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.arrow; + +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; +import java.util.function.IntConsumer; +import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.TimeMicroVector; +import org.apache.arrow.vector.TimeStampMicroTZVector; +import org.apache.arrow.vector.TimeStampMicroVector; +import org.apache.arrow.vector.TimeStampVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.iceberg.arrow.vectorized.ArrowVectorAccessor; +import org.apache.iceberg.arrow.vectorized.VectorHolder; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; + +/** This converts dictionary encoded arrow vectors to a correctly typed arrow vector. */ +public class DictEncodedArrowConverter { + + private DictEncodedArrowConverter() {} + + @SuppressWarnings("checkstyle:CyclomaticComplexity") + public static FieldVector toArrowVector( + VectorHolder vectorHolder, ArrowVectorAccessor accessor) { +Preconditions.checkArgument(null != vectorHolder, "VectorHolder cannot be null"); +Preconditions.checkArgument(null != accessor, "ArrowVectorAccessor cannot be null"); +if (vectorHolder.isDictionaryEncoded()) { + if (Type.TypeID.DECIMAL.equals(vectorHolder.icebergType().typeId())) { +return toDecimalVector(vectorHolder, accessor); + } else if (Type.TypeID.TIMESTAMP.equals(vectorHolder.icebergType().typeId())) { +return toTimestampVector(vectorHolder, accessor); + } else if (Type.TypeID.LONG.equals(vectorHolder.icebergType().typeId())) { +return toBigIntVector(vectorHolder, accessor); + } else if (Type.TypeID.FLOAT.equals(vectorHolder.icebergType().typeId())) { +return toFloat4Vector(vectorHolder, accessor); + } else if (Type.TypeID.DOUBLE.equals(vectorHolder.icebergType().typeId())) { +return toFloat8Vector(vectorHolder, accessor); + } else if (Type.TypeID.STRING.equals(vectorHolder.icebergType().typeId())) { +return toVarCharVector(vectorHolder, accessor); + } else if (Type.TypeID.BINARY.equals(vectorHolder.icebergType().typeId())) { +return toVarBinaryVector(vectorHolder, accessor); + } else if (Type.TypeID.TIME.equals(vectorHolder.icebergType().typeId())) { +return toTimeMicroVector(vectorHolder, accessor); + } + + throw new IllegalArgumentException( + String.format( + "Cannot convert dict encoded field '%s' of type '%s' to Arrow " + + "vector as it is currently not supported", + vectorHolder.icebergField().name(), vectorHolder.icebergType().typeId())); Review Comment: I think this case would get hit in the case the vector is not dictionary encoded and we'd be surfacing a misleading error message, would it make sense to throw a different exception to indicate that this converter should not be used for non dictionary encoded vectors? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org
[GitHub] [iceberg] amogh-jahagirdar commented on a diff in pull request #3024: Arrow: Convert dict encoded vectors to their expected Arrow vector types
amogh-jahagirdar commented on code in PR #3024: URL: https://github.com/apache/iceberg/pull/3024#discussion_r1051509275 ## arrow/src/main/java/org/apache/iceberg/arrow/DictEncodedArrowConverter.java: ## @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.arrow; + +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; +import java.util.function.IntConsumer; +import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.TimeMicroVector; +import org.apache.arrow.vector.TimeStampMicroTZVector; +import org.apache.arrow.vector.TimeStampMicroVector; +import org.apache.arrow.vector.TimeStampVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.iceberg.arrow.vectorized.ArrowVectorAccessor; +import org.apache.iceberg.arrow.vectorized.VectorHolder; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; + +/** This converts dictionary encoded arrow vectors to a correctly typed arrow vector. */ +public class DictEncodedArrowConverter { + + private DictEncodedArrowConverter() {} + + @SuppressWarnings("checkstyle:CyclomaticComplexity") + public static FieldVector toArrowVector( + VectorHolder vectorHolder, ArrowVectorAccessor accessor) { +Preconditions.checkArgument(null != vectorHolder, "VectorHolder cannot be null"); +Preconditions.checkArgument(null != accessor, "ArrowVectorAccessor cannot be null"); +if (vectorHolder.isDictionaryEncoded()) { + if (Type.TypeID.DECIMAL.equals(vectorHolder.icebergType().typeId())) { +return toDecimalVector(vectorHolder, accessor); + } else if (Type.TypeID.TIMESTAMP.equals(vectorHolder.icebergType().typeId())) { +return toTimestampVector(vectorHolder, accessor); + } else if (Type.TypeID.LONG.equals(vectorHolder.icebergType().typeId())) { +return toBigIntVector(vectorHolder, accessor); + } else if (Type.TypeID.FLOAT.equals(vectorHolder.icebergType().typeId())) { +return toFloat4Vector(vectorHolder, accessor); + } else if (Type.TypeID.DOUBLE.equals(vectorHolder.icebergType().typeId())) { +return toFloat8Vector(vectorHolder, accessor); + } else if (Type.TypeID.STRING.equals(vectorHolder.icebergType().typeId())) { +return toVarCharVector(vectorHolder, accessor); + } else if (Type.TypeID.BINARY.equals(vectorHolder.icebergType().typeId())) { +return toVarBinaryVector(vectorHolder, accessor); + } else if (Type.TypeID.TIME.equals(vectorHolder.icebergType().typeId())) { +return toTimeMicroVector(vectorHolder, accessor); + } + + throw new IllegalArgumentException( + String.format( + "Cannot convert dict encoded field '%s' of type '%s' to Arrow " + + "vector as it is currently not supported", + vectorHolder.icebergField().name(), vectorHolder.icebergType().typeId())); Review Comment: I think this case would get hit in the case the vector is not dictionary encoded and we'd be surfacing a misleading error message, would it make sense to have a different exception message to indicate that this converter should not be used for non dictionary encoded vectors? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org