[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table

2022-12-17 Thread GitBox


hililiwei commented on code in PR #6222:
URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051360946


##
flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/data/StructRowData.java:
##
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.data;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.OffsetDateTime;
+import java.util.List;
+import java.util.Map;
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.data.ArrayData;
+import org.apache.flink.table.data.DecimalData;
+import org.apache.flink.table.data.GenericArrayData;
+import org.apache.flink.table.data.GenericMapData;
+import org.apache.flink.table.data.MapData;
+import org.apache.flink.table.data.RawValueData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.data.TimestampData;
+import org.apache.flink.types.RowKind;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.ByteBuffers;
+
+@Internal
+public class StructRowData implements RowData {
+  private final Types.StructType type;
+  private RowKind kind;
+  private StructLike struct;
+
+  public StructRowData(Types.StructType type) {
+this(type, RowKind.INSERT);
+  }
+
+  public StructRowData(Types.StructType type, RowKind kind) {
+this(type, null, kind);
+  }
+
+  private StructRowData(Types.StructType type, StructLike struct) {
+this(type, struct, RowKind.INSERT);
+  }
+
+  private StructRowData(Types.StructType type, StructLike struct, RowKind 
kind) {
+this.type = type;
+this.struct = struct;
+this.kind = kind;
+  }
+
+  public StructRowData setStruct(StructLike newStruct) {
+this.struct = newStruct;
+return this;
+  }
+
+  @Override
+  public int getArity() {
+return struct.size();
+  }
+
+  @Override
+  public RowKind getRowKind() {
+return kind;
+  }
+
+  @Override
+  public void setRowKind(RowKind newKind) {
+Preconditions.checkNotNull(newKind, "kind can not be null");
+this.kind = newKind;
+  }
+
+  @Override
+  public boolean isNullAt(int pos) {
+return struct.get(pos, Object.class) == null;
+  }
+
+  @Override
+  public boolean getBoolean(int pos) {
+return struct.get(pos, Boolean.class);
+  }
+
+  @Override
+  public byte getByte(int pos) {
+return (byte) (int) struct.get(pos, Integer.class);
+  }
+
+  @Override
+  public short getShort(int pos) {
+return (short) (int) struct.get(pos, Integer.class);
+  }
+
+  @Override
+  public int getInt(int pos) {
+Object integer = struct.get(pos, Object.class);
+
+if (integer instanceof Integer) {
+  return (int) integer;
+} else if (integer instanceof LocalDate) {
+  return (int) ((LocalDate) integer).toEpochDay();
+} else {
+  throw new IllegalStateException(
+  "Unknown type for int field. Type name: " + 
integer.getClass().getName());
+}
+  }
+
+  @Override
+  public long getLong(int pos) {
+Object longVal = struct.get(pos, Object.class);
+
+if (longVal instanceof Long) {
+  return (long) longVal;
+} else if (longVal instanceof OffsetDateTime) {
+  return Duration.between(Instant.EPOCH, (OffsetDateTime) 
longVal).toNanos() / 1000;
+} else if (longVal instanceof LocalDate) {
+  return ((LocalDate) longVal).toEpochDay();
+} else {
+  throw new IllegalStateException(
+  "Unknown type for long field. Type name: " + 
longVal.getClass().getName());
+}
+  }
+
+  @Override
+  public float getFloat(int pos) {
+return struct.get(pos, Float.class);
+  }
+
+  @Override
+  public double getDouble(int pos) {
+return struct.get(pos, Double.class);
+  }
+
+  @Override
+  public StringData getString(int pos) {
+return isNullAt(pos) ? 

[GitHub] [iceberg] Fokko commented on a diff in pull request #6445: Python: Mock home and root folder when running `test_missing_uri`

2022-12-17 Thread GitBox


Fokko commented on code in PR #6445:
URL: https://github.com/apache/iceberg/pull/6445#discussion_r1051363319


##
python/tests/cli/test_console.py:
##
@@ -134,14 +134,17 @@ def update_namespace_properties(
 MOCK_ENVIRONMENT = {"PYICEBERG_CATALOG__PRODUCTION__URI": 
"test://doesnotexist"}
 
 
-def test_missing_uri() -> None:
-runner = CliRunner()
-result = runner.invoke(run, ["list"])
-assert result.exit_code == 1
-assert (
-result.output
-== "URI missing, please provide using --uri, the config or environment 
variable \nPYICEBERG_CATALOG__DEFAULT__URI\n"
-)
+def test_missing_uri(empty_home_dir_path: str) -> None:

Review Comment:
   But then we also should invoke the `make_temporary_home_folder`, I prefer 
the current solution.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] Fokko merged pull request #6445: Python: Mock home and root folder when running `test_missing_uri`

2022-12-17 Thread GitBox


Fokko merged PR #6445:
URL: https://github.com/apache/iceberg/pull/6445


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table

2022-12-17 Thread GitBox


hililiwei commented on code in PR #6222:
URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051368174


##
flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestHelpers.java:
##
@@ -295,6 +299,161 @@ private static void assertEquals(
 }
   }
 
+  public static void assertEqualsSafe(
+  Schema schema, List recs, List rows) {
+Streams.forEachPair(
+recs.stream(), rows.stream(), (rec, row) -> assertEqualsSafe(schema, 
rec, row));
+  }
+
+  public static void assertEqualsSafe(Schema schema, GenericData.Record rec, 
Row row) {
+List fields = schema.asStruct().fields();
+RowType rowType = FlinkSchemaUtil.convert(schema);
+for (int i = 0; i < fields.size(); i += 1) {
+  Type fieldType = fields.get(i).type();
+  Object expectedValue = rec.get(i);
+  Object actualValue = row.getField(i);
+  LogicalType logicalType = rowType.getTypeAt(i);
+  assertAvroEquals(fieldType, logicalType, expectedValue, actualValue);
+}
+  }
+
+  private static void assertEqualsSafe(Types.StructType struct, 
GenericData.Record rec, Row row) {
+List fields = struct.fields();
+for (int i = 0; i < fields.size(); i += 1) {
+  Type fieldType = fields.get(i).type();
+  Object expectedValue = rec.get(i);
+  Object actualValue = row.getField(i);
+  assertAvroEquals(fieldType, null, expectedValue, actualValue);
+}
+  }
+
+  private static void assertAvroEquals(
+  Type type, LogicalType logicalType, Object expected, Object actual) {
+
+if (expected == null && actual == null) {
+  return;
+}
+
+Assert.assertTrue(
+"expected and actual should be both null or not null", expected != 
null && actual != null);
+
+switch (type.typeId()) {
+  case BOOLEAN:

Review Comment:
   but string is a primitive type.
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] pvary commented on issue #6370: What is the purpose of Hive Lock ?

2022-12-17 Thread GitBox


pvary commented on issue #6370:
URL: https://github.com/apache/iceberg/issues/6370#issuecomment-1356124377

   Maybe something like this:
   ```
   diff --git 
a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift 
b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
   index 179a4960b9..6a3f9e40fa 100644
   --- 
a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
   +++ 
b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
   @@ -22,7 +22,7 @@
# Thrift Service that the MetaStore is built on
#

   -include "share/fb303/if/fb303.thrift"
   +include "/Users/petervary/tmp/fb303.thrift"

namespace java org.apache.hadoop.hive.metastore.api
namespace php metastore
   @@ -2165,7 +2165,9 @@ struct AlterTableRequest {
  6: optional i64 writeId=-1,
  7: optional string validWriteIdList
  8: optional list processorCapabilities,
   -  9: optional string processorIdentifier
   +  9: optional string processorIdentifier,
   +  10: optional string expectedPropertyKey,
   +  12: optional string expectedPropertyValue
// TODO: also add cascade here, out of envCtx
}
   
   diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
   index 1226cd1a1a..0b12e68401 100644
   --- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
   +++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
   @@ -102,7 +102,7 @@ public void setConf(Configuration conf) {
  @Override
  public void alterTable(RawStore msdb, Warehouse wh, String catName, 
String dbname,
  String name, Table newt, EnvironmentContext environmentContext,
   -  IHMSHandler handler, String writeIdList)
   +  IHMSHandler handler, String writeIdList, String expectedKey, String 
expectedValue)
  throws InvalidOperationException, MetaException {
catName = normalizeIdentifier(catName);
name = name.toLowerCase();
   @@ -187,6 +187,11 @@ public void alterTable(RawStore msdb, Warehouse wh, 
String catName, String dbnam
TableName.getQualified(catName, dbname, name) + " doesn't 
exist");
  }

   +  if (expectedKey != null && 
!oldt.getParameters().get(expectedKey).equals(expectedValue)) {
   +throw new MetaException("The table already modified. The parameter 
value for key: " + expectedKey + " is "
   ++ oldt.getParameters().get(expectedKey) + ". The expected 
was value was " + expectedValue);
   +  }
   +
  validateTableChangesOnReplSource(olddb, oldt, newt, 
environmentContext);

  // On a replica this alter table will be executed only if old and new 
both the databases are
   
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table

2022-12-17 Thread GitBox


hililiwei commented on code in PR #6222:
URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051380948


##
core/src/main/java/org/apache/iceberg/BaseFilesTable.java:
##
@@ -223,34 +225,28 @@ ManifestFile manifest() {
   static class ContentFileStructWithMetrics implements StructLike {
 private final StructLike fileAsStruct;
 private final MetricsUtil.ReadableMetricsStruct readableMetrics;
-private final int expectedSize;
+private final int position;
 
 ContentFileStructWithMetrics(
-int expectedSize,
-StructLike fileAsStruct,
-MetricsUtil.ReadableMetricsStruct readableMetrics) {
+int position, StructLike fileAsStruct, 
MetricsUtil.ReadableMetricsStruct readableMetrics) {
   this.fileAsStruct = fileAsStruct;
   this.readableMetrics = readableMetrics;
-  this.expectedSize = expectedSize;
+  this.position = position;
 }
 
 @Override
 public int size() {
-  return expectedSize;
+  return position;
 }
 
 @Override
 public  T get(int pos, Class javaClass) {
-  int lastExpectedIndex = expectedSize - 1;
-  if (pos < lastExpectedIndex) {
+  if (pos < position) {
 return fileAsStruct.get(pos, javaClass);
-  } else if (pos == lastExpectedIndex) {
+  } else if (pos == position) {
 return javaClass.cast(readableMetrics);
   } else {
-throw new IllegalArgumentException(

Review Comment:
   metrics occupies a position.  `fileAsStruct` size + `Metrics` size(is 1) is 
equal to the size of `projection`. 
   
   When pos is greater than metricsPosition, the actual position of the field 
in `fileAsStruct` should be subtracted by 1.
   For instance:
   ```
   fileAsStruct :[0->c1, 1->c2]
   metrics :[0->c3]
   project:[c1,c3,c2]
   ```
   When we want to get `c2` (its pos is 2), but actually 1 in `fileAsStruct`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table

2022-12-17 Thread GitBox


hililiwei commented on code in PR #6222:
URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051382070


##
flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/data/StructRowData.java:
##
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.data;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.OffsetDateTime;
+import java.util.List;
+import java.util.Map;
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.data.ArrayData;
+import org.apache.flink.table.data.DecimalData;
+import org.apache.flink.table.data.GenericArrayData;
+import org.apache.flink.table.data.GenericMapData;
+import org.apache.flink.table.data.MapData;
+import org.apache.flink.table.data.RawValueData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.data.TimestampData;
+import org.apache.flink.types.RowKind;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.ByteBuffers;
+
+@Internal
+public class StructRowData implements RowData {
+  private final Types.StructType type;
+  private RowKind kind;
+  private StructLike struct;
+
+  public StructRowData(Types.StructType type) {
+this(type, RowKind.INSERT);
+  }
+
+  public StructRowData(Types.StructType type, RowKind kind) {
+this(type, null, kind);
+  }
+
+  private StructRowData(Types.StructType type, StructLike struct) {
+this(type, struct, RowKind.INSERT);
+  }
+
+  private StructRowData(Types.StructType type, StructLike struct, RowKind 
kind) {
+this.type = type;
+this.struct = struct;
+this.kind = kind;
+  }
+
+  public StructRowData setStruct(StructLike newStruct) {
+this.struct = newStruct;
+return this;
+  }
+
+  @Override
+  public int getArity() {
+return struct.size();
+  }
+
+  @Override
+  public RowKind getRowKind() {
+return kind;
+  }
+
+  @Override
+  public void setRowKind(RowKind newKind) {
+Preconditions.checkNotNull(newKind, "kind can not be null");
+this.kind = newKind;
+  }
+
+  @Override
+  public boolean isNullAt(int pos) {
+return struct.get(pos, Object.class) == null;
+  }
+
+  @Override
+  public boolean getBoolean(int pos) {
+return struct.get(pos, Boolean.class);
+  }
+
+  @Override
+  public byte getByte(int pos) {
+return (byte) (int) struct.get(pos, Integer.class);
+  }
+
+  @Override
+  public short getShort(int pos) {
+return (short) (int) struct.get(pos, Integer.class);
+  }
+
+  @Override
+  public int getInt(int pos) {
+Object integer = struct.get(pos, Object.class);
+
+if (integer instanceof Integer) {
+  return (int) integer;
+} else if (integer instanceof LocalDate) {
+  return (int) ((LocalDate) integer).toEpochDay();
+} else {
+  throw new IllegalStateException(
+  "Unknown type for int field. Type name: " + 
integer.getClass().getName());
+}
+  }
+
+  @Override
+  public long getLong(int pos) {
+Object longVal = struct.get(pos, Object.class);
+
+if (longVal instanceof Long) {
+  return (long) longVal;
+} else if (longVal instanceof OffsetDateTime) {
+  return Duration.between(Instant.EPOCH, (OffsetDateTime) 
longVal).toNanos() / 1000;
+} else if (longVal instanceof LocalDate) {
+  return ((LocalDate) longVal).toEpochDay();
+} else {
+  throw new IllegalStateException(
+  "Unknown type for long field. Type name: " + 
longVal.getClass().getName());
+}
+  }
+
+  @Override
+  public float getFloat(int pos) {
+return struct.get(pos, Float.class);
+  }
+
+  @Override
+  public double getDouble(int pos) {
+return struct.get(pos, Double.class);
+  }
+
+  @Override
+  public StringData getString(int pos) {
+return isNullAt(pos) ? 

[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table

2022-12-17 Thread GitBox


hililiwei commented on code in PR #6222:
URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051382070


##
flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/data/StructRowData.java:
##
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.data;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.OffsetDateTime;
+import java.util.List;
+import java.util.Map;
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.data.ArrayData;
+import org.apache.flink.table.data.DecimalData;
+import org.apache.flink.table.data.GenericArrayData;
+import org.apache.flink.table.data.GenericMapData;
+import org.apache.flink.table.data.MapData;
+import org.apache.flink.table.data.RawValueData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.data.TimestampData;
+import org.apache.flink.types.RowKind;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.ByteBuffers;
+
+@Internal
+public class StructRowData implements RowData {
+  private final Types.StructType type;
+  private RowKind kind;
+  private StructLike struct;
+
+  public StructRowData(Types.StructType type) {
+this(type, RowKind.INSERT);
+  }
+
+  public StructRowData(Types.StructType type, RowKind kind) {
+this(type, null, kind);
+  }
+
+  private StructRowData(Types.StructType type, StructLike struct) {
+this(type, struct, RowKind.INSERT);
+  }
+
+  private StructRowData(Types.StructType type, StructLike struct, RowKind 
kind) {
+this.type = type;
+this.struct = struct;
+this.kind = kind;
+  }
+
+  public StructRowData setStruct(StructLike newStruct) {
+this.struct = newStruct;
+return this;
+  }
+
+  @Override
+  public int getArity() {
+return struct.size();
+  }
+
+  @Override
+  public RowKind getRowKind() {
+return kind;
+  }
+
+  @Override
+  public void setRowKind(RowKind newKind) {
+Preconditions.checkNotNull(newKind, "kind can not be null");
+this.kind = newKind;
+  }
+
+  @Override
+  public boolean isNullAt(int pos) {
+return struct.get(pos, Object.class) == null;
+  }
+
+  @Override
+  public boolean getBoolean(int pos) {
+return struct.get(pos, Boolean.class);
+  }
+
+  @Override
+  public byte getByte(int pos) {
+return (byte) (int) struct.get(pos, Integer.class);
+  }
+
+  @Override
+  public short getShort(int pos) {
+return (short) (int) struct.get(pos, Integer.class);
+  }
+
+  @Override
+  public int getInt(int pos) {
+Object integer = struct.get(pos, Object.class);
+
+if (integer instanceof Integer) {
+  return (int) integer;
+} else if (integer instanceof LocalDate) {
+  return (int) ((LocalDate) integer).toEpochDay();
+} else {
+  throw new IllegalStateException(
+  "Unknown type for int field. Type name: " + 
integer.getClass().getName());
+}
+  }
+
+  @Override
+  public long getLong(int pos) {
+Object longVal = struct.get(pos, Object.class);
+
+if (longVal instanceof Long) {
+  return (long) longVal;
+} else if (longVal instanceof OffsetDateTime) {
+  return Duration.between(Instant.EPOCH, (OffsetDateTime) 
longVal).toNanos() / 1000;
+} else if (longVal instanceof LocalDate) {
+  return ((LocalDate) longVal).toEpochDay();
+} else {
+  throw new IllegalStateException(
+  "Unknown type for long field. Type name: " + 
longVal.getClass().getName());
+}
+  }
+
+  @Override
+  public float getFloat(int pos) {
+return struct.get(pos, Float.class);
+  }
+
+  @Override
+  public double getDouble(int pos) {
+return struct.get(pos, Double.class);
+  }
+
+  @Override
+  public StringData getString(int pos) {
+return isNullAt(pos) ? 

[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table

2022-12-17 Thread GitBox


hililiwei commented on code in PR #6222:
URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051382070


##
flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/data/StructRowData.java:
##
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.data;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.OffsetDateTime;
+import java.util.List;
+import java.util.Map;
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.data.ArrayData;
+import org.apache.flink.table.data.DecimalData;
+import org.apache.flink.table.data.GenericArrayData;
+import org.apache.flink.table.data.GenericMapData;
+import org.apache.flink.table.data.MapData;
+import org.apache.flink.table.data.RawValueData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.data.TimestampData;
+import org.apache.flink.types.RowKind;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.ByteBuffers;
+
+@Internal
+public class StructRowData implements RowData {
+  private final Types.StructType type;
+  private RowKind kind;
+  private StructLike struct;
+
+  public StructRowData(Types.StructType type) {
+this(type, RowKind.INSERT);
+  }
+
+  public StructRowData(Types.StructType type, RowKind kind) {
+this(type, null, kind);
+  }
+
+  private StructRowData(Types.StructType type, StructLike struct) {
+this(type, struct, RowKind.INSERT);
+  }
+
+  private StructRowData(Types.StructType type, StructLike struct, RowKind 
kind) {
+this.type = type;
+this.struct = struct;
+this.kind = kind;
+  }
+
+  public StructRowData setStruct(StructLike newStruct) {
+this.struct = newStruct;
+return this;
+  }
+
+  @Override
+  public int getArity() {
+return struct.size();
+  }
+
+  @Override
+  public RowKind getRowKind() {
+return kind;
+  }
+
+  @Override
+  public void setRowKind(RowKind newKind) {
+Preconditions.checkNotNull(newKind, "kind can not be null");
+this.kind = newKind;
+  }
+
+  @Override
+  public boolean isNullAt(int pos) {
+return struct.get(pos, Object.class) == null;
+  }
+
+  @Override
+  public boolean getBoolean(int pos) {
+return struct.get(pos, Boolean.class);
+  }
+
+  @Override
+  public byte getByte(int pos) {
+return (byte) (int) struct.get(pos, Integer.class);
+  }
+
+  @Override
+  public short getShort(int pos) {
+return (short) (int) struct.get(pos, Integer.class);
+  }
+
+  @Override
+  public int getInt(int pos) {
+Object integer = struct.get(pos, Object.class);
+
+if (integer instanceof Integer) {
+  return (int) integer;
+} else if (integer instanceof LocalDate) {
+  return (int) ((LocalDate) integer).toEpochDay();
+} else {
+  throw new IllegalStateException(
+  "Unknown type for int field. Type name: " + 
integer.getClass().getName());
+}
+  }
+
+  @Override
+  public long getLong(int pos) {
+Object longVal = struct.get(pos, Object.class);
+
+if (longVal instanceof Long) {
+  return (long) longVal;
+} else if (longVal instanceof OffsetDateTime) {
+  return Duration.between(Instant.EPOCH, (OffsetDateTime) 
longVal).toNanos() / 1000;
+} else if (longVal instanceof LocalDate) {
+  return ((LocalDate) longVal).toEpochDay();
+} else {
+  throw new IllegalStateException(
+  "Unknown type for long field. Type name: " + 
longVal.getClass().getName());
+}
+  }
+
+  @Override
+  public float getFloat(int pos) {
+return struct.get(pos, Float.class);
+  }
+
+  @Override
+  public double getDouble(int pos) {
+return struct.get(pos, Double.class);
+  }
+
+  @Override
+  public StringData getString(int pos) {
+return isNullAt(pos) ? 

[GitHub] [iceberg] hililiwei commented on a diff in pull request #6222: Flink: Support inspecting table

2022-12-17 Thread GitBox


hililiwei commented on code in PR #6222:
URL: https://github.com/apache/iceberg/pull/6222#discussion_r1051383730


##
flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/data/StructRowData.java:
##
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.data;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.OffsetDateTime;
+import java.util.List;
+import java.util.Map;
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.data.ArrayData;
+import org.apache.flink.table.data.DecimalData;
+import org.apache.flink.table.data.GenericArrayData;
+import org.apache.flink.table.data.GenericMapData;
+import org.apache.flink.table.data.MapData;
+import org.apache.flink.table.data.RawValueData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.data.TimestampData;
+import org.apache.flink.types.RowKind;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.ByteBuffers;
+
+@Internal
+public class StructRowData implements RowData {
+  private final Types.StructType type;
+  private RowKind kind;
+  private StructLike struct;
+
+  public StructRowData(Types.StructType type) {
+this(type, RowKind.INSERT);
+  }
+
+  public StructRowData(Types.StructType type, RowKind kind) {
+this(type, null, kind);
+  }
+
+  private StructRowData(Types.StructType type, StructLike struct) {
+this(type, struct, RowKind.INSERT);
+  }
+
+  private StructRowData(Types.StructType type, StructLike struct, RowKind 
kind) {
+this.type = type;
+this.struct = struct;
+this.kind = kind;
+  }
+
+  public StructRowData setStruct(StructLike newStruct) {
+this.struct = newStruct;
+return this;
+  }
+
+  @Override
+  public int getArity() {
+return struct.size();
+  }
+
+  @Override
+  public RowKind getRowKind() {
+return kind;
+  }
+
+  @Override
+  public void setRowKind(RowKind newKind) {
+Preconditions.checkNotNull(newKind, "kind can not be null");
+this.kind = newKind;
+  }
+
+  @Override
+  public boolean isNullAt(int pos) {
+return struct.get(pos, Object.class) == null;
+  }
+
+  @Override
+  public boolean getBoolean(int pos) {
+return struct.get(pos, Boolean.class);
+  }
+
+  @Override
+  public byte getByte(int pos) {
+return (byte) (int) struct.get(pos, Integer.class);
+  }
+
+  @Override
+  public short getShort(int pos) {
+return (short) (int) struct.get(pos, Integer.class);
+  }
+
+  @Override
+  public int getInt(int pos) {
+Object integer = struct.get(pos, Object.class);
+
+if (integer instanceof Integer) {
+  return (int) integer;
+} else if (integer instanceof LocalDate) {
+  return (int) ((LocalDate) integer).toEpochDay();
+} else {
+  throw new IllegalStateException(
+  "Unknown type for int field. Type name: " + 
integer.getClass().getName());
+}
+  }
+
+  @Override
+  public long getLong(int pos) {
+Object longVal = struct.get(pos, Object.class);
+
+if (longVal instanceof Long) {
+  return (long) longVal;
+} else if (longVal instanceof OffsetDateTime) {
+  return Duration.between(Instant.EPOCH, (OffsetDateTime) 
longVal).toNanos() / 1000;
+} else if (longVal instanceof LocalDate) {
+  return ((LocalDate) longVal).toEpochDay();
+} else {
+  throw new IllegalStateException(
+  "Unknown type for long field. Type name: " + 
longVal.getClass().getName());
+}
+  }
+
+  @Override
+  public float getFloat(int pos) {
+return struct.get(pos, Float.class);
+  }
+
+  @Override
+  public double getDouble(int pos) {
+return struct.get(pos, Double.class);
+  }
+
+  @Override
+  public StringData getString(int pos) {
+return isNullAt(pos) ? 

[GitHub] [iceberg] blcksrx commented on pull request #4465: Add FileIO implementation for Azure Blob Storage

2022-12-17 Thread GitBox


blcksrx commented on PR #4465:
URL: https://github.com/apache/iceberg/pull/4465#issuecomment-1356227315

   Any plan to merge this PR?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg-docs] InvisibleProgrammer opened a new pull request, #191: Fix sidebar

2022-12-17 Thread GitBox


InvisibleProgrammer opened a new pull request, #191:
URL: https://github.com/apache/iceberg-docs/pull/191

   Reason and sample screenshots: https://github.com/apache/iceberg/issues/6381
   
   This provides a fix for an issue in that the sidebar menu completely 
disappears from the documentation side on a resolution that is lower than 
1280px.
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] InvisibleProgrammer commented on pull request #6379: Docs: Update Iceberg Hive documentation - 1.0.x (#6337)

2022-12-17 Thread GitBox


InvisibleProgrammer commented on PR #6379:
URL: https://github.com/apache/iceberg/pull/6379#issuecomment-1356327476

   Hey there!
   
   With the help of @pvary , all updates for the older releases are merged. 
   
   I wonder, do you know what is the process of releasing the merged changes? 
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] maximethebault commented on issue #6224: Spark: regression / query failure with Iceberg 1.0.0 and UNION

2022-12-17 Thread GitBox


maximethebault commented on issue #6224:
URL: https://github.com/apache/iceberg/issues/6224#issuecomment-1356353314

   Thanks for investigating this issue further!
   I'll go ahead and close this issue since it isn't Iceberg-related. I'll make 
sure to keep an eye on the Spark issue you created.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] maximethebault closed issue #6224: Spark: regression / query failure with Iceberg 1.0.0 and UNION

2022-12-17 Thread GitBox


maximethebault closed issue #6224: Spark: regression / query failure with 
Iceberg 1.0.0 and UNION
URL: https://github.com/apache/iceberg/issues/6224


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] dennishuo commented on pull request #6428: Add new SnowflakeCatalog implementation to enable directly using Snowflake-managed Iceberg tables

2022-12-17 Thread GitBox


dennishuo commented on PR #6428:
URL: https://github.com/apache/iceberg/pull/6428#issuecomment-1356445009

   Interesting, not sure why it removed @danielcweeks when I re-requested 
review from @nastra (definitely wasn't intentional -- as far as I can tell my 
repository permissions don't even provide the ability to remove reviewer 
requests). I wonder if this is possibly another manifestation of 
https://github.com/community/community/discussions/8939


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] github-actions[bot] commented on issue #5086: Add a connector for Python Flink DataStream API

2022-12-17 Thread GitBox


github-actions[bot] commented on issue #5086:
URL: https://github.com/apache/iceberg/issues/5086#issuecomment-1356511133

   This issue has been automatically marked as stale because it has been open 
for 180 days with no activity. It will be closed in next 14 days if no further 
activity occurs. To permanently prevent this issue from being considered stale, 
add the label 'not-stale', but commenting on the issue is preferred when 
possible.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] github-actions[bot] commented on issue #4971: new snapshot file(../metadata/snap-xxx.avro) will be generated as checkpoint commit, even though there is no data from cdc

2022-12-17 Thread GitBox


github-actions[bot] commented on issue #4971:
URL: https://github.com/apache/iceberg/issues/4971#issuecomment-1356511150

   This issue has been automatically marked as stale because it has been open 
for 180 days with no activity. It will be closed in next 14 days if no further 
activity occurs. To permanently prevent this issue from being considered stale, 
add the label 'not-stale', but commenting on the issue is preferred when 
possible.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] amogh-jahagirdar commented on a diff in pull request #3024: Arrow: Convert dict encoded vectors to their expected Arrow vector types

2022-12-17 Thread GitBox


amogh-jahagirdar commented on code in PR #3024:
URL: https://github.com/apache/iceberg/pull/3024#discussion_r1051508734


##
arrow/src/main/java/org/apache/iceberg/arrow/vectorized/ArrowVectorAccessors.java:
##
@@ -31,7 +33,7 @@ final class ArrowVectorAccessors {
   static {
 factory =
 new GenericArrowVectorAccessorFactory<>(
-throwingSupplier("Decimal type is not supported"),
+JavaDecimalFactory::new,
 JavaStringFactory::new,
 throwingSupplier("Struct type is not supported"),
 throwingSupplier("List type is not supported"));

Review Comment:
   Curious is  there an open issue or existing PR on support for struct/list 
types? I wasn't able to find any 



##
arrow/src/main/java/org/apache/iceberg/arrow/DictEncodedArrowConverter.java:
##
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.arrow;
+
+import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
+import java.util.function.IntConsumer;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.iceberg.arrow.vectorized.ArrowVectorAccessor;
+import org.apache.iceberg.arrow.vectorized.VectorHolder;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+
+/** This converts dictionary encoded arrow vectors to a correctly typed arrow 
vector. */
+public class DictEncodedArrowConverter {
+
+  private DictEncodedArrowConverter() {}
+
+  @SuppressWarnings("checkstyle:CyclomaticComplexity")
+  public static FieldVector toArrowVector(
+  VectorHolder vectorHolder, ArrowVectorAccessor 
accessor) {
+Preconditions.checkArgument(null != vectorHolder, "VectorHolder cannot be 
null");
+Preconditions.checkArgument(null != accessor, "ArrowVectorAccessor cannot 
be null");
+if (vectorHolder.isDictionaryEncoded()) {
+  if (Type.TypeID.DECIMAL.equals(vectorHolder.icebergType().typeId())) {
+return toDecimalVector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.TIMESTAMP.equals(vectorHolder.icebergType().typeId())) {
+return toTimestampVector(vectorHolder, accessor);
+  } else if (Type.TypeID.LONG.equals(vectorHolder.icebergType().typeId())) 
{
+return toBigIntVector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.FLOAT.equals(vectorHolder.icebergType().typeId())) {
+return toFloat4Vector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.DOUBLE.equals(vectorHolder.icebergType().typeId())) {
+return toFloat8Vector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.STRING.equals(vectorHolder.icebergType().typeId())) {
+return toVarCharVector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.BINARY.equals(vectorHolder.icebergType().typeId())) {
+return toVarBinaryVector(vectorHolder, accessor);
+  } else if (Type.TypeID.TIME.equals(vectorHolder.icebergType().typeId())) 
{
+return toTimeMicroVector(vectorHolder, accessor);
+  }
+
+  throw new IllegalArgumentException(
+  String.format(
+  "Cannot convert dict encoded field '%s' of type '%s' to Arrow "
+  + "vector as it is currently not supported",
+  vectorHolder.icebergField().name(), 
vectorHolder.icebergType().typeId()));

Review Comment:
   I think this case would get hit even in the case the vector is not 
dictionary encoded and we'd be surfacing a misleading error message, would it 
make sense to throw a different exception to

[GitHub] [iceberg] amogh-jahagirdar commented on a diff in pull request #3024: Arrow: Convert dict encoded vectors to their expected Arrow vector types

2022-12-17 Thread GitBox


amogh-jahagirdar commented on code in PR #3024:
URL: https://github.com/apache/iceberg/pull/3024#discussion_r1051508734


##
arrow/src/main/java/org/apache/iceberg/arrow/vectorized/ArrowVectorAccessors.java:
##
@@ -31,7 +33,7 @@ final class ArrowVectorAccessors {
   static {
 factory =
 new GenericArrowVectorAccessorFactory<>(
-throwingSupplier("Decimal type is not supported"),
+JavaDecimalFactory::new,
 JavaStringFactory::new,
 throwingSupplier("Struct type is not supported"),
 throwingSupplier("List type is not supported"));

Review Comment:
   Curious would you happen to know if there is an open issue or existing PR on 
support for struct/list types? I wasn't able to find any 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] amogh-jahagirdar commented on a diff in pull request #3024: Arrow: Convert dict encoded vectors to their expected Arrow vector types

2022-12-17 Thread GitBox


amogh-jahagirdar commented on code in PR #3024:
URL: https://github.com/apache/iceberg/pull/3024#discussion_r1051509275


##
arrow/src/main/java/org/apache/iceberg/arrow/DictEncodedArrowConverter.java:
##
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.arrow;
+
+import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
+import java.util.function.IntConsumer;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.iceberg.arrow.vectorized.ArrowVectorAccessor;
+import org.apache.iceberg.arrow.vectorized.VectorHolder;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+
+/** This converts dictionary encoded arrow vectors to a correctly typed arrow 
vector. */
+public class DictEncodedArrowConverter {
+
+  private DictEncodedArrowConverter() {}
+
+  @SuppressWarnings("checkstyle:CyclomaticComplexity")
+  public static FieldVector toArrowVector(
+  VectorHolder vectorHolder, ArrowVectorAccessor 
accessor) {
+Preconditions.checkArgument(null != vectorHolder, "VectorHolder cannot be 
null");
+Preconditions.checkArgument(null != accessor, "ArrowVectorAccessor cannot 
be null");
+if (vectorHolder.isDictionaryEncoded()) {
+  if (Type.TypeID.DECIMAL.equals(vectorHolder.icebergType().typeId())) {
+return toDecimalVector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.TIMESTAMP.equals(vectorHolder.icebergType().typeId())) {
+return toTimestampVector(vectorHolder, accessor);
+  } else if (Type.TypeID.LONG.equals(vectorHolder.icebergType().typeId())) 
{
+return toBigIntVector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.FLOAT.equals(vectorHolder.icebergType().typeId())) {
+return toFloat4Vector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.DOUBLE.equals(vectorHolder.icebergType().typeId())) {
+return toFloat8Vector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.STRING.equals(vectorHolder.icebergType().typeId())) {
+return toVarCharVector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.BINARY.equals(vectorHolder.icebergType().typeId())) {
+return toVarBinaryVector(vectorHolder, accessor);
+  } else if (Type.TypeID.TIME.equals(vectorHolder.icebergType().typeId())) 
{
+return toTimeMicroVector(vectorHolder, accessor);
+  }
+
+  throw new IllegalArgumentException(
+  String.format(
+  "Cannot convert dict encoded field '%s' of type '%s' to Arrow "
+  + "vector as it is currently not supported",
+  vectorHolder.icebergField().name(), 
vectorHolder.icebergType().typeId()));

Review Comment:
   I think this case would get hit  in the case the vector is not dictionary 
encoded and we'd be surfacing a misleading error message, would it make sense 
to throw a different exception to indicate that this converter should not be 
used for non dictionary encoded vectors? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org



[GitHub] [iceberg] amogh-jahagirdar commented on a diff in pull request #3024: Arrow: Convert dict encoded vectors to their expected Arrow vector types

2022-12-17 Thread GitBox


amogh-jahagirdar commented on code in PR #3024:
URL: https://github.com/apache/iceberg/pull/3024#discussion_r1051509275


##
arrow/src/main/java/org/apache/iceberg/arrow/DictEncodedArrowConverter.java:
##
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.arrow;
+
+import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
+import java.util.function.IntConsumer;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.iceberg.arrow.vectorized.ArrowVectorAccessor;
+import org.apache.iceberg.arrow.vectorized.VectorHolder;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+
+/** This converts dictionary encoded arrow vectors to a correctly typed arrow 
vector. */
+public class DictEncodedArrowConverter {
+
+  private DictEncodedArrowConverter() {}
+
+  @SuppressWarnings("checkstyle:CyclomaticComplexity")
+  public static FieldVector toArrowVector(
+  VectorHolder vectorHolder, ArrowVectorAccessor 
accessor) {
+Preconditions.checkArgument(null != vectorHolder, "VectorHolder cannot be 
null");
+Preconditions.checkArgument(null != accessor, "ArrowVectorAccessor cannot 
be null");
+if (vectorHolder.isDictionaryEncoded()) {
+  if (Type.TypeID.DECIMAL.equals(vectorHolder.icebergType().typeId())) {
+return toDecimalVector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.TIMESTAMP.equals(vectorHolder.icebergType().typeId())) {
+return toTimestampVector(vectorHolder, accessor);
+  } else if (Type.TypeID.LONG.equals(vectorHolder.icebergType().typeId())) 
{
+return toBigIntVector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.FLOAT.equals(vectorHolder.icebergType().typeId())) {
+return toFloat4Vector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.DOUBLE.equals(vectorHolder.icebergType().typeId())) {
+return toFloat8Vector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.STRING.equals(vectorHolder.icebergType().typeId())) {
+return toVarCharVector(vectorHolder, accessor);
+  } else if 
(Type.TypeID.BINARY.equals(vectorHolder.icebergType().typeId())) {
+return toVarBinaryVector(vectorHolder, accessor);
+  } else if (Type.TypeID.TIME.equals(vectorHolder.icebergType().typeId())) 
{
+return toTimeMicroVector(vectorHolder, accessor);
+  }
+
+  throw new IllegalArgumentException(
+  String.format(
+  "Cannot convert dict encoded field '%s' of type '%s' to Arrow "
+  + "vector as it is currently not supported",
+  vectorHolder.icebergField().name(), 
vectorHolder.icebergType().typeId()));

Review Comment:
   I think this case would get hit  in the case the vector is not dictionary 
encoded and we'd be surfacing a misleading error message, would it make sense 
to have a different exception message to indicate that this converter should 
not be used for non dictionary encoded vectors? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org