zhjwpku commented on code in PR #166:
URL: https://github.com/apache/iceberg-cpp/pull/166#discussion_r2275415267


##########
src/iceberg/avro/avro_data_util.cc:
##########
@@ -451,4 +453,217 @@ Status AppendDatumToBuilder(const ::avro::NodePtr& 
avro_node,
                                     projected_schema, array_builder);
 }
 
+namespace {
+
+// ToAvroNodeVisitor uses 0 for null branch and 1 for value branch.
+constexpr int64_t kNullBranch = 0;
+constexpr int64_t kValueBranch = 1;
+
+}  // namespace
+
+Status ExtractDatumFromArray(const ::arrow::Array& array, int64_t index,
+                             ::avro::GenericDatum* datum) {
+  if (index < 0 || index >= array.length()) {
+    return InvalidArgument("Cannot extract datum from array at index {} of 
length {}",
+                           index, array.length());
+  }
+
+  if (array.IsNull(index)) {
+    if (!datum->isUnion()) [[unlikely]] {
+      return InvalidSchema("Cannot extract null to non-union type: {}",
+                           ::avro::toString(datum->type()));
+    }
+    datum->selectBranch(kNullBranch);
+    return {};
+  }
+
+  if (datum->isUnion()) {
+    datum->selectBranch(kValueBranch);
+  }
+
+  switch (array.type()->id()) {
+    case ::arrow::Type::BOOL: {
+      const auto& bool_array =
+          internal::checked_cast<const ::arrow::BooleanArray&>(array);
+      datum->value<bool>() = bool_array.Value(index);

Review Comment:
   Do we need to check if the GenericDatum type matches the arrow::Array type?



##########
src/iceberg/avro/avro_data_util.cc:
##########
@@ -451,4 +453,217 @@ Status AppendDatumToBuilder(const ::avro::NodePtr& 
avro_node,
                                     projected_schema, array_builder);
 }
 
+namespace {
+
+// ToAvroNodeVisitor uses 0 for null branch and 1 for value branch.
+constexpr int64_t kNullBranch = 0;
+constexpr int64_t kValueBranch = 1;
+
+}  // namespace
+
+Status ExtractDatumFromArray(const ::arrow::Array& array, int64_t index,
+                             ::avro::GenericDatum* datum) {
+  if (index < 0 || index >= array.length()) {
+    return InvalidArgument("Cannot extract datum from array at index {} of 
length {}",
+                           index, array.length());
+  }
+
+  if (array.IsNull(index)) {
+    if (!datum->isUnion()) [[unlikely]] {
+      return InvalidSchema("Cannot extract null to non-union type: {}",
+                           ::avro::toString(datum->type()));
+    }
+    datum->selectBranch(kNullBranch);
+    return {};
+  }
+
+  if (datum->isUnion()) {
+    datum->selectBranch(kValueBranch);
+  }
+
+  switch (array.type()->id()) {
+    case ::arrow::Type::BOOL: {
+      const auto& bool_array =
+          internal::checked_cast<const ::arrow::BooleanArray&>(array);
+      datum->value<bool>() = bool_array.Value(index);
+      return {};
+    }
+
+    case ::arrow::Type::INT32: {
+      const auto& int32_array = internal::checked_cast<const 
::arrow::Int32Array&>(array);
+      datum->value<int32_t>() = int32_array.Value(index);
+      return {};
+    }
+
+    case ::arrow::Type::INT64: {
+      const auto& int64_array = internal::checked_cast<const 
::arrow::Int64Array&>(array);
+      datum->value<int64_t>() = int64_array.Value(index);
+      return {};
+    }
+
+    case ::arrow::Type::FLOAT: {
+      const auto& float_array = internal::checked_cast<const 
::arrow::FloatArray&>(array);
+      datum->value<float>() = float_array.Value(index);
+      return {};
+    }
+
+    case ::arrow::Type::DOUBLE: {
+      const auto& double_array =
+          internal::checked_cast<const ::arrow::DoubleArray&>(array);
+      datum->value<double>() = double_array.Value(index);
+      return {};
+    }
+
+    case ::arrow::Type::STRING: {
+      const auto& string_array =
+          internal::checked_cast<const ::arrow::StringArray&>(array);
+      datum->value<std::string>() = string_array.GetString(index);
+      return {};
+    }
+
+    case ::arrow::Type::BINARY: {
+      const auto& binary_array =
+          internal::checked_cast<const ::arrow::BinaryArray&>(array);
+      std::string_view value = binary_array.GetView(index);
+      datum->value<std::vector<uint8_t>>().assign(
+          reinterpret_cast<const uint8_t*>(value.data()),
+          reinterpret_cast<const uint8_t*>(value.data()) + value.size());
+      return {};
+    }
+
+    case ::arrow::Type::FIXED_SIZE_BINARY: {
+      const auto& fixed_array =
+          internal::checked_cast<const ::arrow::FixedSizeBinaryArray&>(array);
+      std::string_view value = fixed_array.GetView(index);
+      auto& fixed_datum = datum->value<::avro::GenericFixed>();
+      fixed_datum.value().assign(
+          reinterpret_cast<const char*>(value.data()),

Review Comment:
   GenericFixed holds std::vector<uint8_t>, so cast to const uint8_t*?
   Or, can this be implemented using begin/end iterators same as decimal128?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to