wgtmac commented on code in PR #532: URL: https://github.com/apache/iceberg-cpp/pull/532#discussion_r2748692773
########## src/iceberg/expression/json_serde_internal.h: ########## @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <nlohmann/json_fwd.hpp> + +#include "iceberg/expression/expressions.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" +#include "iceberg/type_fwd.h" + +/// \file iceberg/expression/json_serde_internal.h +/// JSON serialization and deserialization for expressions. + +namespace iceberg { + +/// \brief Converts an operation type string to an Expression::Operation. +/// +/// \param typeStr The operation type string +/// \return The corresponding Operation or an error if unknown +ICEBERG_EXPORT Result<Expression::Operation> OperationTypeFromString( + const std::string_view typeStr); + +/// \brief Converts an Expression::Operation to its string representation. +/// +/// \param op The operation to convert +/// \return The operation type string (e.g., "eq", "lt-eq", "is-null") +ICEBERG_EXPORT std::string_view ToStringOperationType(Expression::Operation op); + +/// \brief Deserializes a JSON object into an Expression. 
+/// +/// \param json A JSON object representing an expression +/// \return A shared pointer to the deserialized Expression or an error +ICEBERG_EXPORT Result<std::shared_ptr<Expression>> ExpressionFromJson( + const nlohmann::json& json); + +/// \brief Serializes an Expression into its JSON representation. +/// +/// \param expr The expression to serialize +/// \return A JSON object representing the expression +ICEBERG_EXPORT nlohmann::json ExpressionToJson(const Expression& expr); Review Comment: ```suggestion ICEBERG_EXPORT nlohmann::json ToJson(const Expression& expr); ``` This is consistent with serde impls. ########## src/iceberg/expression/json_serde.cc: ########## @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <format> +#include <string> +#include <utility> +#include <vector> + +#include <nlohmann/json.hpp> + +#include "iceberg/expression/json_serde_internal.h" +#include "iceberg/expression/literal.h" +#include "iceberg/util/checked_cast.h" +#include "iceberg/util/json_util_internal.h" +#include "iceberg/util/macros.h" + +namespace iceberg { +namespace { +// Expression type strings +constexpr std::string_view kTypeTrue = "true"; +constexpr std::string_view kTypeFalse = "false"; +constexpr std::string_view kTypeEq = "eq"; +constexpr std::string_view kTypeAnd = "and"; +constexpr std::string_view kTypeOr = "or"; +constexpr std::string_view kTypeNot = "not"; +constexpr std::string_view kTypeIn = "in"; +constexpr std::string_view kTypeNotIn = "not-in"; +constexpr std::string_view kTypeLt = "lt"; +constexpr std::string_view kTypeLtEq = "lt-eq"; +constexpr std::string_view kTypeGt = "gt"; +constexpr std::string_view kTypeGtEq = "gt-eq"; +constexpr std::string_view kTypeNotEq = "not-eq"; +constexpr std::string_view kTypeStartsWith = "starts-with"; +constexpr std::string_view kTypeNotStartsWith = "not-starts-with"; +constexpr std::string_view kTypeIsNull = "is-null"; +constexpr std::string_view kTypeNotNull = "not-null"; +constexpr std::string_view kTypeIsNan = "is-nan"; +constexpr std::string_view kTypeNotNan = "not-nan"; +} // namespace + +bool IsUnaryOperation(Expression::Operation op) { + switch (op) { + case Expression::Operation::kIsNull: + case Expression::Operation::kNotNull: + case Expression::Operation::kIsNan: + case Expression::Operation::kNotNan: + return true; + default: + return false; + } +} + +bool IsSetOperation(Expression::Operation op) { + switch (op) { + case Expression::Operation::kIn: + case Expression::Operation::kNotIn: + return true; + default: + return false; + } +} + +Result<Expression::Operation> OperationTypeFromString(const std::string_view typeStr) { + if (typeStr == kTypeTrue) return Expression::Operation::kTrue; + if (typeStr == 
kTypeFalse) return Expression::Operation::kFalse; + if (typeStr == kTypeAnd) return Expression::Operation::kAnd; + if (typeStr == kTypeOr) return Expression::Operation::kOr; + if (typeStr == kTypeNot) return Expression::Operation::kNot; + if (typeStr == kTypeEq) return Expression::Operation::kEq; + if (typeStr == kTypeNotEq) return Expression::Operation::kNotEq; + if (typeStr == kTypeLt) return Expression::Operation::kLt; + if (typeStr == kTypeLtEq) return Expression::Operation::kLtEq; + if (typeStr == kTypeGt) return Expression::Operation::kGt; + if (typeStr == kTypeGtEq) return Expression::Operation::kGtEq; + if (typeStr == kTypeIn) return Expression::Operation::kIn; + if (typeStr == kTypeNotIn) return Expression::Operation::kNotIn; + if (typeStr == kTypeIsNull) return Expression::Operation::kIsNull; + if (typeStr == kTypeNotNull) return Expression::Operation::kNotNull; + if (typeStr == kTypeIsNan) return Expression::Operation::kIsNan; + if (typeStr == kTypeNotNan) return Expression::Operation::kNotNan; + if (typeStr == kTypeStartsWith) return Expression::Operation::kStartsWith; + if (typeStr == kTypeNotStartsWith) return Expression::Operation::kNotStartsWith; + + return JsonParseError("Unknown expression type: {}", typeStr); +} + +std::string_view ToStringOperationType(Expression::Operation op) { + switch (op) { + case Expression::Operation::kTrue: + return kTypeTrue; + case Expression::Operation::kFalse: + return kTypeFalse; + case Expression::Operation::kAnd: + return kTypeAnd; + case Expression::Operation::kOr: + return kTypeOr; + case Expression::Operation::kNot: + return kTypeNot; + case Expression::Operation::kEq: + return kTypeEq; + case Expression::Operation::kNotEq: + return kTypeNotEq; + case Expression::Operation::kLt: + return kTypeLt; + case Expression::Operation::kLtEq: + return kTypeLtEq; + case Expression::Operation::kGt: + return kTypeGt; + case Expression::Operation::kGtEq: + return kTypeGtEq; + case Expression::Operation::kIn: + return 
kTypeIn; + case Expression::Operation::kNotIn: + return kTypeNotIn; + case Expression::Operation::kIsNull: + return kTypeIsNull; + case Expression::Operation::kNotNull: + return kTypeNotNull; + case Expression::Operation::kIsNan: + return kTypeIsNan; + case Expression::Operation::kNotNan: + return kTypeNotNan; + case Expression::Operation::kStartsWith: + return kTypeStartsWith; + case Expression::Operation::kNotStartsWith: + return kTypeNotStartsWith; + default: + ICEBERG_CHECK_OR_DIE(false, "Unknown expression operation."); + } Review Comment: ```suggestion } std::unreachable(); ``` It is safer to remove the `default` branch, which enables the compiler to flag missing cases when new operations are added. The convention in this codebase is to return a status rather than throw exceptions (except in expressions.h, which provides a fluent API for users to create expressions). ########## src/iceberg/expression/json_serde_internal.h: ########## @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <nlohmann/json_fwd.hpp> + +#include "iceberg/expression/expressions.h" Review Comment: ```suggestion ``` Let's avoid using this. 
########## src/iceberg/expression/json_serde_internal.h: ########## @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <nlohmann/json_fwd.hpp> + +#include "iceberg/expression/expressions.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" +#include "iceberg/type_fwd.h" + +/// \file iceberg/expression/json_serde_internal.h +/// JSON serialization and deserialization for expressions. + +namespace iceberg { + +/// \brief Converts an operation type string to an Expression::Operation. +/// +/// \param typeStr The operation type string +/// \return The corresponding Operation or an error if unknown +ICEBERG_EXPORT Result<Expression::Operation> OperationTypeFromString( + const std::string_view typeStr); + +/// \brief Converts an Expression::Operation to its string representation. 
+/// +/// \param op The operation to convert +/// \return The operation type string (e.g., "eq", "lt-eq", "is-null") +ICEBERG_EXPORT std::string_view ToStringOperationType(Expression::Operation op); Review Comment: ```suggestion ICEBERG_EXPORT std::string_view ToString(Expression::Operation op); ``` Less is more :) ########## src/iceberg/expression/json_serde.cc: ########## @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <format> +#include <string> +#include <utility> +#include <vector> + +#include <nlohmann/json.hpp> + +#include "iceberg/expression/json_serde_internal.h" +#include "iceberg/expression/literal.h" +#include "iceberg/util/checked_cast.h" +#include "iceberg/util/json_util_internal.h" +#include "iceberg/util/macros.h" + +namespace iceberg { +namespace { +// Expression type strings +constexpr std::string_view kTypeTrue = "true"; +constexpr std::string_view kTypeFalse = "false"; +constexpr std::string_view kTypeEq = "eq"; +constexpr std::string_view kTypeAnd = "and"; +constexpr std::string_view kTypeOr = "or"; +constexpr std::string_view kTypeNot = "not"; +constexpr std::string_view kTypeIn = "in"; +constexpr std::string_view kTypeNotIn = "not-in"; +constexpr std::string_view kTypeLt = "lt"; +constexpr std::string_view kTypeLtEq = "lt-eq"; +constexpr std::string_view kTypeGt = "gt"; +constexpr std::string_view kTypeGtEq = "gt-eq"; +constexpr std::string_view kTypeNotEq = "not-eq"; +constexpr std::string_view kTypeStartsWith = "starts-with"; +constexpr std::string_view kTypeNotStartsWith = "not-starts-with"; +constexpr std::string_view kTypeIsNull = "is-null"; +constexpr std::string_view kTypeNotNull = "not-null"; +constexpr std::string_view kTypeIsNan = "is-nan"; +constexpr std::string_view kTypeNotNan = "not-nan"; +} // namespace + +bool IsUnaryOperation(Expression::Operation op) { + switch (op) { + case Expression::Operation::kIsNull: + case Expression::Operation::kNotNull: + case Expression::Operation::kIsNan: + case Expression::Operation::kNotNan: + return true; + default: + return false; + } +} + +bool IsSetOperation(Expression::Operation op) { + switch (op) { + case Expression::Operation::kIn: + case Expression::Operation::kNotIn: + return true; + default: + return false; + } +} + +Result<Expression::Operation> OperationTypeFromString(const std::string_view typeStr) { + if (typeStr == kTypeTrue) return Expression::Operation::kTrue; + if (typeStr == 
kTypeFalse) return Expression::Operation::kFalse; + if (typeStr == kTypeAnd) return Expression::Operation::kAnd; + if (typeStr == kTypeOr) return Expression::Operation::kOr; + if (typeStr == kTypeNot) return Expression::Operation::kNot; + if (typeStr == kTypeEq) return Expression::Operation::kEq; + if (typeStr == kTypeNotEq) return Expression::Operation::kNotEq; + if (typeStr == kTypeLt) return Expression::Operation::kLt; + if (typeStr == kTypeLtEq) return Expression::Operation::kLtEq; + if (typeStr == kTypeGt) return Expression::Operation::kGt; + if (typeStr == kTypeGtEq) return Expression::Operation::kGtEq; + if (typeStr == kTypeIn) return Expression::Operation::kIn; + if (typeStr == kTypeNotIn) return Expression::Operation::kNotIn; + if (typeStr == kTypeIsNull) return Expression::Operation::kIsNull; + if (typeStr == kTypeNotNull) return Expression::Operation::kNotNull; + if (typeStr == kTypeIsNan) return Expression::Operation::kIsNan; + if (typeStr == kTypeNotNan) return Expression::Operation::kNotNan; + if (typeStr == kTypeStartsWith) return Expression::Operation::kStartsWith; + if (typeStr == kTypeNotStartsWith) return Expression::Operation::kNotStartsWith; + + return JsonParseError("Unknown expression type: {}", typeStr); +} + +std::string_view ToStringOperationType(Expression::Operation op) { + switch (op) { + case Expression::Operation::kTrue: + return kTypeTrue; + case Expression::Operation::kFalse: + return kTypeFalse; + case Expression::Operation::kAnd: + return kTypeAnd; + case Expression::Operation::kOr: + return kTypeOr; + case Expression::Operation::kNot: + return kTypeNot; + case Expression::Operation::kEq: + return kTypeEq; + case Expression::Operation::kNotEq: + return kTypeNotEq; + case Expression::Operation::kLt: + return kTypeLt; + case Expression::Operation::kLtEq: + return kTypeLtEq; + case Expression::Operation::kGt: + return kTypeGt; + case Expression::Operation::kGtEq: + return kTypeGtEq; + case Expression::Operation::kIn: + return 
kTypeIn; + case Expression::Operation::kNotIn: + return kTypeNotIn; + case Expression::Operation::kIsNull: + return kTypeIsNull; + case Expression::Operation::kNotNull: + return kTypeNotNull; + case Expression::Operation::kIsNan: + return kTypeIsNan; + case Expression::Operation::kNotNan: + return kTypeNotNan; + case Expression::Operation::kStartsWith: + return kTypeStartsWith; + case Expression::Operation::kNotStartsWith: + return kTypeNotStartsWith; + default: + ICEBERG_CHECK_OR_DIE(false, "Unknown expression operation."); + } +} + +Result<std::shared_ptr<Expression>> ExpressionFromJson(const nlohmann::json& json) { + // Handle boolean + if (json.is_boolean()) { + return json.get<bool>() + ? internal::checked_pointer_cast<Expression>(True::Instance()) + : internal::checked_pointer_cast<Expression>(False::Instance()); + } + return JsonParseError("Only booleans are currently supported."); +} + +nlohmann::json ExpressionToJson(const Expression& expr) { + switch (expr.op()) { + case Expression::Operation::kTrue: + return true; + + case Expression::Operation::kFalse: + return false; + default: Review Comment: Please add a TODO comment to remove `ICEBERG_CHECK_OR_DIE` in the future because we cannot really throw here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
