rambleraptor commented on code in PR #1212:
URL: https://github.com/apache/iceberg-go/pull/1212#discussion_r3431493589


##########
expr_json.go:
##########
@@ -0,0 +1,502 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:
+               return json.RawMessage("true"), nil
+       case AlwaysFalse:
+               return json.RawMessage("false"), nil
+       case NotExpr:
+               child, err := encodeExpr(v.child)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpNot], Child: 
child})
+       case AndExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpAnd], Left: left, 
Right: right})
+       case OrExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpOr], Left: left, 
Right: right})
+       }
+
+       return encodePredicate(e)
+}
+
+func encodePredicate(e BooleanExpression) (json.RawMessage, error) {
+       op := e.Op()
+       js, ok := opToJSON[op]
+       if !ok {
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       var (
+               term Term
+               err  error
+       )
+       switch p := e.(type) {
+       case UnboundPredicate:
+               term = p.Term()
+       case BoundPredicate:
+               term = p.Term()
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression of type 
%T", ErrInvalidArgument, e)
+       }
+
+       node := exprNode{Type: js}
+       if node.Term, err = encodeTerm(term); err != nil {
+               return nil, err
+       }
+
+       switch {
+       case op >= OpIsNull && op <= OpNotNan:
+               // unary predicate: no value or values field
+       case op >= OpLT && op <= OpNotStartsWith:
+               lit, err := literalOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               if node.Value, err = encodeLiteral(lit); err != nil {
+                       return nil, err
+               }
+       case op >= OpIn && op <= OpNotIn:
+               lits, err := literalsOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               node.Values = make([]json.RawMessage, 0, len(lits))
+               for _, l := range lits {
+                       v, err := encodeLiteral(l)
+                       if err != nil {
+                               return nil, err
+                       }
+                       node.Values = append(node.Values, v)
+               }
+               // A set has no order; sort the encoded values for deterministic output.
+               sort.Slice(node.Values, func(i, j int) bool {
+                       return bytes.Compare(node.Values[i], node.Values[j]) < 0
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       return json.Marshal(node)
+}
+
+func encodeTerm(term Term) (json.RawMessage, error) {
+       switch t := term.(type) {
+       case Reference:
+               return json.Marshal(string(t))
+       case BoundReference:
+               return json.Marshal(t.Field().Name)
+       case *BoundTransform:
+               return json.Marshal(transformNode{
+                       Type:      exprKeyTransform,
+                       Transform: t.transform.String(),
+                       Term:      t.term.Ref().Field().Name,
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize term of type %T", 
ErrInvalidArgument, term)
+       }
+}
+
+// encodeLiteral writes a non-null literal in the JSON form for its Iceberg type
+// (see Java's SingleValueParser).
+func encodeLiteral(lit Literal) (json.RawMessage, error) {
+       switch l := lit.(type) {
+       case BoolLiteral:
+               return json.Marshal(bool(l))
+       case Int32Literal:
+               return json.Marshal(int32(l))
+       case Int64Literal:
+               return json.Marshal(int64(l))
+       case Float32Literal:
+               return json.Marshal(float32(l))
+       case Float64Literal:
+               return json.Marshal(float64(l))
+       case StringLiteral:
+               return json.Marshal(string(l))
+       case DateLiteral:
+               return json.Marshal(Date(l).ToTime().Format("2006-01-02"))
+       case TimeLiteral:
+               // "9"s trim trailing fractional zeros (and the point when zero), as Java does.
+               return 
json.Marshal(time.UnixMicro(int64(l)).UTC().Format("15:04:05.999999"))
+       case TimestampLiteral:
+               return 
json.Marshal(Timestamp(l).ToTime().Format("2006-01-02T15:04:05.999999"))
+       case TimestampNsLiteral:
+               return 
json.Marshal(TimestampNano(l).ToTime().Format("2006-01-02T15:04:05.999999999"))

Review Comment:
   Great catch! Fixed this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to