zeroshade commented on code in PR #1212:
URL: https://github.com/apache/iceberg-go/pull/1212#discussion_r3439424082


##########
expr_json.go:
##########
@@ -0,0 +1,542 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "math"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))

Review Comment:
   I'm surprised there isn't a `maps.Invert` function in the stdlib lol



##########
expr_json.go:
##########
@@ -0,0 +1,542 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "math"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// Bound predicates delegate to the same encoder; without these, json.Marshal 
of
+// a bound expression would fall through to {} since their fields are 
unexported.
+func (p *boundUnaryPredicate[T]) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *boundLiteralPredicate[T]) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *boundSetPredicate[T]) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" 
or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:

Review Comment:
   Why not just hard-code AlwaysTrue's MarshalJSON as `return []byte("true"), nil`?



##########
expr_json.go:
##########
@@ -0,0 +1,542 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "math"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// Bound predicates delegate to the same encoder; without these, json.Marshal 
of
+// a bound expression would fall through to {} since their fields are 
unexported.
+func (p *boundUnaryPredicate[T]) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *boundLiteralPredicate[T]) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *boundSetPredicate[T]) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" 
or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:
+               return json.RawMessage("true"), nil
+       case AlwaysFalse:
+               return json.RawMessage("false"), nil
+       case NotExpr:
+               child, err := encodeExpr(v.child)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpNot], Child: 
child})
+       case AndExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpAnd], Left: left, 
Right: right})
+       case OrExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpOr], Left: left, 
Right: right})
+       }
+
+       return encodePredicate(e)
+}
+
+func encodePredicate(e BooleanExpression) (json.RawMessage, error) {
+       op := e.Op()
+       js, ok := opToJSON[op]
+       if !ok {
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       var (
+               term Term
+               err  error
+       )
+       switch p := e.(type) {
+       case UnboundPredicate:
+               term = p.Term()
+       case BoundPredicate:
+               term = p.Term()
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression of type 
%T", ErrInvalidArgument, e)
+       }
+
+       // A bound term carries the field type, which a timestamptz literal 
needs to
+       // emit its +00:00 offset. Unbound terms leave it nil.
+       var typ Type
+       if bt, ok := term.(BoundTerm); ok {
+               typ = bt.Type()
+       }
+
+       node := exprNode{Type: js}
+       if node.Term, err = encodeTerm(term); err != nil {
+               return nil, err
+       }
+
+       switch {
+       case op >= OpIsNull && op <= OpNotNan:
+               // unary predicate: no value or values field
+       case op >= OpLT && op <= OpNotStartsWith:
+               lit, err := literalOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               if node.Value, err = encodeLiteral(lit, typ); err != nil {
+                       return nil, err
+               }
+       case op >= OpIn && op <= OpNotIn:
+               lits, err := literalsOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               node.Values = make([]json.RawMessage, 0, len(lits))
+               for _, l := range lits {
+                       v, err := encodeLiteral(l, typ)
+                       if err != nil {
+                               return nil, err
+                       }
+                       node.Values = append(node.Values, v)
+               }
+               // A set has no order; sort the encoded values for 
deterministic output.
+               sort.Slice(node.Values, func(i, j int) bool {
+                       return bytes.Compare(node.Values[i], node.Values[j]) < 0
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       return json.Marshal(node)
+}
+
+func encodeTerm(term Term) (json.RawMessage, error) {
+       switch t := term.(type) {
+       case Reference:
+               return json.Marshal(string(t))

Review Comment:
   Same thing here: this logic can just live in the MarshalJSON methods for
   each of these term types (e.g. `Reference`) instead of a type switch.



##########
expr_json.go:
##########
@@ -0,0 +1,542 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "math"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// Bound predicates delegate to the same encoder; without these, json.Marshal 
of
+// a bound expression would fall through to {} since their fields are 
unexported.
+func (p *boundUnaryPredicate[T]) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *boundLiteralPredicate[T]) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *boundSetPredicate[T]) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" 
or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:
+               return json.RawMessage("true"), nil
+       case AlwaysFalse:
+               return json.RawMessage("false"), nil
+       case NotExpr:
+               child, err := encodeExpr(v.child)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpNot], Child: 
child})
+       case AndExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpAnd], Left: left, 
Right: right})
+       case OrExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpOr], Left: left, 
Right: right})
+       }
+
+       return encodePredicate(e)
+}
+
+func encodePredicate(e BooleanExpression) (json.RawMessage, error) {
+       op := e.Op()
+       js, ok := opToJSON[op]
+       if !ok {
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       var (
+               term Term
+               err  error
+       )
+       switch p := e.(type) {
+       case UnboundPredicate:
+               term = p.Term()
+       case BoundPredicate:
+               term = p.Term()
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression of type 
%T", ErrInvalidArgument, e)
+       }
+
+       // A bound term carries the field type, which a timestamptz literal 
needs to
+       // emit its +00:00 offset. Unbound terms leave it nil.
+       var typ Type
+       if bt, ok := term.(BoundTerm); ok {
+               typ = bt.Type()
+       }
+
+       node := exprNode{Type: js}
+       if node.Term, err = encodeTerm(term); err != nil {
+               return nil, err
+       }
+
+       switch {
+       case op >= OpIsNull && op <= OpNotNan:

Review Comment:
   Don't we already have types for these (the unary, literal, and set
   predicates) that we can just implement MarshalJSON on, instead of
   needing to explicitly switch on the operation range here?



##########
expr_json.go:
##########
@@ -0,0 +1,542 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "math"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// Bound predicates delegate to the same encoder; without these, json.Marshal 
of
+// a bound expression would fall through to {} since their fields are 
unexported.
+func (p *boundUnaryPredicate[T]) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *boundLiteralPredicate[T]) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *boundSetPredicate[T]) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" 
or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:
+               return json.RawMessage("true"), nil
+       case AlwaysFalse:
+               return json.RawMessage("false"), nil

Review Comment:
   Same comment as above for AlwaysTrue: AlwaysFalse's MarshalJSON can just `return []byte("false"), nil`.



##########
expr_json.go:
##########
@@ -0,0 +1,542 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "math"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// Bound predicates delegate to the same encoder; without these, json.Marshal 
of
+// a bound expression would fall through to {} since their fields are 
unexported.
+func (p *boundUnaryPredicate[T]) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *boundLiteralPredicate[T]) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *boundSetPredicate[T]) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" 
or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:
+               return json.RawMessage("true"), nil
+       case AlwaysFalse:
+               return json.RawMessage("false"), nil
+       case NotExpr:
+               child, err := encodeExpr(v.child)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpNot], Child: 
child})
+       case AndExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpAnd], Left: left, 
Right: right})
+       case OrExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpOr], Left: left, 
Right: right})
+       }
+
+       return encodePredicate(e)
+}
+
+func encodePredicate(e BooleanExpression) (json.RawMessage, error) {
+       op := e.Op()
+       js, ok := opToJSON[op]
+       if !ok {
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       var (
+               term Term
+               err  error
+       )
+       switch p := e.(type) {
+       case UnboundPredicate:
+               term = p.Term()
+       case BoundPredicate:
+               term = p.Term()
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression of type 
%T", ErrInvalidArgument, e)
+       }
+
+       // A bound term carries the field type, which a timestamptz literal 
needs to
+       // emit its +00:00 offset. Unbound terms leave it nil.
+       var typ Type
+       if bt, ok := term.(BoundTerm); ok {
+               typ = bt.Type()
+       }
+
+       node := exprNode{Type: js}
+       if node.Term, err = encodeTerm(term); err != nil {
+               return nil, err
+       }
+
+       switch {
+       case op >= OpIsNull && op <= OpNotNan:
+               // unary predicate: no value or values field
+       case op >= OpLT && op <= OpNotStartsWith:
+               lit, err := literalOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               if node.Value, err = encodeLiteral(lit, typ); err != nil {
+                       return nil, err
+               }
+       case op >= OpIn && op <= OpNotIn:
+               lits, err := literalsOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               node.Values = make([]json.RawMessage, 0, len(lits))
+               for _, l := range lits {
+                       v, err := encodeLiteral(l, typ)
+                       if err != nil {
+                               return nil, err
+                       }
+                       node.Values = append(node.Values, v)
+               }
+               // A set has no order; sort the encoded values for 
deterministic output.
+               sort.Slice(node.Values, func(i, j int) bool {
+                       return bytes.Compare(node.Values[i], node.Values[j]) < 0
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       return json.Marshal(node)
+}
+
+func encodeTerm(term Term) (json.RawMessage, error) {
+       switch t := term.(type) {
+       case Reference:
+               return json.Marshal(string(t))
+       case BoundReference:
+               return json.Marshal(t.Field().Name)
+       case *BoundTransform:
+               return json.Marshal(transformNode{
+                       Type:      exprKeyTransform,
+                       Transform: t.transform.String(),
+                       Term:      t.term.Ref().Field().Name,
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize term of type %T", 
ErrInvalidArgument, term)
+       }
+}
+
+// encodeLiteral writes a non-null literal in the JSON form for its Iceberg 
type
+// (see Java's SingleValueParser). typ is the resolved field type, used to 
tell a
+// timestamptz literal from a plain timestamp; it may be nil for an unbound 
term.
+func encodeLiteral(lit Literal, typ Type) (json.RawMessage, error) {
+       switch l := lit.(type) {
+       case BoolLiteral:

Review Comment:
   Again, just add MarshalJSON methods to the literals instead of this type switch.



##########
expr_json.go:
##########
@@ -0,0 +1,542 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "math"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// Bound predicates delegate to the same encoder; without these, json.Marshal 
of
+// a bound expression would fall through to {} since their fields are 
unexported.
+func (p *boundUnaryPredicate[T]) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *boundLiteralPredicate[T]) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *boundSetPredicate[T]) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" 
or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:
+               return json.RawMessage("true"), nil
+       case AlwaysFalse:
+               return json.RawMessage("false"), nil
+       case NotExpr:
+               child, err := encodeExpr(v.child)

Review Comment:
   Couldn't NotExpr's MarshalJSON just call MarshalJSON on its child and then 
do the return?
   
   Ultimately, why do we even need this giant encodeExpr function when we're 
just using a large switch anyway? Each individual bit can just live in the 
appropriate MarshalJSON function for that expr type instead of using this large 
switch.



##########
expr_json.go:
##########
@@ -0,0 +1,542 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "math"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// Bound predicates delegate to the same encoder; without these, json.Marshal 
of
+// a bound expression would fall through to {} since their fields are 
unexported.
+func (p *boundUnaryPredicate[T]) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *boundLiteralPredicate[T]) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *boundSetPredicate[T]) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" 
or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:
+               return json.RawMessage("true"), nil
+       case AlwaysFalse:
+               return json.RawMessage("false"), nil
+       case NotExpr:
+               child, err := encodeExpr(v.child)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpNot], Child: 
child})
+       case AndExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpAnd], Left: left, 
Right: right})
+       case OrExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpOr], Left: left, 
Right: right})
+       }
+
+       return encodePredicate(e)
+}
+
+func encodePredicate(e BooleanExpression) (json.RawMessage, error) {
+       op := e.Op()
+       js, ok := opToJSON[op]
+       if !ok {
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       var (
+               term Term
+               err  error
+       )
+       switch p := e.(type) {
+       case UnboundPredicate:
+               term = p.Term()
+       case BoundPredicate:
+               term = p.Term()
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression of type 
%T", ErrInvalidArgument, e)
+       }
+
+       // A bound term carries the field type, which a timestamptz literal 
needs to
+       // emit its +00:00 offset. Unbound terms leave it nil.
+       var typ Type
+       if bt, ok := term.(BoundTerm); ok {
+               typ = bt.Type()
+       }
+
+       node := exprNode{Type: js}
+       if node.Term, err = encodeTerm(term); err != nil {
+               return nil, err
+       }
+
+       switch {
+       case op >= OpIsNull && op <= OpNotNan:
+               // unary predicate: no value or values field
+       case op >= OpLT && op <= OpNotStartsWith:
+               lit, err := literalOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               if node.Value, err = encodeLiteral(lit, typ); err != nil {
+                       return nil, err
+               }
+       case op >= OpIn && op <= OpNotIn:
+               lits, err := literalsOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               node.Values = make([]json.RawMessage, 0, len(lits))
+               for _, l := range lits {
+                       v, err := encodeLiteral(l, typ)
+                       if err != nil {
+                               return nil, err
+                       }
+                       node.Values = append(node.Values, v)
+               }
+               // A set has no order; sort the encoded values for 
deterministic output.
+               sort.Slice(node.Values, func(i, j int) bool {
+                       return bytes.Compare(node.Values[i], node.Values[j]) < 0
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       return json.Marshal(node)
+}
+
+func encodeTerm(term Term) (json.RawMessage, error) {
+       switch t := term.(type) {
+       case Reference:
+               return json.Marshal(string(t))
+       case BoundReference:
+               return json.Marshal(t.Field().Name)
+       case *BoundTransform:
+               return json.Marshal(transformNode{
+                       Type:      exprKeyTransform,
+                       Transform: t.transform.String(),
+                       Term:      t.term.Ref().Field().Name,
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize term of type %T", 
ErrInvalidArgument, term)
+       }
+}
+
+// encodeLiteral writes a non-null literal in the JSON form for its Iceberg 
type
+// (see Java's SingleValueParser). typ is the resolved field type, used to 
tell a
+// timestamptz literal from a plain timestamp; it may be nil for an unbound 
term.
+func encodeLiteral(lit Literal, typ Type) (json.RawMessage, error) {
+       switch l := lit.(type) {
+       case BoolLiteral:
+               return json.Marshal(bool(l))
+       case Int32Literal:
+               return json.Marshal(int32(l))
+       case Int64Literal:
+               return json.Marshal(int64(l))
+       case Float32Literal:
+               if f := float64(l); math.IsInf(f, 0) || math.IsNaN(f) {
+                       return nil, fmt.Errorf("%w: cannot serialize non-finite 
float %v", ErrInvalidArgument, f)
+               }
+
+               return json.Marshal(float32(l))
+       case Float64Literal:
+               if f := float64(l); math.IsInf(f, 0) || math.IsNaN(f) {
+                       return nil, fmt.Errorf("%w: cannot serialize non-finite 
float %v", ErrInvalidArgument, f)
+               }
+
+               return json.Marshal(float64(l))
+       case StringLiteral:
+               return json.Marshal(string(l))
+       case DateLiteral:
+               return json.Marshal(Date(l).ToTime().Format("2006-01-02"))
+       case TimeLiteral:
+               // "9"s trim trailing fractional zeros (and the point when 
zero), as Java does.
+               return 
json.Marshal(time.UnixMicro(int64(l)).UTC().Format("15:04:05.999999"))
+       case TimestampLiteral:
+               t := Timestamp(l).ToTime()
+               // timestamptz gets a +00:00 offset ("-07:00" prints it, 
"Z07:00" wouldn't).
+               if _, ok := typ.(TimestampTzType); ok {
+                       return 
json.Marshal(t.UTC().Format("2006-01-02T15:04:05.999999-07:00"))
+               }
+
+               return json.Marshal(t.Format("2006-01-02T15:04:05.999999"))
+       case TimestampNsLiteral:
+               t := TimestampNano(l).ToTime()
+               if _, ok := typ.(TimestampTzNsType); ok {
+                       return 
json.Marshal(t.UTC().Format("2006-01-02T15:04:05.999999999-07:00"))
+               }
+
+               return json.Marshal(t.Format("2006-01-02T15:04:05.999999999"))
+       case UUIDLiteral:
+               return json.Marshal(uuid.UUID(l).String())
+       case FixedLiteral:
+               return 
json.Marshal(strings.ToUpper(hex.EncodeToString([]byte(l))))
+       case BinaryLiteral:
+               return 
json.Marshal(strings.ToUpper(hex.EncodeToString([]byte(l))))
+       case DecimalLiteral:
+               return json.Marshal(Decimal(l).String())
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize literal of type 
%s", ErrInvalidArgument, lit.Type())
+       }
+}
+
+func literalOf(e BooleanExpression) (Literal, error) {
+       switch p := e.(type) {
+       case *unboundLiteralPredicate:
+               return p.lit, nil
+       case BoundLiteralPredicate:
+               return p.Literal(), nil
+       default:
+               return nil, fmt.Errorf("%w: expected a literal predicate, got 
%T", ErrInvalidArgument, e)
+       }
+}
+
+func literalsOf(e BooleanExpression) ([]Literal, error) {
+       switch p := e.(type) {
+       case *unboundSetPredicate:
+               return p.lits.Members(), nil
+       case BoundSetPredicate:
+               return p.Literals().Members(), nil
+       default:
+               return nil, fmt.Errorf("%w: expected a set predicate, got %T", 
ErrInvalidArgument, e)
+       }
+}
+
+func decodeExpr(raw json.RawMessage, schema *Schema) (BooleanExpression, 
error) {
+       b := bytes.TrimSpace(raw)

Review Comment:
   Use json.NewDecoder and get the tokens instead of having to deal with spaces 
and trying to parse the JSON yourself.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to