dillitz commented on code in PR #1212:
URL: https://github.com/apache/iceberg-go/pull/1212#discussion_r3429868041


##########
expr_json.go:
##########
@@ -0,0 +1,502 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" 
or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:
+               return json.RawMessage("true"), nil
+       case AlwaysFalse:
+               return json.RawMessage("false"), nil
+       case NotExpr:
+               child, err := encodeExpr(v.child)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpNot], Child: 
child})
+       case AndExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpAnd], Left: left, 
Right: right})
+       case OrExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpOr], Left: left, 
Right: right})
+       }
+
+       return encodePredicate(e)
+}
+
+func encodePredicate(e BooleanExpression) (json.RawMessage, error) {
+       op := e.Op()
+       js, ok := opToJSON[op]
+       if !ok {
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       var (
+               term Term
+               err  error
+       )
+       switch p := e.(type) {
+       case UnboundPredicate:
+               term = p.Term()
+       case BoundPredicate:
+               term = p.Term()
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression of type 
%T", ErrInvalidArgument, e)
+       }
+
+       node := exprNode{Type: js}
+       if node.Term, err = encodeTerm(term); err != nil {
+               return nil, err
+       }
+
+       switch {
+       case op >= OpIsNull && op <= OpNotNan:
+               // unary predicate: no value or values field
+       case op >= OpLT && op <= OpNotStartsWith:
+               lit, err := literalOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               if node.Value, err = encodeLiteral(lit); err != nil {
+                       return nil, err
+               }
+       case op >= OpIn && op <= OpNotIn:
+               lits, err := literalsOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               node.Values = make([]json.RawMessage, 0, len(lits))
+               for _, l := range lits {
+                       v, err := encodeLiteral(l)
+                       if err != nil {
+                               return nil, err
+                       }
+                       node.Values = append(node.Values, v)
+               }
+               // A set has no order; sort the encoded values for 
deterministic output.
+               sort.Slice(node.Values, func(i, j int) bool {
+                       return bytes.Compare(node.Values[i], node.Values[j]) < 0
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       return json.Marshal(node)
+}
+
+func encodeTerm(term Term) (json.RawMessage, error) {
+       switch t := term.(type) {
+       case Reference:
+               return json.Marshal(string(t))
+       case BoundReference:
+               return json.Marshal(t.Field().Name)
+       case *BoundTransform:
+               return json.Marshal(transformNode{
+                       Type:      exprKeyTransform,
+                       Transform: t.transform.String(),
+                       Term:      t.term.Ref().Field().Name,
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize term of type %T", 
ErrInvalidArgument, term)
+       }
+}
+
+// encodeLiteral writes a non-null literal in the JSON form for its Iceberg 
type
+// (see Java's SingleValueParser).
+func encodeLiteral(lit Literal) (json.RawMessage, error) {
+       switch l := lit.(type) {
+       case BoolLiteral:
+               return json.Marshal(bool(l))
+       case Int32Literal:
+               return json.Marshal(int32(l))
+       case Int64Literal:
+               return json.Marshal(int64(l))
+       case Float32Literal:
+               return json.Marshal(float32(l))
+       case Float64Literal:
+               return json.Marshal(float64(l))

Review Comment:
   I think `json.Marshal` returns an error for non-finite values here (`json: 
unsupported value: NaN` / `+Inf`), so a float predicate with a `NaN`/`Inf` 
bound can't be serialized at all. The parse side also rejects the bare 
`NaN`/`Infinity` tokens Java emits. These values are constructible via 
`NewLiteral`, so the case is reachable, though uncommon (NaN checks usually go 
through `is-nan`/`not-nan`). It might be worth deciding the policy explicitly: 
either return a wrapped error, or special-case them to match Java. Note that 
Java's bare `NaN`/`Infinity` tokens aren't valid standard JSON anyway.



##########
expr_json.go:
##########
@@ -0,0 +1,502 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" 
or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:
+               return json.RawMessage("true"), nil
+       case AlwaysFalse:
+               return json.RawMessage("false"), nil
+       case NotExpr:
+               child, err := encodeExpr(v.child)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpNot], Child: 
child})
+       case AndExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpAnd], Left: left, 
Right: right})
+       case OrExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpOr], Left: left, 
Right: right})
+       }
+
+       return encodePredicate(e)
+}
+
+func encodePredicate(e BooleanExpression) (json.RawMessage, error) {
+       op := e.Op()
+       js, ok := opToJSON[op]
+       if !ok {
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       var (
+               term Term
+               err  error
+       )
+       switch p := e.(type) {
+       case UnboundPredicate:
+               term = p.Term()
+       case BoundPredicate:
+               term = p.Term()
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression of type 
%T", ErrInvalidArgument, e)
+       }
+
+       node := exprNode{Type: js}
+       if node.Term, err = encodeTerm(term); err != nil {
+               return nil, err
+       }
+
+       switch {
+       case op >= OpIsNull && op <= OpNotNan:
+               // unary predicate: no value or values field
+       case op >= OpLT && op <= OpNotStartsWith:
+               lit, err := literalOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               if node.Value, err = encodeLiteral(lit); err != nil {
+                       return nil, err
+               }
+       case op >= OpIn && op <= OpNotIn:
+               lits, err := literalsOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               node.Values = make([]json.RawMessage, 0, len(lits))
+               for _, l := range lits {
+                       v, err := encodeLiteral(l)
+                       if err != nil {
+                               return nil, err
+                       }
+                       node.Values = append(node.Values, v)
+               }
+               // A set has no order; sort the encoded values for 
deterministic output.
+               sort.Slice(node.Values, func(i, j int) bool {
+                       return bytes.Compare(node.Values[i], node.Values[j]) < 0
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       return json.Marshal(node)
+}
+
+func encodeTerm(term Term) (json.RawMessage, error) {
+       switch t := term.(type) {
+       case Reference:
+               return json.Marshal(string(t))
+       case BoundReference:
+               return json.Marshal(t.Field().Name)
+       case *BoundTransform:
+               return json.Marshal(transformNode{
+                       Type:      exprKeyTransform,
+                       Transform: t.transform.String(),
+                       Term:      t.term.Ref().Field().Name,
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize term of type %T", 
ErrInvalidArgument, term)
+       }
+}
+
+// encodeLiteral writes a non-null literal in the JSON form for its Iceberg 
type
+// (see Java's SingleValueParser).
+func encodeLiteral(lit Literal) (json.RawMessage, error) {
+       switch l := lit.(type) {
+       case BoolLiteral:
+               return json.Marshal(bool(l))
+       case Int32Literal:
+               return json.Marshal(int32(l))
+       case Int64Literal:
+               return json.Marshal(int64(l))
+       case Float32Literal:
+               return json.Marshal(float32(l))
+       case Float64Literal:
+               return json.Marshal(float64(l))
+       case StringLiteral:
+               return json.Marshal(string(l))
+       case DateLiteral:
+               return json.Marshal(Date(l).ToTime().Format("2006-01-02"))
+       case TimeLiteral:
+               // "9"s trim trailing fractional zeros (and the point when 
zero), as Java does.
+               return 
json.Marshal(time.UnixMicro(int64(l)).UTC().Format("15:04:05.999999"))
+       case TimestampLiteral:
+               return 
json.Marshal(Timestamp(l).ToTime().Format("2006-01-02T15:04:05.999999"))
+       case TimestampNsLiteral:
+               return 
json.Marshal(TimestampNano(l).ToTime().Format("2006-01-02T15:04:05.999999999"))

Review Comment:
   I believe `timestamptz` columns serialize incorrectly here. `encodeLiteral` 
switches on the Go literal type, and both `timestamp` and `timestamptz` values 
are carried by `TimestampLiteral` (its `Type()` always reports `timestamp`, and 
`To(TimestampTzType)` returns the literal unchanged), so the timestamp branch 
formats with no zone offset.
   
   Java appends a `+00:00` offset for `shouldAdjustToUTC` types: the 
[`SingleValueParser` TIMESTAMP 
case](https://github.com/apache/iceberg/blob/85ffa1984e115e80ba1571f3eb017fcf0ba39031/core/src/main/java/org/apache/iceberg/SingleValueParser.java#L297-L305)
 routes to 
[`microsToIsoTimestamptz`](https://github.com/apache/iceberg/blob/85ffa1984e115e80ba1571f3eb017fcf0ba39031/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java#L148-L150),
 which formats with the offset. So for a `timestamptz` filter value:
   
   - Go emits `"2022-08-14T10:00:00"` where Java emits 
`"2022-08-14T10:00:00+00:00"`, so the bytes don't match.
   - It doesn't round-trip its own output either: parsing that offset-less 
string back with a `timestamptz` schema fails, since converting the parsed 
string with `To(TimestampTzType)` goes through `time.RFC3339`, which requires 
the offset.
   - A server parsing it with `isoTimestamptzToMicros` (`OffsetDateTime.parse`) 
needs the offset too, so it would reject the filter.



##########
expr_json.go:
##########
@@ -0,0 +1,502 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" 
or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:
+               return json.RawMessage("true"), nil
+       case AlwaysFalse:
+               return json.RawMessage("false"), nil
+       case NotExpr:
+               child, err := encodeExpr(v.child)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpNot], Child: 
child})
+       case AndExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpAnd], Left: left, 
Right: right})
+       case OrExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpOr], Left: left, 
Right: right})
+       }
+
+       return encodePredicate(e)
+}
+
+func encodePredicate(e BooleanExpression) (json.RawMessage, error) {
+       op := e.Op()
+       js, ok := opToJSON[op]
+       if !ok {
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       var (
+               term Term
+               err  error
+       )
+       switch p := e.(type) {
+       case UnboundPredicate:
+               term = p.Term()
+       case BoundPredicate:
+               term = p.Term()
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression of type 
%T", ErrInvalidArgument, e)
+       }
+
+       node := exprNode{Type: js}
+       if node.Term, err = encodeTerm(term); err != nil {
+               return nil, err
+       }
+
+       switch {
+       case op >= OpIsNull && op <= OpNotNan:
+               // unary predicate: no value or values field
+       case op >= OpLT && op <= OpNotStartsWith:
+               lit, err := literalOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               if node.Value, err = encodeLiteral(lit); err != nil {
+                       return nil, err
+               }
+       case op >= OpIn && op <= OpNotIn:
+               lits, err := literalsOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               node.Values = make([]json.RawMessage, 0, len(lits))
+               for _, l := range lits {
+                       v, err := encodeLiteral(l)
+                       if err != nil {
+                               return nil, err
+                       }
+                       node.Values = append(node.Values, v)
+               }
+               // A set has no order; sort the encoded values for 
deterministic output.
+               sort.Slice(node.Values, func(i, j int) bool {
+                       return bytes.Compare(node.Values[i], node.Values[j]) < 0
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       return json.Marshal(node)
+}
+
+func encodeTerm(term Term) (json.RawMessage, error) {
+       switch t := term.(type) {
+       case Reference:
+               return json.Marshal(string(t))
+       case BoundReference:
+               return json.Marshal(t.Field().Name)
+       case *BoundTransform:
+               return json.Marshal(transformNode{
+                       Type:      exprKeyTransform,
+                       Transform: t.transform.String(),
+                       Term:      t.term.Ref().Field().Name,
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize term of type %T", 
ErrInvalidArgument, term)
+       }
+}
+
+// encodeLiteral writes a non-null literal in the JSON form for its Iceberg 
type
+// (see Java's SingleValueParser).
+func encodeLiteral(lit Literal) (json.RawMessage, error) {
+       switch l := lit.(type) {
+       case BoolLiteral:
+               return json.Marshal(bool(l))
+       case Int32Literal:
+               return json.Marshal(int32(l))
+       case Int64Literal:
+               return json.Marshal(int64(l))
+       case Float32Literal:
+               return json.Marshal(float32(l))
+       case Float64Literal:
+               return json.Marshal(float64(l))
+       case StringLiteral:
+               return json.Marshal(string(l))
+       case DateLiteral:
+               return json.Marshal(Date(l).ToTime().Format("2006-01-02"))
+       case TimeLiteral:
+               // "9"s trim trailing fractional zeros (and the point when 
zero), as Java does.
+               return 
json.Marshal(time.UnixMicro(int64(l)).UTC().Format("15:04:05.999999"))
+       case TimestampLiteral:
+               return 
json.Marshal(Timestamp(l).ToTime().Format("2006-01-02T15:04:05.999999"))
+       case TimestampNsLiteral:
+               return 
json.Marshal(TimestampNano(l).ToTime().Format("2006-01-02T15:04:05.999999999"))
+       case UUIDLiteral:
+               return json.Marshal(uuid.UUID(l).String())
+       case FixedLiteral:
+               return 
json.Marshal(strings.ToUpper(hex.EncodeToString([]byte(l))))
+       case BinaryLiteral:
+               return 
json.Marshal(strings.ToUpper(hex.EncodeToString([]byte(l))))
+       case DecimalLiteral:
+               return json.Marshal(Decimal(l).String())
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize literal of type 
%s", ErrInvalidArgument, lit.Type())
+       }
+}
+
+func literalOf(e BooleanExpression) (Literal, error) {
+       switch p := e.(type) {
+       case *unboundLiteralPredicate:
+               return p.lit, nil
+       case BoundLiteralPredicate:
+               return p.Literal(), nil
+       default:
+               return nil, fmt.Errorf("%w: expected a literal predicate, got 
%T", ErrInvalidArgument, e)
+       }
+}
+
+func literalsOf(e BooleanExpression) ([]Literal, error) {
+       switch p := e.(type) {
+       case *unboundSetPredicate:
+               return p.lits.Members(), nil
+       case BoundSetPredicate:
+               return p.Literals().Members(), nil
+       default:
+               return nil, fmt.Errorf("%w: expected a set predicate, got %T", 
ErrInvalidArgument, e)
+       }
+}
+
+func decodeExpr(raw json.RawMessage, schema *Schema) (BooleanExpression, 
error) {
+       b := bytes.TrimSpace(raw)
+       if len(b) == 0 {
+               return nil, fmt.Errorf("%w: cannot parse expression from empty 
input", ErrInvalidArgument)
+       }
+
+       // A bare boolean is AlwaysTrue/AlwaysFalse.
+       if b[0] == 't' || b[0] == 'f' {
+               var bv bool
+               if err := json.Unmarshal(b, &bv); err != nil {
+                       return nil, fmt.Errorf("%w: cannot parse expression: 
%s", ErrInvalidArgument, err)
+               }
+               if bv {
+                       return AlwaysTrue{}, nil
+               }
+
+               return AlwaysFalse{}, nil
+       }
+
+       var node exprNode
+       if err := json.Unmarshal(b, &node); err != nil {
+               return nil, fmt.Errorf("%w: cannot parse expression: %s", 
ErrInvalidArgument, err)
+       }
+
+       // {"type":"literal","value":<bool>} is an alternate spelling of a 
constant.
+       if node.Type == "literal" {
+               var bv bool
+               if err := json.Unmarshal(node.Value, &bv); err != nil {
+                       return nil, fmt.Errorf("%w: cannot parse literal 
expression: %s", ErrInvalidArgument, err)
+               }
+               if bv {
+                       return AlwaysTrue{}, nil
+               }
+
+               return AlwaysFalse{}, nil
+       }
+
+       op, ok := jsonToOp[node.Type]
+       if !ok {
+               return nil, fmt.Errorf("%w: unknown expression type %q", 
ErrInvalidArgument, node.Type)
+       }
+
+       switch op {
+       case OpNot:
+               child, err := decodeExpr(node.Child, schema)
+               if err != nil {
+                       return nil, err
+               }
+
+               return NewNot(child), nil
+       case OpAnd, OpOr:
+               left, err := decodeExpr(node.Left, schema)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := decodeExpr(node.Right, schema)
+               if err != nil {
+                       return nil, err
+               }
+               if op == OpAnd {
+                       return NewAnd(left, right), nil
+               }
+
+               return NewOr(left, right), nil
+       }
+
+       return decodePredicate(op, node, schema)
+}
+
+func decodePredicate(op Operation, node exprNode, schema *Schema) 
(BooleanExpression, error) {
+       term, err := decodeTerm(node.Term)
+       if err != nil {
+               return nil, err
+       }
+
+       // Resolve the field type once; nil means schema-less.
+       var typ Type
+       if schema != nil {
+               bound, err := term.Bind(schema, false)
+               if err != nil {
+                       return nil, err
+               }
+               typ = bound.Type()
+       }
+
+       switch {
+       case op >= OpIsNull && op <= OpNotNan:
+               if len(node.Value) != 0 || node.Values != nil {
+                       return nil, fmt.Errorf("%w: unary predicate %s must not 
have a value", ErrInvalidArgument, node.Type)
+               }
+
+               return UnaryPredicate(op, term), nil
+       case op >= OpLT && op <= OpNotStartsWith:
+               if len(node.Value) == 0 {
+                       return nil, fmt.Errorf("%w: predicate %s is missing a 
value", ErrInvalidArgument, node.Type)
+               }
+               lit, err := decodeValue(node.Value, typ)
+               if err != nil {
+                       return nil, err
+               }
+
+               return LiteralPredicate(op, term, lit), nil
+       case op >= OpIn && op <= OpNotIn:
+               if node.Values == nil {
+                       return nil, fmt.Errorf("%w: predicate %s is missing 
values", ErrInvalidArgument, node.Type)
+               }

Review Comment:
   The unary branch rejects a stray `value`/`values`, but the literal-op branch 
doesn't reject a stray `values` and the set branch doesn't reject a stray 
`value`, so `{"type":"eq","term":"a","value":1,"values":[1,2]}` parses to 
`eq(a,1)` instead of erroring. Java rejects all three combinations 
([`predicateFromJson`](https://github.com/apache/iceberg/blob/85ffa1984e115e80ba1571f3eb017fcf0ba39031/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java#L331-L375)).



##########
expr_json.go:
##########
@@ -0,0 +1,502 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }
+func (p *unboundLiteralPredicate) MarshalJSON() ([]byte, error) { return 
encodeExpr(p) }
+func (p *unboundSetPredicate) MarshalJSON() ([]byte, error)     { return 
encodeExpr(p) }
+
+// ParseExpr parses an expression from its REST JSON form (a request "filter" 
or
+// a task's residual filter).
+//
+// With a schema, literals take the referenced field's type (e.g. "2022-08-14"
+// on a date column becomes a DateLiteral). Without one they fall back to the
+// base JSON kind: Int64Literal, Float64Literal, StringLiteral, or BoolLiteral.
+func ParseExpr(data []byte, schema *Schema) (BooleanExpression, error) {
+       return decodeExpr(json.RawMessage(data), schema)
+}
+
+func encodeExpr(e BooleanExpression) (json.RawMessage, error) {
+       switch v := e.(type) {
+       case AlwaysTrue:
+               return json.RawMessage("true"), nil
+       case AlwaysFalse:
+               return json.RawMessage("false"), nil
+       case NotExpr:
+               child, err := encodeExpr(v.child)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpNot], Child: 
child})
+       case AndExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpAnd], Left: left, 
Right: right})
+       case OrExpr:
+               left, err := encodeExpr(v.left)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := encodeExpr(v.right)
+               if err != nil {
+                       return nil, err
+               }
+
+               return json.Marshal(exprNode{Type: opToJSON[OpOr], Left: left, 
Right: right})
+       }
+
+       return encodePredicate(e)
+}
+
+func encodePredicate(e BooleanExpression) (json.RawMessage, error) {
+       op := e.Op()
+       js, ok := opToJSON[op]
+       if !ok {
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       var (
+               term Term
+               err  error
+       )
+       switch p := e.(type) {
+       case UnboundPredicate:
+               term = p.Term()
+       case BoundPredicate:
+               term = p.Term()
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression of type 
%T", ErrInvalidArgument, e)
+       }
+
+       node := exprNode{Type: js}
+       if node.Term, err = encodeTerm(term); err != nil {
+               return nil, err
+       }
+
+       switch {
+       case op >= OpIsNull && op <= OpNotNan:
+               // unary predicate: no value or values field
+       case op >= OpLT && op <= OpNotStartsWith:
+               lit, err := literalOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               if node.Value, err = encodeLiteral(lit); err != nil {
+                       return nil, err
+               }
+       case op >= OpIn && op <= OpNotIn:
+               lits, err := literalsOf(e)
+               if err != nil {
+                       return nil, err
+               }
+               node.Values = make([]json.RawMessage, 0, len(lits))
+               for _, l := range lits {
+                       v, err := encodeLiteral(l)
+                       if err != nil {
+                               return nil, err
+                       }
+                       node.Values = append(node.Values, v)
+               }
+               // A set has no order; sort the encoded values for 
deterministic output.
+               sort.Slice(node.Values, func(i, j int) bool {
+                       return bytes.Compare(node.Values[i], node.Values[j]) < 0
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize expression with 
operation %s", ErrInvalidArgument, op)
+       }
+
+       return json.Marshal(node)
+}
+
+func encodeTerm(term Term) (json.RawMessage, error) {
+       switch t := term.(type) {
+       case Reference:
+               return json.Marshal(string(t))
+       case BoundReference:
+               return json.Marshal(t.Field().Name)
+       case *BoundTransform:
+               return json.Marshal(transformNode{
+                       Type:      exprKeyTransform,
+                       Transform: t.transform.String(),
+                       Term:      t.term.Ref().Field().Name,
+               })
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize term of type %T", 
ErrInvalidArgument, term)
+       }
+}
+
+// encodeLiteral writes a non-null literal in the JSON form for its Iceberg 
type
+// (see Java's SingleValueParser).
+func encodeLiteral(lit Literal) (json.RawMessage, error) {
+       switch l := lit.(type) {
+       case BoolLiteral:
+               return json.Marshal(bool(l))
+       case Int32Literal:
+               return json.Marshal(int32(l))
+       case Int64Literal:
+               return json.Marshal(int64(l))
+       case Float32Literal:
+               return json.Marshal(float32(l))
+       case Float64Literal:
+               return json.Marshal(float64(l))
+       case StringLiteral:
+               return json.Marshal(string(l))
+       case DateLiteral:
+               return json.Marshal(Date(l).ToTime().Format("2006-01-02"))
+       case TimeLiteral:
+               // "9"s trim trailing fractional zeros (and the point when 
zero), as Java does.
+               return 
json.Marshal(time.UnixMicro(int64(l)).UTC().Format("15:04:05.999999"))
+       case TimestampLiteral:
+               return 
json.Marshal(Timestamp(l).ToTime().Format("2006-01-02T15:04:05.999999"))
+       case TimestampNsLiteral:
+               return 
json.Marshal(TimestampNano(l).ToTime().Format("2006-01-02T15:04:05.999999999"))
+       case UUIDLiteral:
+               return json.Marshal(uuid.UUID(l).String())
+       case FixedLiteral:
+               return 
json.Marshal(strings.ToUpper(hex.EncodeToString([]byte(l))))
+       case BinaryLiteral:
+               return 
json.Marshal(strings.ToUpper(hex.EncodeToString([]byte(l))))
+       case DecimalLiteral:
+               return json.Marshal(Decimal(l).String())
+       default:
+               return nil, fmt.Errorf("%w: cannot serialize literal of type 
%s", ErrInvalidArgument, lit.Type())
+       }
+}
+
+func literalOf(e BooleanExpression) (Literal, error) {
+       switch p := e.(type) {
+       case *unboundLiteralPredicate:
+               return p.lit, nil
+       case BoundLiteralPredicate:
+               return p.Literal(), nil
+       default:
+               return nil, fmt.Errorf("%w: expected a literal predicate, got 
%T", ErrInvalidArgument, e)
+       }
+}
+
+func literalsOf(e BooleanExpression) ([]Literal, error) {
+       switch p := e.(type) {
+       case *unboundSetPredicate:
+               return p.lits.Members(), nil
+       case BoundSetPredicate:
+               return p.Literals().Members(), nil
+       default:
+               return nil, fmt.Errorf("%w: expected a set predicate, got %T", 
ErrInvalidArgument, e)
+       }
+}
+
+func decodeExpr(raw json.RawMessage, schema *Schema) (BooleanExpression, 
error) {
+       b := bytes.TrimSpace(raw)
+       if len(b) == 0 {
+               return nil, fmt.Errorf("%w: cannot parse expression from empty 
input", ErrInvalidArgument)
+       }
+
+       // A bare boolean is AlwaysTrue/AlwaysFalse.
+       if b[0] == 't' || b[0] == 'f' {
+               var bv bool
+               if err := json.Unmarshal(b, &bv); err != nil {
+                       return nil, fmt.Errorf("%w: cannot parse expression: 
%s", ErrInvalidArgument, err)
+               }
+               if bv {
+                       return AlwaysTrue{}, nil
+               }
+
+               return AlwaysFalse{}, nil
+       }
+
+       var node exprNode
+       if err := json.Unmarshal(b, &node); err != nil {
+               return nil, fmt.Errorf("%w: cannot parse expression: %s", 
ErrInvalidArgument, err)
+       }
+
+       // {"type":"literal","value":<bool>} is an alternate spelling of a 
constant.
+       if node.Type == "literal" {
+               var bv bool
+               if err := json.Unmarshal(node.Value, &bv); err != nil {
+                       return nil, fmt.Errorf("%w: cannot parse literal 
expression: %s", ErrInvalidArgument, err)
+               }
+               if bv {
+                       return AlwaysTrue{}, nil
+               }
+
+               return AlwaysFalse{}, nil
+       }
+
+       op, ok := jsonToOp[node.Type]
+       if !ok {
+               return nil, fmt.Errorf("%w: unknown expression type %q", 
ErrInvalidArgument, node.Type)
+       }
+
+       switch op {
+       case OpNot:
+               child, err := decodeExpr(node.Child, schema)
+               if err != nil {
+                       return nil, err
+               }
+
+               return NewNot(child), nil
+       case OpAnd, OpOr:
+               left, err := decodeExpr(node.Left, schema)
+               if err != nil {
+                       return nil, err
+               }
+               right, err := decodeExpr(node.Right, schema)
+               if err != nil {
+                       return nil, err
+               }
+               if op == OpAnd {
+                       return NewAnd(left, right), nil
+               }
+
+               return NewOr(left, right), nil
+       }
+
+       return decodePredicate(op, node, schema)
+}
+
+func decodePredicate(op Operation, node exprNode, schema *Schema) 
(BooleanExpression, error) {
+       term, err := decodeTerm(node.Term)
+       if err != nil {
+               return nil, err
+       }
+
+       // Resolve the field type once; nil means schema-less.
+       var typ Type
+       if schema != nil {
+               bound, err := term.Bind(schema, false)
+               if err != nil {
+                       return nil, err
+               }
+               typ = bound.Type()
+       }
+
+       switch {
+       case op >= OpIsNull && op <= OpNotNan:
+               if len(node.Value) != 0 || node.Values != nil {
+                       return nil, fmt.Errorf("%w: unary predicate %s must not 
have a value", ErrInvalidArgument, node.Type)
+               }
+
+               return UnaryPredicate(op, term), nil
+       case op >= OpLT && op <= OpNotStartsWith:
+               if len(node.Value) == 0 {
+                       return nil, fmt.Errorf("%w: predicate %s is missing a 
value", ErrInvalidArgument, node.Type)
+               }
+               lit, err := decodeValue(node.Value, typ)
+               if err != nil {
+                       return nil, err
+               }
+
+               return LiteralPredicate(op, term, lit), nil
+       case op >= OpIn && op <= OpNotIn:
+               if node.Values == nil {
+                       return nil, fmt.Errorf("%w: predicate %s is missing 
values", ErrInvalidArgument, node.Type)
+               }
+               lits := make([]Literal, 0, len(node.Values))
+               for _, v := range node.Values {
+                       lit, err := decodeValue(v, typ)
+                       if err != nil {
+                               return nil, err
+                       }
+                       lits = append(lits, lit)
+               }
+
+               return SetPredicate(op, term, lits), nil
+       default:
+               return nil, fmt.Errorf("%w: unsupported predicate operation 
%s", ErrInvalidArgument, op)
+       }
+}
+
+func decodeTerm(raw json.RawMessage) (UnboundTerm, error) {
+       b := bytes.TrimSpace(raw)
+       if len(b) == 0 {
+               return nil, fmt.Errorf("%w: predicate is missing a term", 
ErrInvalidArgument)
+       }
+
+       if b[0] == '"' {
+               var name string
+               if err := json.Unmarshal(b, &name); err != nil {
+                       return nil, fmt.Errorf("%w: cannot parse term: %s", 
ErrInvalidArgument, err)
+               }
+
+               return Reference(name), nil
+       }
+
+       var t transformNode
+       if err := json.Unmarshal(b, &t); err != nil {
+               return nil, fmt.Errorf("%w: cannot parse term: %s", 
ErrInvalidArgument, err)
+       }
+       switch t.Type {
+       case "reference":
+               // {"type":"reference","term":"name"}
+               return Reference(t.Term), nil
+       case exprKeyTransform:
+               return nil, fmt.Errorf("%w: transform terms are not supported 
when parsing expressions", ErrNotImplemented)

Review Comment:
   `encodeTerm` serializes a `*BoundTransform` to `{"type":"transform",...}`, 
but `decodeTerm` rejects that same shape with `ErrNotImplemented`. A transform 
term is a legal `Term` per the spec ([`Term` = `Reference | 
TransformTerm`](https://github.com/apache/iceberg/blob/85ffa1984e115e80ba1571f3eb017fcf0ba39031/open-api/rest-catalog-open-api.yaml#L2651-L2674)),
 and Java reads it back 
([`ExpressionParser.term()`](https://github.com/apache/iceberg/blob/85ffa1984e115e80ba1571f3eb017fcf0ba39031/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java#L404-L417)),
 so a legal residual filter whose term is a partition transform would fail to 
parse here. Returning `ErrNotImplemented` seems fine if we don't want to 
support this yet.



##########
expr_json.go:
##########
@@ -0,0 +1,502 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+       "bytes"
+       "encoding/hex"
+       "encoding/json"
+       "fmt"
+       "sort"
+       "strings"
+       "time"
+
+       "github.com/google/uuid"
+)
+
+// JSON serialization for boolean expressions, used for the "filter" field of a
+// REST scan-planning request. Mirrors Java's ExpressionParser.
+
+// exprKeyTransform is the "type" value identifying a transform term.
+const exprKeyTransform = "transform"
+
+// opToJSON maps an Operation to its wire string (OpLTEQ -> "lt-eq"). OpTrue 
and
+// OpFalse are handled separately: they serialize as bare JSON booleans.
+var opToJSON = map[Operation]string{
+       OpIsNull:        "is-null",
+       OpNotNull:       "not-null",
+       OpIsNan:         "is-nan",
+       OpNotNan:        "not-nan",
+       OpLT:            "lt",
+       OpLTEQ:          "lt-eq",
+       OpGT:            "gt",
+       OpGTEQ:          "gt-eq",
+       OpEQ:            "eq",
+       OpNEQ:           "not-eq",
+       OpStartsWith:    "starts-with",
+       OpNotStartsWith: "not-starts-with",
+       OpIn:            "in",
+       OpNotIn:         "not-in",
+       OpNot:           "not",
+       OpAnd:           "and",
+       OpOr:            "or",
+}
+
+var jsonToOp = func() map[string]Operation {
+       m := make(map[string]Operation, len(opToJSON))
+       for op, s := range opToJSON {
+               m[s] = op
+       }
+
+       return m
+}()
+
+// exprNode is the wire form of an expression node. omitempty leaves only the
+// keys relevant to a given node; field order matches Java's output.
+type exprNode struct {
+       Type   string            `json:"type"`
+       Child  json.RawMessage   `json:"child,omitempty"`
+       Left   json.RawMessage   `json:"left,omitempty"`
+       Right  json.RawMessage   `json:"right,omitempty"`
+       Term   json.RawMessage   `json:"term,omitempty"`
+       Value  json.RawMessage   `json:"value,omitempty"`
+       Values []json.RawMessage `json:"values,omitempty"`
+}
+
+// transformNode is the wire form of a transform term, e.g.
+// {"type":"transform","transform":"bucket[16]","term":"id"}.
+type transformNode struct {
+       Type      string `json:"type"`
+       Transform string `json:"transform"`
+       Term      string `json:"term"`
+}
+
+// MarshalJSON emits the REST JSON form, so an expression can be used directly
+// as a request's "filter" field. Tag such fields omitempty to drop a nil one.
+func (e AlwaysTrue) MarshalJSON() ([]byte, error)  { return encodeExpr(e) }
+func (e AlwaysFalse) MarshalJSON() ([]byte, error) { return encodeExpr(e) }
+func (e NotExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e AndExpr) MarshalJSON() ([]byte, error)     { return encodeExpr(e) }
+func (e OrExpr) MarshalJSON() ([]byte, error)      { return encodeExpr(e) }
+
+func (p *unboundUnaryPredicate) MarshalJSON() ([]byte, error)   { return 
encodeExpr(p) }

Review Comment:
   As far as I understand, it could be problematic that `MarshalJSON` is 
defined only on the unbound predicate types, which makes `json.Marshal` of a 
*bound* expression fall through to default struct marshaling and silently 
return `{}` with a nil error (the bound types' fields are unexported). The 
scan path serializes the unbound filter, so this isn't hit today, but 
`json.Marshal` quietly "succeeding" with `{}` is a sharp edge if anyone later 
marshals a bound expression. Worth having the bound predicate types either 
delegate to `encodeExpr` or return an error, rather than producing `{}`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to