twuebi commented on code in PR #1075: URL: https://github.com/apache/iceberg-go/pull/1075#discussion_r3259507794
########## data_file_codec.go: ########## @@ -0,0 +1,219 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package iceberg + +import ( + "fmt" + "reflect" + "sync" + + "github.com/apache/iceberg-go/internal" + "github.com/twmb/avro" +) + +// MarshalAvroEntry encodes this DataFile as Avro bytes using the +// manifest-entry encoding for the given partition spec, table schema +// and format version (1, 2, or 3). The wire format is the same one a +// manifest carries for this data file, so adding a field to the +// underlying struct (and its avro tags) automatically extends what +// MarshalAvroEntry transports — there is no separate wire-mirror +// struct to keep in sync. +// +// MarshalAvroEntry is the low-level avro primitive used by the +// [github.com/apache/iceberg-go/codec] package; callers performing +// cross-process transport should use that package's high-level API +// rather than calling this method directly. The receiver MUST decode +// with [UnmarshalAvroDataFileEntry] and the matching +// (spec, schema, version) triple. 
+// +// MarshalAvroEntry is non-mutating and safe to call concurrently with +// any other reader or encoder of the same DataFile: it encodes a +// shallow copy of df's avro-tagged fields, leaving df untouched. +// +// distinct_counts round-trips on v1 and v2. The v3 manifest-entry +// schema omits the field (deprecated in the v3 spec, see +// apache/iceberg#12182), so it does not survive encode→decode on v3 — +// callers on v3 that need distinct counts must transport them +// separately. +func (d *dataFile) MarshalAvroEntry(spec PartitionSpec, schema *Schema, version int) ([]byte, error) { + if version < 1 || version > 3 { + return nil, fmt.Errorf("iceberg: MarshalAvroEntry: unsupported format version %d", version) + } + s, maps, err := manifestEntrySchemaFor(spec, schema, version) + if err != nil { + return nil, err + } + clone := cloneDataFileAvroFields(d) + clone.PartitionData = avroEncodePartitionData(d.Partition(), maps.nameToID, maps.idToType) + + return s.Encode(newEncodeEntry(version, clone)) +} + +// UnmarshalAvroDataFileEntry decodes Avro bytes produced by +// [(*dataFile).MarshalAvroEntry] back into a DataFile. The +// (spec, schema, version) triple must match the encoder; passing a +// different spec or version yields a decode error or silently +// mis-typed partition values. +// +// UnmarshalAvroDataFileEntry is the low-level avro primitive used by +// the [github.com/apache/iceberg-go/codec] package; callers performing +// cross-process transport should use that package's high-level API +// rather than calling this function directly. +// +// The returned DataFile carries the partition spec id and the field-id +// lookup tables, so Partition() and the stats accessors return id-keyed +// maps as if the file had been read from a manifest. +func UnmarshalAvroDataFileEntry(data []byte, spec PartitionSpec, schema *Schema, version int) (DataFile, error) { Review Comment: added the bridge -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
