This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 82821e574d arrow-ipc: Remove all abilities to preserve dict IDs (#7940)
82821e574d is described below
commit 82821e574df7e699c7a491da90c54429a5a439e9
Author: Frederic Branczyk <[email protected]>
AuthorDate: Fri Jul 18 22:32:41 2025 +0200
arrow-ipc: Remove all abilities to preserve dict IDs (#7940)
# Which issue does this PR close?
This PR does not yet close any of these issues, but contributes towards:
- https://github.com/apache/arrow-rs/issues/6356
- https://github.com/apache/arrow-rs/issues/5981
- https://github.com/apache/arrow-rs/issues/1206
# Rationale for this change
See the above issues. And this is a follow up to
* https://github.com/apache/arrow-rs/pull/6711
* https://github.com/apache/arrow-rs/pull/6873
This was also split out from:
https://github.com/apache/arrow-rs/pull/7929
# What changes are included in this PR?
This removes the API to allow preserving `dict_id` set in the `Schema`'s
`Field` within arrow-ipc and arrow-flight. This is in an effort to
remove the `dict_id` field entirely and make it an IPC/flight-only
concern.
# Are these changes tested?
Yes, all existing tests continue to pass.
# Are there any user-facing changes?
Yes, these previously (in 54.0.0) deprecated functions/fields are
removed:
* `arrow_ipc::DictionaryTracker.set_dict_id`
* `arrow_ipc::DictionaryTracker::new_with_preserve_dict_id`
* `arrow_ipc::IpcWriteOptions.with_preserve_dict_id`
* `arrow_ipc::IpcWriteOptions.preserve_dict_id` (function and field)
* `arrow_ipc::schema_to_fb`
* `arrow_ipc::schema_to_bytes`
---
arrow-flight/src/encode.rs | 29 +---
arrow-flight/src/lib.rs | 4 +-
arrow-flight/src/utils.rs | 4 +-
.../flight_client_scenarios/integration_test.rs | 4 +-
.../flight_server_scenarios/integration_test.rs | 4 +-
arrow-ipc/src/convert.rs | 22 +--
arrow-ipc/src/reader.rs | 12 +-
arrow-ipc/src/reader/stream.rs | 3 +-
arrow-ipc/src/writer.rs | 190 ++++-----------------
parquet/src/arrow/schema/mod.rs | 4 +-
10 files changed, 55 insertions(+), 221 deletions(-)
diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs
index 0a7a6df904..49910a3ee2 100644
--- a/arrow-flight/src/encode.rs
+++ b/arrow-flight/src/encode.rs
@@ -535,15 +535,13 @@ fn prepare_field_for_flight(
)
.with_metadata(field.metadata().clone())
} else {
- #[allow(deprecated)]
- let dict_id = dictionary_tracker.set_dict_id(field.as_ref());
-
+ dictionary_tracker.next_dict_id();
#[allow(deprecated)]
Field::new_dict(
field.name(),
field.data_type().clone(),
field.is_nullable(),
- dict_id,
+ 0,
field.dict_is_ordered().unwrap_or_default(),
)
.with_metadata(field.metadata().clone())
@@ -585,14 +583,13 @@ fn prepare_schema_for_flight(
)
.with_metadata(field.metadata().clone())
} else {
- #[allow(deprecated)]
- let dict_id =
dictionary_tracker.set_dict_id(field.as_ref());
+ dictionary_tracker.next_dict_id();
#[allow(deprecated)]
Field::new_dict(
field.name(),
field.data_type().clone(),
field.is_nullable(),
- dict_id,
+ 0,
field.dict_is_ordered().unwrap_or_default(),
)
.with_metadata(field.metadata().clone())
@@ -654,16 +651,10 @@ struct FlightIpcEncoder {
impl FlightIpcEncoder {
fn new(options: IpcWriteOptions, error_on_replacement: bool) -> Self {
- #[allow(deprecated)]
- let preserve_dict_id = options.preserve_dict_id();
Self {
options,
data_gen: IpcDataGenerator::default(),
- #[allow(deprecated)]
- dictionary_tracker: DictionaryTracker::new_with_preserve_dict_id(
- error_on_replacement,
- preserve_dict_id,
- ),
+ dictionary_tracker: DictionaryTracker::new(error_on_replacement),
}
}
@@ -1547,9 +1538,8 @@ mod tests {
async fn verify_flight_round_trip(mut batches: Vec<RecordBatch>) {
let expected_schema = batches.first().unwrap().schema();
- #[allow(deprecated)]
let encoder = FlightDataEncoderBuilder::default()
-
.with_options(IpcWriteOptions::default().with_preserve_dict_id(false))
+ .with_options(IpcWriteOptions::default())
.with_dictionary_handling(DictionaryHandling::Resend)
.build(futures::stream::iter(batches.clone().into_iter().map(Ok)));
@@ -1575,8 +1565,7 @@ mod tests {
HashMap::from([("some_key".to_owned(), "some_value".to_owned())]),
);
- #[allow(deprecated)]
- let mut dictionary_tracker =
DictionaryTracker::new_with_preserve_dict_id(false, true);
+ let mut dictionary_tracker = DictionaryTracker::new(false);
let got = prepare_schema_for_flight(&schema, &mut dictionary_tracker,
false);
assert!(got.metadata().contains_key("some_key"));
@@ -1606,9 +1595,7 @@ mod tests {
options: &IpcWriteOptions,
) -> (Vec<FlightData>, FlightData) {
let data_gen = IpcDataGenerator::default();
- #[allow(deprecated)]
- let mut dictionary_tracker =
- DictionaryTracker::new_with_preserve_dict_id(false,
options.preserve_dict_id());
+ let mut dictionary_tracker = DictionaryTracker::new(false);
let (encoded_dictionaries, encoded_batch) = data_gen
.encoded_batch(batch, &mut dictionary_tracker, options)
diff --git a/arrow-flight/src/lib.rs b/arrow-flight/src/lib.rs
index c0af71aaf4..8043d5b4a7 100644
--- a/arrow-flight/src/lib.rs
+++ b/arrow-flight/src/lib.rs
@@ -149,9 +149,7 @@ pub struct IpcMessage(pub Bytes);
fn flight_schema_as_encoded_data(arrow_schema: &Schema, options:
&IpcWriteOptions) -> EncodedData {
let data_gen = writer::IpcDataGenerator::default();
- #[allow(deprecated)]
- let mut dict_tracker =
- writer::DictionaryTracker::new_with_preserve_dict_id(false,
options.preserve_dict_id());
+ let mut dict_tracker = writer::DictionaryTracker::new(false);
data_gen.schema_to_bytes_with_dictionary_tracker(arrow_schema, &mut
dict_tracker, options)
}
diff --git a/arrow-flight/src/utils.rs b/arrow-flight/src/utils.rs
index 428dde73ca..a304aedcfa 100644
--- a/arrow-flight/src/utils.rs
+++ b/arrow-flight/src/utils.rs
@@ -90,9 +90,7 @@ pub fn batches_to_flight_data(
let mut flight_data = vec![];
let data_gen = writer::IpcDataGenerator::default();
- #[allow(deprecated)]
- let mut dictionary_tracker =
- writer::DictionaryTracker::new_with_preserve_dict_id(false,
options.preserve_dict_id());
+ let mut dictionary_tracker = writer::DictionaryTracker::new(false);
for batch in batches.iter() {
let (encoded_dictionaries, encoded_batch) =
diff --git
a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
index 406419028d..bd41ab602e 100644
--- a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
@@ -72,9 +72,7 @@ async fn upload_data(
let (mut upload_tx, upload_rx) = mpsc::channel(10);
let options = arrow::ipc::writer::IpcWriteOptions::default();
- #[allow(deprecated)]
- let mut dict_tracker =
- writer::DictionaryTracker::new_with_preserve_dict_id(false,
options.preserve_dict_id());
+ let mut dict_tracker = writer::DictionaryTracker::new(false);
let data_gen = writer::IpcDataGenerator::default();
let data = IpcMessage(
data_gen
diff --git
a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
index 92989a2039..d608a47537 100644
--- a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
@@ -119,9 +119,7 @@ impl FlightService for FlightServiceImpl {
.ok_or_else(|| Status::not_found(format!("Could not find flight.
{key}")))?;
let options = arrow::ipc::writer::IpcWriteOptions::default();
- #[allow(deprecated)]
- let mut dictionary_tracker =
- writer::DictionaryTracker::new_with_preserve_dict_id(false,
options.preserve_dict_id());
+ let mut dictionary_tracker = writer::DictionaryTracker::new(false);
let data_gen = writer::IpcDataGenerator::default();
let data = IpcMessage(
data_gen
diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs
index 0be74bf6d9..af0bdb1df3 100644
--- a/arrow-ipc/src/convert.rs
+++ b/arrow-ipc/src/convert.rs
@@ -19,6 +19,7 @@
use arrow_buffer::Buffer;
use arrow_schema::*;
+use core::panic;
use flatbuffers::{
FlatBufferBuilder, ForwardsUOffset, UnionWIPOffset, Vector, Verifiable,
Verifier,
VerifierOptions, WIPOffset,
@@ -127,12 +128,6 @@ impl<'a> IpcSchemaEncoder<'a> {
}
}
-/// Serialize a schema in IPC format
-#[deprecated(since = "54.0.0", note = "Use `IpcSchemaConverter`.")]
-pub fn schema_to_fb(schema: &Schema) -> FlatBufferBuilder<'_> {
- IpcSchemaEncoder::new().schema_to_fb(schema)
-}
-
/// Push a key-value metadata into a FlatBufferBuilder and return [WIPOffset]
pub fn metadata_to_fb<'a>(
fbb: &mut FlatBufferBuilder<'a>,
@@ -530,24 +525,13 @@ pub(crate) fn build_field<'a>(
match dictionary_tracker {
Some(tracker) => Some(get_fb_dictionary(
index_type,
- #[allow(deprecated)]
- tracker.set_dict_id(field),
- field
- .dict_is_ordered()
- .expect("All Dictionary types have `dict_is_ordered`"),
- fbb,
- )),
- None => Some(get_fb_dictionary(
- index_type,
- #[allow(deprecated)]
- field
- .dict_id()
- .expect("Dictionary type must have a dictionary id"),
+ tracker.next_dict_id(),
field
.dict_is_ordered()
.expect("All Dictionary types have `dict_is_ordered`"),
fbb,
)),
+ None => panic!("IPC must no longer be used without dictionary
tracker"),
}
} else {
None
diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs
index 919407dcda..de200a206d 100644
--- a/arrow-ipc/src/reader.rs
+++ b/arrow-ipc/src/reader.rs
@@ -2007,8 +2007,7 @@ mod tests {
let mut writer = crate::writer::FileWriter::try_new_with_options(
&mut buf,
batch.schema_ref(),
- #[allow(deprecated)]
- IpcWriteOptions::default().with_preserve_dict_id(false),
+ IpcWriteOptions::default(),
)
.unwrap();
writer.write(&batch).unwrap();
@@ -2440,8 +2439,7 @@ mod tests {
.unwrap();
let gen = IpcDataGenerator {};
- #[allow(deprecated)]
- let mut dict_tracker =
DictionaryTracker::new_with_preserve_dict_id(false, true);
+ let mut dict_tracker = DictionaryTracker::new(false);
let (_, encoded) = gen
.encoded_batch(&batch, &mut dict_tracker, &Default::default())
.unwrap();
@@ -2479,8 +2477,7 @@ mod tests {
.unwrap();
let gen = IpcDataGenerator {};
- #[allow(deprecated)]
- let mut dict_tracker =
DictionaryTracker::new_with_preserve_dict_id(false, true);
+ let mut dict_tracker = DictionaryTracker::new(false);
let (_, encoded) = gen
.encoded_batch(&batch, &mut dict_tracker, &Default::default())
.unwrap();
@@ -2691,8 +2688,7 @@ mod tests {
let mut writer = crate::writer::StreamWriter::try_new_with_options(
&mut buf,
batch.schema().as_ref(),
- #[allow(deprecated)]
-
crate::writer::IpcWriteOptions::default().with_preserve_dict_id(false),
+ crate::writer::IpcWriteOptions::default(),
)
.expect("Failed to create StreamWriter");
writer.write(&batch).expect("Failed to write RecordBatch");
diff --git a/arrow-ipc/src/reader/stream.rs b/arrow-ipc/src/reader/stream.rs
index e894678142..b276e4fe47 100644
--- a/arrow-ipc/src/reader/stream.rs
+++ b/arrow-ipc/src/reader/stream.rs
@@ -395,8 +395,7 @@ mod tests {
let mut writer = StreamWriter::try_new_with_options(
&mut buffer,
&schema,
- #[allow(deprecated)]
- IpcWriteOptions::default().with_preserve_dict_id(false),
+ IpcWriteOptions::default(),
)
.expect("Failed to create StreamWriter");
writer.write(&batch).expect("Failed to write RecordBatch");
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index bd255fd2d5..114f3a42e3 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -65,15 +65,6 @@ pub struct IpcWriteOptions {
/// Compression, if desired. Will result in a runtime error
/// if the corresponding feature is not enabled
batch_compression_type: Option<crate::CompressionType>,
- /// Flag indicating whether the writer should preserve the dictionary IDs
defined in the
- /// schema or generate unique dictionary IDs internally during encoding.
- ///
- /// Defaults to `false`
- #[deprecated(
- since = "54.0.0",
- note = "The ability to preserve dictionary IDs will be removed. With
it, all fields related to it."
- )]
- preserve_dict_id: bool,
}
impl IpcWriteOptions {
@@ -122,7 +113,6 @@ impl IpcWriteOptions {
write_legacy_ipc_format,
metadata_version,
batch_compression_type: None,
- preserve_dict_id: false,
}),
crate::MetadataVersion::V5 => {
if write_legacy_ipc_format {
@@ -130,13 +120,11 @@ impl IpcWriteOptions {
"Legacy IPC format only supported on metadata version
4".to_string(),
))
} else {
- #[allow(deprecated)]
Ok(Self {
alignment,
write_legacy_ipc_format,
metadata_version,
batch_compression_type: None,
- preserve_dict_id: false,
})
}
}
@@ -145,45 +133,15 @@ impl IpcWriteOptions {
))),
}
}
-
- /// Return whether the writer is configured to preserve the dictionary IDs
- /// defined in the schema
- #[deprecated(
- since = "54.0.0",
- note = "The ability to preserve dictionary IDs will be removed. With
it, all functions related to it."
- )]
- pub fn preserve_dict_id(&self) -> bool {
- #[allow(deprecated)]
- self.preserve_dict_id
- }
-
- /// Set whether the IPC writer should preserve the dictionary IDs in the
schema
- /// or auto-assign unique dictionary IDs during encoding (defaults to true)
- ///
- /// If this option is true, the application must handle assigning ids
- /// to the dictionary batches in order to encode them correctly
- ///
- /// The default will change to `false` in future releases
- #[deprecated(
- since = "54.0.0",
- note = "The ability to preserve dictionary IDs will be removed. With
it, all functions related to it."
- )]
- #[allow(deprecated)]
- pub fn with_preserve_dict_id(mut self, preserve_dict_id: bool) -> Self {
- self.preserve_dict_id = preserve_dict_id;
- self
- }
}
impl Default for IpcWriteOptions {
fn default() -> Self {
- #[allow(deprecated)]
Self {
alignment: 64,
write_legacy_ipc_format: false,
metadata_version: crate::MetadataVersion::V5,
batch_compression_type: None,
- preserve_dict_id: false,
}
}
}
@@ -224,10 +182,7 @@ pub struct IpcDataGenerator {}
impl IpcDataGenerator {
/// Converts a schema to an IPC message along with `dictionary_tracker`
- /// and returns it encoded inside [EncodedData] as a flatbuffer
- ///
- /// Preferred method over [IpcDataGenerator::schema_to_bytes] since it's
- /// deprecated since Arrow v54.0.0
+ /// and returns it encoded inside [EncodedData] as a flatbuffer.
pub fn schema_to_bytes_with_dictionary_tracker(
&self,
schema: &Schema,
@@ -258,36 +213,6 @@ impl IpcDataGenerator {
}
}
- #[deprecated(
- since = "54.0.0",
- note = "Use `schema_to_bytes_with_dictionary_tracker` instead. This
function signature of `schema_to_bytes_with_dictionary_tracker` in the next
release."
- )]
- /// Converts a schema to an IPC message and returns it encoded inside
[EncodedData] as a flatbuffer
- pub fn schema_to_bytes(&self, schema: &Schema, write_options:
&IpcWriteOptions) -> EncodedData {
- let mut fbb = FlatBufferBuilder::new();
- let schema = {
- #[allow(deprecated)]
- // This will be replaced with the IpcSchemaConverter in the next
release.
- let fb = crate::convert::schema_to_fb_offset(&mut fbb, schema);
- fb.as_union_value()
- };
-
- let mut message = crate::MessageBuilder::new(&mut fbb);
- message.add_version(write_options.metadata_version);
- message.add_header_type(crate::MessageHeader::Schema);
- message.add_bodyLength(0);
- message.add_header(schema);
- // TODO: custom metadata
- let data = message.finish();
- fbb.finish(data, None);
-
- let data = fbb.finished_data();
- EncodedData {
- ipc_message: data.to_vec(),
- arrow_data: vec![],
- }
- }
-
fn _encode_dictionaries<I: Iterator<Item = i64>>(
&self,
column: &ArrayRef,
@@ -441,13 +366,9 @@ impl IpcDataGenerator {
// It's important to only take the dict_id at this point,
because the dict ID
// sequence is assigned depth-first, so we need to first
encode children and have
// them take their assigned dict IDs before we take the dict
ID for this field.
- #[allow(deprecated)]
- let dict_id = dict_id_seq
- .next()
- .or_else(|| field.dict_id())
- .ok_or_else(|| {
- ArrowError::IpcError(format!("no dict id for field
{}", field.name()))
- })?;
+ let dict_id = dict_id_seq.next().ok_or_else(|| {
+ ArrowError::IpcError(format!("no dict id for field {}",
field.name()))
+ })?;
let emit = dictionary_tracker.insert(dict_id, column)?;
@@ -789,11 +710,6 @@ pub struct DictionaryTracker {
written: HashMap<i64, ArrayData>,
dict_ids: Vec<i64>,
error_on_replacement: bool,
- #[deprecated(
- since = "54.0.0",
- note = "The ability to preserve dictionary IDs will be removed. With
it, all fields related to it."
- )]
- preserve_dict_id: bool,
}
impl DictionaryTracker {
@@ -813,52 +729,17 @@ impl DictionaryTracker {
written: HashMap::new(),
dict_ids: Vec::new(),
error_on_replacement,
- preserve_dict_id: false,
}
}
- /// Create a new [`DictionaryTracker`].
- ///
- /// If `error_on_replacement`
- /// is true, an error will be generated if an update to an
- /// existing dictionary is attempted.
- #[deprecated(
- since = "54.0.0",
- note = "The ability to preserve dictionary IDs will be removed. With
it, all functions related to it."
- )]
- pub fn new_with_preserve_dict_id(error_on_replacement: bool,
preserve_dict_id: bool) -> Self {
- #[allow(deprecated)]
- Self {
- written: HashMap::new(),
- dict_ids: Vec::new(),
- error_on_replacement,
- preserve_dict_id,
- }
- }
-
- /// Set the dictionary ID for `field`.
- ///
- /// If `preserve_dict_id` is true, this will return the `dict_id` in
`field` (or panic if `field` does
- /// not have a `dict_id` defined).
- ///
- /// If `preserve_dict_id` is false, this will return the value of the last
`dict_id` assigned incremented by 1
- /// or 0 in the case where no dictionary IDs have yet been assigned
- #[deprecated(
- since = "54.0.0",
- note = "The ability to preserve dictionary IDs will be removed. With
it, all functions related to it."
- )]
- pub fn set_dict_id(&mut self, field: &Field) -> i64 {
- #[allow(deprecated)]
- let next = if self.preserve_dict_id {
- #[allow(deprecated)]
- field.dict_id().expect("no dict_id in field")
- } else {
- self.dict_ids
- .last()
- .copied()
- .map(|i| i + 1)
- .unwrap_or_default()
- };
+ /// Record and return the next dictionary ID.
+ pub fn next_dict_id(&mut self) -> i64 {
+ let next = self
+ .dict_ids
+ .last()
+ .copied()
+ .map(|i| i + 1)
+ .unwrap_or_default();
self.dict_ids.push(next);
next
@@ -995,11 +876,7 @@ impl<W: Write> FileWriter<W> {
writer.write_all(&super::ARROW_MAGIC)?;
writer.write_all(&PADDING[..pad_len])?;
// write the schema, set the written bytes to the schema + header
- #[allow(deprecated)]
- let preserve_dict_id = write_options.preserve_dict_id;
- #[allow(deprecated)]
- let mut dictionary_tracker =
- DictionaryTracker::new_with_preserve_dict_id(true,
preserve_dict_id);
+ let mut dictionary_tracker = DictionaryTracker::new(true);
let encoded_message = data_gen.schema_to_bytes_with_dictionary_tracker(
schema,
&mut dictionary_tracker,
@@ -1074,11 +951,7 @@ impl<W: Write> FileWriter<W> {
let mut fbb = FlatBufferBuilder::new();
let dictionaries = fbb.create_vector(&self.dictionary_blocks);
let record_batches = fbb.create_vector(&self.record_blocks);
- #[allow(deprecated)]
- let preserve_dict_id = self.write_options.preserve_dict_id;
- #[allow(deprecated)]
- let mut dictionary_tracker =
- DictionaryTracker::new_with_preserve_dict_id(true,
preserve_dict_id);
+ let mut dictionary_tracker = DictionaryTracker::new(true);
let schema = IpcSchemaEncoder::new()
.with_dictionary_tracker(&mut dictionary_tracker)
.schema_to_fb_offset(&mut fbb, &self.schema);
@@ -1229,11 +1102,7 @@ impl<W: Write> StreamWriter<W> {
write_options: IpcWriteOptions,
) -> Result<Self, ArrowError> {
let data_gen = IpcDataGenerator::default();
- #[allow(deprecated)]
- let preserve_dict_id = write_options.preserve_dict_id;
- #[allow(deprecated)]
- let mut dictionary_tracker =
- DictionaryTracker::new_with_preserve_dict_id(false,
preserve_dict_id);
+ let mut dictionary_tracker = DictionaryTracker::new(false);
// write the schema, set the written bytes to the schema
let encoded_message = data_gen.schema_to_bytes_with_dictionary_tracker(
@@ -2141,7 +2010,7 @@ mod tests {
// Dict field with id 2
#[allow(deprecated)]
- let dctfield = Field::new_dict("dict", array.data_type().clone(),
false, 2, false);
+ let dctfield = Field::new_dict("dict", array.data_type().clone(),
false, 0, false);
let union_fields = [(0, Arc::new(dctfield))].into_iter().collect();
let types = [0, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
@@ -2155,17 +2024,22 @@ mod tests {
false,
)]));
+ let gen = IpcDataGenerator {};
+ let mut dict_tracker = DictionaryTracker::new(false);
+ gen.schema_to_bytes_with_dictionary_tracker(
+ &schema,
+ &mut dict_tracker,
+ &IpcWriteOptions::default(),
+ );
+
let batch = RecordBatch::try_new(schema,
vec![Arc::new(union)]).unwrap();
- let gen = IpcDataGenerator {};
- #[allow(deprecated)]
- let mut dict_tracker =
DictionaryTracker::new_with_preserve_dict_id(false, true);
gen.encoded_batch(&batch, &mut dict_tracker, &Default::default())
.unwrap();
// The encoder will assign dict IDs itself to ensure uniqueness and
ignore the dict ID in the schema
// so we expect the dict will be keyed to 0
- assert!(dict_tracker.written.contains_key(&2));
+ assert!(dict_tracker.written.contains_key(&0));
}
#[test]
@@ -2193,15 +2067,20 @@ mod tests {
false,
)]));
+ let gen = IpcDataGenerator {};
+ let mut dict_tracker = DictionaryTracker::new(false);
+ gen.schema_to_bytes_with_dictionary_tracker(
+ &schema,
+ &mut dict_tracker,
+ &IpcWriteOptions::default(),
+ );
+
let batch = RecordBatch::try_new(schema, vec![struct_array]).unwrap();
- let gen = IpcDataGenerator {};
- #[allow(deprecated)]
- let mut dict_tracker =
DictionaryTracker::new_with_preserve_dict_id(false, true);
gen.encoded_batch(&batch, &mut dict_tracker, &Default::default())
.unwrap();
- assert!(dict_tracker.written.contains_key(&2));
+ assert!(dict_tracker.written.contains_key(&0));
}
fn write_union_file(options: IpcWriteOptions) {
@@ -3029,7 +2908,6 @@ mod tests {
let trailer_start = buffer.len() - 10;
let footer_len =
read_footer_length(buffer[trailer_start..].try_into().unwrap()).unwrap();
let footer = root_as_footer(&buffer[trailer_start -
footer_len..trailer_start]).unwrap();
-
let schema = fb_to_schema(footer.schema().unwrap());
// Importantly we set `require_alignment`, otherwise the error later
is suppressed due to copying
diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index 64a4e0e115..b9688fd017 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -180,9 +180,7 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) ->
Result<Schema> {
/// Encodes the Arrow schema into the IPC format, and base64 encodes it
pub fn encode_arrow_schema(schema: &Schema) -> String {
let options = writer::IpcWriteOptions::default();
- #[allow(deprecated)]
- let mut dictionary_tracker =
- writer::DictionaryTracker::new_with_preserve_dict_id(true,
options.preserve_dict_id());
+ let mut dictionary_tracker = writer::DictionaryTracker::new(true);
let data_gen = writer::IpcDataGenerator::default();
let mut serialized_schema =
data_gen.schema_to_bytes_with_dictionary_tracker(schema, &mut
dictionary_tracker, &options);