This is an automated email from the ASF dual-hosted git repository.
etseidl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 7e85b48dc8 Adding Bloom Filter Position argument in parquet-rewrite (#7550)
7e85b48dc8 is described below
commit 7e85b48dc8f929afa82f2878b17db7b2df240b8b
Author: Jigao Luo <[email protected]>
AuthorDate: Wed May 28 21:08:16 2025 +0200
Adding Bloom Filter Position argument in parquet-rewrite (#7550)
Signed-off-by: Jigao Luo <[email protected]>
---
parquet/src/bin/parquet-rewrite.rs | 28 +++++++++++++++++++++++++++-
1 file changed, 27 insertions(+), 1 deletion(-)
diff --git a/parquet/src/bin/parquet-rewrite.rs b/parquet/src/bin/parquet-rewrite.rs
index 5a1ec94d55..28a596023c 100644
--- a/parquet/src/bin/parquet-rewrite.rs
+++ b/parquet/src/bin/parquet-rewrite.rs
@@ -41,7 +41,7 @@ use parquet::{
arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter},
basic::Compression,
file::{
- properties::{EnabledStatistics, WriterProperties, WriterVersion},
+ properties::{BloomFilterPosition, EnabledStatistics, WriterProperties, WriterVersion},
reader::FileReader,
serialized_reader::SerializedFileReader,
},
@@ -139,6 +139,24 @@ impl From<WriterVersionArgs> for WriterVersion {
}
}
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)]
+enum BloomFilterPositionArgs {
+ /// Write Bloom Filters of each row group right after the row group
+ AfterRowGroup,
+
+ /// Write Bloom Filters at the end of the file
+ End,
+}
+
+impl From<BloomFilterPositionArgs> for BloomFilterPosition {
+ fn from(value: BloomFilterPositionArgs) -> Self {
+ match value {
+ BloomFilterPositionArgs::AfterRowGroup => Self::AfterRowGroup,
+ BloomFilterPositionArgs::End => Self::End,
+ }
+ }
+}
+
#[derive(Debug, Parser)]
#[clap(author, version, about("Read and write parquet file with potentially different settings"), long_about = None)]
struct Args {
@@ -188,6 +206,10 @@ struct Args {
#[clap(long)]
bloom_filter_ndv: Option<u64>,
+ /// Sets the position of bloom filter
+ #[clap(long)]
+ bloom_filter_position: Option<BloomFilterPositionArgs>,
+
/// Sets flag to enable/disable dictionary encoding for any column.
#[clap(long)]
dictionary_enabled: Option<bool>,
@@ -256,6 +278,10 @@ fn main() {
if let Some(value) = args.bloom_filter_ndv {
writer_properties_builder = writer_properties_builder.set_bloom_filter_ndv(value);
}
+ if let Some(value) = args.bloom_filter_position {
+ writer_properties_builder =
+ writer_properties_builder.set_bloom_filter_position(value.into());
+ }
}
}
if let Some(value) = args.dictionary_enabled {