This is an automated email from the ASF dual-hosted git repository.

etseidl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 7e85b48dc8 Adding Bloom Filter Position argument in parquet-rewrite 
(#7550)
7e85b48dc8 is described below

commit 7e85b48dc8f929afa82f2878b17db7b2df240b8b
Author: Jigao Luo <[email protected]>
AuthorDate: Wed May 28 21:08:16 2025 +0200

    Adding Bloom Filter Position argument in parquet-rewrite (#7550)
    
    Signed-off-by: Jigao Luo <[email protected]>
---
 parquet/src/bin/parquet-rewrite.rs | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/parquet/src/bin/parquet-rewrite.rs 
b/parquet/src/bin/parquet-rewrite.rs
index 5a1ec94d55..28a596023c 100644
--- a/parquet/src/bin/parquet-rewrite.rs
+++ b/parquet/src/bin/parquet-rewrite.rs
@@ -41,7 +41,7 @@ use parquet::{
     arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter},
     basic::Compression,
     file::{
-        properties::{EnabledStatistics, WriterProperties, WriterVersion},
+        properties::{BloomFilterPosition, EnabledStatistics, WriterProperties, 
WriterVersion},
         reader::FileReader,
         serialized_reader::SerializedFileReader,
     },
@@ -139,6 +139,24 @@ impl From<WriterVersionArgs> for WriterVersion {
     }
 }
 
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)]
+enum BloomFilterPositionArgs {
+    /// Write Bloom Filters of each row group right after the row group
+    AfterRowGroup,
+
+    /// Write Bloom Filters at the end of the file
+    End,
+}
+
+impl From<BloomFilterPositionArgs> for BloomFilterPosition {
+    fn from(value: BloomFilterPositionArgs) -> Self {
+        match value {
+            BloomFilterPositionArgs::AfterRowGroup => Self::AfterRowGroup,
+            BloomFilterPositionArgs::End => Self::End,
+        }
+    }
+}
+
 #[derive(Debug, Parser)]
 #[clap(author, version, about("Read and write parquet file with potentially 
different settings"), long_about = None)]
 struct Args {
@@ -188,6 +206,10 @@ struct Args {
     #[clap(long)]
     bloom_filter_ndv: Option<u64>,
 
+    /// Sets the position of bloom filter
+    #[clap(long)]
+    bloom_filter_position: Option<BloomFilterPositionArgs>,
+
     /// Sets flag to enable/disable dictionary encoding for any column.
     #[clap(long)]
     dictionary_enabled: Option<bool>,
@@ -256,6 +278,10 @@ fn main() {
             if let Some(value) = args.bloom_filter_ndv {
                 writer_properties_builder = 
writer_properties_builder.set_bloom_filter_ndv(value);
             }
+            if let Some(value) = args.bloom_filter_position {
+                writer_properties_builder =
+                    
writer_properties_builder.set_bloom_filter_position(value.into());
+            }
         }
     }
     if let Some(value) = args.dictionary_enabled {

Reply via email to