This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 11158ba8cf Support writing encrypted Parquet files with plaintext
footers (#7439)
11158ba8cf is described below
commit 11158ba8cfc471354ed14d04c88c9c631143459a
Author: Rok Mihevc <[email protected]>
AuthorDate: Thu May 8 12:59:00 2025 +0200
Support writing encrypted Parquet files with plaintext footers (#7439)
* Initial commit
* Lint and clippy
* Plaintext layout is different to encrypted one
* Lint and expected memory size at decryption
* Apply suggestions from code review
Co-authored-by: Adam Reeve <[email protected]>
* Review feedback
* Lint
* Update parquet/tests/encryption/encryption.rs
Co-authored-by: Adam Reeve <[email protected]>
* Review feedback
* Initial commit
* Encrypt plaintext to extract nonce and footer
* fix
* Add encryption algorithm to file_metadata before writing
* fix
* Apply suggestions from code review
Co-authored-by: Adam Reeve <[email protected]>
* Fix
* lint
* Move get_footer_encryption_algorithm into MetadataObjectWriter
* Fix
* Review feedback
* Avoid unwraps in file_crypto_metada method
* Minor tidy
* Test wrting and reading with a different footer key
---------
Co-authored-by: Adam Reeve <[email protected]>
---
parquet/src/encryption/encrypt.rs | 27 ++++++++++++++-
parquet/src/file/metadata/writer.rs | 62 +++++++++++++++++++++++-----------
parquet/src/file/writer.rs | 6 ----
parquet/tests/encryption/encryption.rs | 52 ++++++++++++++++++++++------
4 files changed, 111 insertions(+), 36 deletions(-)
diff --git a/parquet/src/encryption/encrypt.rs
b/parquet/src/encryption/encrypt.rs
index 9a801434c0..c8d3ffc0ee 100644
--- a/parquet/src/encryption/encrypt.rs
+++ b/parquet/src/encryption/encrypt.rs
@@ -17,7 +17,9 @@
//! Configuration and utilities for Parquet Modular Encryption
-use crate::encryption::ciphers::{BlockEncryptor, RingGcmBlockEncryptor};
+use crate::encryption::ciphers::{
+ BlockEncryptor, RingGcmBlockEncryptor, NONCE_LEN, SIZE_LEN, TAG_LEN,
+};
use crate::errors::{ParquetError, Result};
use crate::file::column_crypto_metadata::{ColumnCryptoMetaData,
EncryptionWithColumnKey};
use crate::schema::types::{ColumnDescPtr, SchemaDescriptor};
@@ -374,6 +376,29 @@ pub(crate) fn encrypt_object<T: TSerializable, W: Write>(
Ok(())
}
+pub(crate) fn write_signed_plaintext_object<T: TSerializable, W: Write>(
+ object: &T,
+ encryptor: &mut Box<dyn BlockEncryptor>,
+ sink: &mut W,
+ module_aad: &[u8],
+) -> Result<()> {
+ let mut buffer: Vec<u8> = vec![];
+ {
+ let mut protocol = TCompactOutputProtocol::new(&mut buffer);
+ object.write_to_out_protocol(&mut protocol)?;
+ }
+ sink.write_all(&buffer)?;
+ buffer = encryptor.encrypt(buffer.as_ref(), module_aad)?;
+
+ // Format of encrypted buffer is: [ciphertext size, nonce, ciphertext,
authentication tag]
+ let nonce = &buffer[SIZE_LEN..SIZE_LEN + NONCE_LEN];
+ let tag = &buffer[buffer.len() - TAG_LEN..];
+ sink.write_all(nonce)?;
+ sink.write_all(tag)?;
+
+ Ok(())
+}
+
/// Encrypt a Thrift serializable object to a byte vector
pub(crate) fn encrypt_object_to_vec<T: TSerializable>(
object: &T,
diff --git a/parquet/src/file/metadata/writer.rs
b/parquet/src/file/metadata/writer.rs
index c1fc413144..a01ad5d881 100644
--- a/parquet/src/file/metadata/writer.rs
+++ b/parquet/src/file/metadata/writer.rs
@@ -16,17 +16,21 @@
// under the License.
#[cfg(feature = "encryption")]
-use crate::encryption::encrypt::{encrypt_object, encrypt_object_to_vec,
FileEncryptor};
-#[cfg(feature = "encryption")]
-use crate::encryption::modules::{create_footer_aad, create_module_aad,
ModuleType};
+use crate::encryption::{
+ encrypt::{
+ encrypt_object, encrypt_object_to_vec, write_signed_plaintext_object,
FileEncryptor,
+ },
+ modules::{create_footer_aad, create_module_aad, ModuleType},
+};
#[cfg(feature = "encryption")]
use crate::errors::ParquetError;
use crate::errors::Result;
use crate::file::metadata::{KeyValue, ParquetMetaData};
use crate::file::page_index::index::Index;
use crate::file::writer::{get_file_magic, TrackedWrite};
+use crate::format::EncryptionAlgorithm;
#[cfg(feature = "encryption")]
-use crate::format::{AesGcmV1, ColumnCryptoMetaData, EncryptionAlgorithm};
+use crate::format::{AesGcmV1, ColumnCryptoMetaData};
use crate::format::{ColumnChunk, ColumnIndex, FileMetaData, OffsetIndex,
RowGroup};
use crate::schema::types;
use crate::schema::types::{SchemaDescPtr, SchemaDescriptor, TypePtr};
@@ -149,7 +153,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
schema: types::to_thrift(self.schema.as_ref())?,
created_by: self.created_by.clone(),
column_orders,
- encryption_algorithm: None,
+ encryption_algorithm:
self.object_writer.get_footer_encryption_algorithm(),
footer_signing_key_metadata: None,
};
@@ -474,6 +478,10 @@ impl MetadataObjectWriter {
pub fn get_file_magic(&self) -> &[u8; 4] {
get_file_magic()
}
+
+ fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
+ None
+ }
}
/// Implementations of [`MetadataObjectWriter`] methods that rely on
encryption being enabled
@@ -503,6 +511,11 @@ impl MetadataObjectWriter {
let mut encryptor = file_encryptor.get_footer_encryptor()?;
encrypt_object(file_metadata, &mut encryptor, &mut sink, &aad)
}
+ Some(file_encryptor) if
file_metadata.encryption_algorithm.is_some() => {
+ let aad = create_footer_aad(file_encryptor.file_aad())?;
+ let mut encryptor = file_encryptor.get_footer_encryptor()?;
+ write_signed_plaintext_object(file_metadata, &mut encryptor,
&mut sink, &aad)
+ }
_ => Self::write_object(file_metadata, &mut sink),
}
}
@@ -622,25 +635,36 @@ impl MetadataObjectWriter {
}
}
- fn file_crypto_metadata(
- file_encryptor: &FileEncryptor,
- ) -> Result<crate::format::FileCryptoMetaData> {
- let properties = file_encryptor.properties();
- let supply_aad_prefix = properties
+ fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
+ if let Some(file_encryptor) = &self.file_encryptor {
+ return
Some(Self::encryption_algorithm_from_encryptor(file_encryptor));
+ }
+ None
+ }
+
+ fn encryption_algorithm_from_encryptor(file_encryptor: &FileEncryptor) ->
EncryptionAlgorithm {
+ let supply_aad_prefix = file_encryptor
+ .properties()
.aad_prefix()
- .map(|_| !properties.store_aad_prefix());
- let encryption_algorithm = AesGcmV1 {
- aad_prefix: if properties.store_aad_prefix() {
- properties.aad_prefix().cloned()
- } else {
- None
- },
+ .map(|_| !file_encryptor.properties().store_aad_prefix());
+ let aad_prefix = if file_encryptor.properties().store_aad_prefix() {
+ file_encryptor.properties().aad_prefix().cloned()
+ } else {
+ None
+ };
+ EncryptionAlgorithm::AESGCMV1(AesGcmV1 {
+ aad_prefix,
aad_file_unique: Some(file_encryptor.aad_file_unique().clone()),
supply_aad_prefix,
- };
+ })
+ }
+ fn file_crypto_metadata(
+ file_encryptor: &FileEncryptor,
+ ) -> Result<crate::format::FileCryptoMetaData> {
+ let properties = file_encryptor.properties();
Ok(crate::format::FileCryptoMetaData {
- encryption_algorithm:
EncryptionAlgorithm::AESGCMV1(encryption_algorithm),
+ encryption_algorithm:
Self::encryption_algorithm_from_encryptor(file_encryptor),
key_metadata: properties.footer_key_metadata().cloned(),
})
}
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index 18e357ebc2..0298d8a51d 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -212,12 +212,6 @@ impl<W: Write + Send> SerializedFileWriter<W> {
if let Some(file_encryption_properties) =
&properties.file_encryption_properties {
file_encryption_properties.validate_encrypted_column_names(schema_descriptor)?;
- if !file_encryption_properties.encrypt_footer() {
- return Err(general_err!(
- "Writing encrypted files with plaintext footers is not
supported yet"
- ));
- }
-
Ok(Some(Arc::new(FileEncryptor::new(
file_encryption_properties.clone(),
)?)))
diff --git a/parquet/tests/encryption/encryption.rs
b/parquet/tests/encryption/encryption.rs
index da263a5df6..134e3383b3 100644
--- a/parquet/tests/encryption/encryption.rs
+++ b/parquet/tests/encryption/encryption.rs
@@ -433,18 +433,23 @@ fn test_write_non_uniform_encryption() {
read_and_roundtrip_to_encrypted_file(&path, decryption_properties,
file_encryption_properties);
}
-// todo: currently we raise if writing with plaintext footer, but we should
support it
-// for uniform and non-uniform encryption (see
https://github.com/apache/arrow-rs/issues/7320)
#[test]
fn test_write_uniform_encryption_plaintext_footer() {
let testdata = arrow::util::test_util::parquet_test_data();
- let path =
format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted");
+ let path =
format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted");
let footer_key = b"0123456789012345".to_vec(); // 128bit/16
+ let wrong_footer_key = b"0000000000000000".to_vec(); // 128bit/16
let column_1_key = b"1234567890123450".to_vec();
let column_2_key = b"1234567890123451".to_vec();
let decryption_properties =
FileDecryptionProperties::builder(footer_key.clone())
+ .with_column_key("double_field", column_1_key.clone())
+ .with_column_key("float_field", column_2_key.clone())
+ .build()
+ .unwrap();
+
+ let wrong_decryption_properties =
FileDecryptionProperties::builder(wrong_footer_key)
.with_column_key("double_field", column_1_key)
.with_column_key("float_field", column_2_key)
.build()
@@ -455,26 +460,53 @@ fn test_write_uniform_encryption_plaintext_footer() {
.build()
.unwrap();
+ // Try writing plaintext footer and then reading it with the correct
footer key
+ read_and_roundtrip_to_encrypted_file(
+ &path,
+ decryption_properties.clone(),
+ file_encryption_properties.clone(),
+ );
+
+ // Try writing plaintext footer and then reading it with the wrong footer
key
+ let temp_file = tempfile::tempfile().unwrap();
+
+ // read example data
let file = File::open(path).unwrap();
let options = ArrowReaderOptions::default()
.with_file_decryption_properties(decryption_properties.clone());
let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
+ let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file,
options).unwrap();
+ let batch_reader = builder.build().unwrap();
+ let batches = batch_reader
+ .collect::<parquet::errors::Result<Vec<RecordBatch>, _>>()
+ .unwrap();
+
+ // write example data
let props = WriterProperties::builder()
.with_file_encryption_properties(file_encryption_properties)
.build();
- let temp_file = tempfile::tempfile().unwrap();
- let writer = ArrowWriter::try_new(
+ let mut writer = ArrowWriter::try_new(
temp_file.try_clone().unwrap(),
metadata.schema().clone(),
Some(props),
- );
- assert!(writer.is_err());
- assert_eq!(
- writer.unwrap_err().to_string(),
- "Parquet error: Writing encrypted files with plaintext footers is not
supported yet"
)
+ .unwrap();
+ for batch in batches {
+ writer.write(&batch).unwrap();
+ }
+ writer.close().unwrap();
+
+ // Try reading plaintext footer and with the wrong footer key
+ let options =
+
ArrowReaderOptions::default().with_file_decryption_properties(wrong_decryption_properties);
+ let result = ArrowReaderMetadata::load(&temp_file, options.clone());
+ assert!(result.is_err());
+ assert!(result
+ .unwrap_err()
+ .to_string()
+ .starts_with("Parquet error: Footer signature verification failed.
Computed: ["));
}
#[test]