This is an automated email from the ASF dual-hosted git repository.
mneumann pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs-object-store.git
The following commit(s) were added to refs/heads/main by this push:
new c9475c5 aws: fix bug in multipart copy when SHA256 checksum is used
(#569)
c9475c5 is described below
commit c9475c5c4eb73d5b23e242497ccecc6861c52858
Author: james-rms <[email protected]>
AuthorDate: Fri Dec 12 21:27:12 2025 +1100
aws: fix bug in multipart copy when SHA256 checksum is used (#569)
* aws: fix bug in multipart copy when sha256 checksum is specified
* add integration test
* update localstack in CI
---
.github/workflows/ci.yml | 3 ++-
src/aws/client.rs | 34 +++++++++++++++++-----------------
src/aws/mod.rs | 26 ++++++++++++++++++++++++++
src/client/s3.rs | 2 ++
4 files changed, 47 insertions(+), 18 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6f3ca5b..8eb4903 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -125,11 +125,12 @@ jobs:
- name: Setup LocalStack (AWS emulation)
run: |
- echo "LOCALSTACK_CONTAINER=$(docker run -d -p 4566:4566
localstack/localstack:4.0.3)" >> $GITHUB_ENV
+ echo "LOCALSTACK_CONTAINER=$(docker run -d -p 4566:4566
localstack/localstack:4.11.1)" >> $GITHUB_ENV
echo "EC2_METADATA_CONTAINER=$(docker run -d -p 1338:1338
amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2)" >> $GITHUB_ENV
aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket
aws --endpoint-url=http://localhost:4566 s3 mb
s3://test-bucket-for-spawn
aws --endpoint-url=http://localhost:4566 s3 mb
s3://test-bucket-for-checksum
+ aws --endpoint-url=http://localhost:4566 s3 mb
s3://test-bucket-for-copy-if-not-exists
aws --endpoint-url=http://localhost:4566 s3api create-bucket
--bucket test-object-lock --object-lock-enabled-for-bucket
KMS_KEY=$(aws --endpoint-url=http://localhost:4566 kms create-key
--description "test key")
diff --git a/src/aws/client.rs b/src/aws/client.rs
index 6337187..150a47c 100644
--- a/src/aws/client.rs
+++ b/src/aws/client.rs
@@ -701,24 +701,24 @@ impl S3Client {
// If SSE-C is used, we must include the encryption headers in
every upload request.
request = request.with_encryption_headers();
}
+
let (parts, body) = request.send().await?.into_parts();
- let checksum_sha256 = parts
- .headers
- .get(SHA256_CHECKSUM)
- .and_then(|v| v.to_str().ok())
- .map(|v| v.to_string());
-
- let e_tag = match is_copy {
- false => get_etag(&parts.headers).map_err(|source| Error::Metadata
{ source })?,
- true => {
- let response = body
- .bytes()
- .await
- .map_err(|source| Error::CreateMultipartResponseBody {
source })?;
- let response: CopyPartResult =
quick_xml::de::from_reader(response.reader())
- .map_err(|source| Error::InvalidMultipartResponse { source
})?;
- response.e_tag
- }
+ let (e_tag, checksum_sha256) = if is_copy {
+ let response = body
+ .bytes()
+ .await
+ .map_err(|source| Error::CreateMultipartResponseBody { source
})?;
+ let response: CopyPartResult =
quick_xml::de::from_reader(response.reader())
+ .map_err(|source| Error::InvalidMultipartResponse { source })?;
+ (response.e_tag, response.checksum_sha256)
+ } else {
+ let e_tag = get_etag(&parts.headers).map_err(|source|
Error::Metadata { source })?;
+ let checksum_sha256 = parts
+ .headers
+ .get(SHA256_CHECKSUM)
+ .and_then(|v| v.to_str().ok())
+ .map(|v| v.to_string());
+ (e_tag, checksum_sha256)
};
let content_id = if self.config.checksum == Some(Checksum::SHA256) {
diff --git a/src/aws/mod.rs b/src/aws/mod.rs
index 3e658af..cb66f9d 100644
--- a/src/aws/mod.rs
+++ b/src/aws/mod.rs
@@ -553,6 +553,32 @@ mod tests {
store.delete(&path).await.unwrap();
}
+ #[tokio::test]
+ async fn copy_multipart_file_with_signature() {
+ maybe_skip_integration!();
+
+ let bucket = "test-bucket-for-copy-if-not-exists";
+ let store = AmazonS3Builder::from_env()
+ .with_bucket_name(bucket)
+ .with_checksum_algorithm(Checksum::SHA256)
+ .with_copy_if_not_exists(S3CopyIfNotExists::Multipart)
+ .build()
+ .unwrap();
+
+ let src = Path::parse("src.bin").unwrap();
+ let dst = Path::parse("dst.bin").unwrap();
+ store
+ .put(&src, PutPayload::from(vec![0u8; 100_000]))
+ .await
+ .unwrap();
+ if store.head(&dst).await.is_ok() {
+ store.delete(&dst).await.unwrap();
+ }
+ store.copy_if_not_exists(&src, &dst).await.unwrap();
+ store.delete(&src).await.unwrap();
+ store.delete(&dst).await.unwrap();
+ }
+
#[tokio::test]
async fn write_multipart_file_with_signature_object_lock() {
maybe_skip_integration!();
diff --git a/src/client/s3.rs b/src/client/s3.rs
index a2221fb..a1b113e 100644
--- a/src/client/s3.rs
+++ b/src/client/s3.rs
@@ -98,6 +98,8 @@ pub(crate) struct InitiateMultipartUploadResult {
pub(crate) struct CopyPartResult {
#[serde(rename = "ETag")]
pub e_tag: String,
+ #[serde(default, rename = "ChecksumSHA256")]
+ pub checksum_sha256: Option<String>,
}
#[derive(Debug, Serialize)]