rdblue commented on code in PR #3231:
URL: https://github.com/apache/iceberg/pull/3231#discussion_r1279975190


##########
core/src/main/java/org/apache/iceberg/encryption/AesGcmInputStream.java:
##########
@@ -0,0 +1,264 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.encryption;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import org.apache.iceberg.io.IOUtil;
+import org.apache.iceberg.io.SeekableInputStream;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+
+public class AesGcmInputStream extends SeekableInputStream {
+  private final SeekableInputStream sourceStream;
+  private final byte[] fileAADPrefix;
+  private final Ciphers.AesGcmDecryptor decryptor;
+  private final byte[] cipherBlockBuffer;
+  private final long numBlocks;
+  private final int lastCipherBlockSize;
+  private final long plainStreamSize;
+
+  private long plainStreamPosition;
+  private long currentPlainBlockIndex;
+  private byte[] currentPlainBlock;
+  private int currentPlainBlockSize;
+  private byte[] singleByte;
+
+  AesGcmInputStream(
+      SeekableInputStream sourceStream, long sourceLength, byte[] aesKey, 
byte[] fileAADPrefix) {
+    this.sourceStream = sourceStream;
+    this.fileAADPrefix = fileAADPrefix;
+    this.decryptor = new Ciphers.AesGcmDecryptor(aesKey);
+    this.cipherBlockBuffer = new byte[Ciphers.CIPHER_BLOCK_SIZE];
+
+    this.plainStreamPosition = 0;
+    this.currentPlainBlockIndex = -1;
+    this.currentPlainBlock = null;
+    this.currentPlainBlockSize = 0;
+
+    long streamLength = sourceLength - Ciphers.GCM_STREAM_HEADER_LENGTH;
+    long numFullBlocks = Math.toIntExact(streamLength / 
Ciphers.CIPHER_BLOCK_SIZE);
+    long cipherFullBlockLength = numFullBlocks * Ciphers.CIPHER_BLOCK_SIZE;
+    int cipherBytesInLastBlock = Math.toIntExact(streamLength - 
cipherFullBlockLength);
+    boolean fullBlocksOnly = (0 == cipherBytesInLastBlock);
+    this.numBlocks = fullBlocksOnly ? numFullBlocks : numFullBlocks + 1;
+    this.lastCipherBlockSize =
+        fullBlocksOnly ? Ciphers.CIPHER_BLOCK_SIZE : cipherBytesInLastBlock; 
// never 0
+
+    long lastPlainBlockSize =
+        (long) lastCipherBlockSize - Ciphers.NONCE_LENGTH - 
Ciphers.GCM_TAG_LENGTH;
+    this.plainStreamSize =
+        numFullBlocks * Ciphers.PLAIN_BLOCK_SIZE + (fullBlocksOnly ? 0 : 
lastPlainBlockSize);
+    this.singleByte = new byte[1];
+  }
+
+  private void validateHeader() throws IOException {
+    byte[] headerBytes = new byte[Ciphers.GCM_STREAM_HEADER_LENGTH];
+    IOUtil.readFully(sourceStream, headerBytes, 0, headerBytes.length);
+
+    Preconditions.checkState(
+        Ciphers.GCM_STREAM_MAGIC.equals(ByteBuffer.wrap(headerBytes, 0, 4)),
+        "Invalid GCM stream: magic does not match AGS1");
+
+    int plainBlockSize = ByteBuffer.wrap(headerBytes, 4, 
4).order(ByteOrder.LITTLE_ENDIAN).getInt();
+    Preconditions.checkState(
+        plainBlockSize == Ciphers.PLAIN_BLOCK_SIZE,
+        "Invalid GCM stream: block size %d != %d",
+        plainBlockSize,
+        Ciphers.PLAIN_BLOCK_SIZE);
+  }
+
+  @Override
+  public int available() {
+    long maxAvailable = plainStreamSize - plainStreamPosition;
+    // See InputStream.available contract
+    if (maxAvailable >= Integer.MAX_VALUE) {
+      return Integer.MAX_VALUE;
+    } else {
+      return (int) maxAvailable;
+    }
+  }
+
+  private int availableInCurrentBlock() {
+    if (currentPlainBlockIndex < 0) {

Review Comment:
   When I was refactoring, I hit a bug that occurred when the block containing 
`plainStreamPosition` didn't match `currentPlainBlockIndex`. I ended up fixing 
the problem by setting `currentPlainBlockIndex = -1` in a place that I had 
missed. Now that I'm thinking about it more, I think the right solution is 
actually to update the check here instead. That way, the bytes available will 
be non-zero only if the current block matches the position.
   
   Here's a diff that does what I'm talking about and still passes tests:
   
   ```diff
   diff --git 
a/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputStream.java 
b/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputStream.java
   index a63134f31d..88e9b36c25 100644
   --- a/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputStream.java
   +++ b/core/src/main/java/org/apache/iceberg/encryption/AesGcmInputStream.java
   @@ -97,7 +97,7 @@ public class AesGcmInputStream extends SeekableInputStream 
{
      }
    
      private int availableInCurrentBlock() {
   -    if (currentPlainBlockIndex < 0) {
   +    if (blockIndex(plainStreamPosition) != currentPlainBlockIndex) {
          return 0;
        }
    
   @@ -130,10 +130,6 @@ public class AesGcmInputStream extends 
SeekableInputStream {
            remainingBytesToRead -= bytesToCopy;
            resultBufferOffset += bytesToCopy;
            this.plainStreamPosition += bytesToCopy;
   -        if (blockIndex(plainStreamPosition) != currentPlainBlockIndex) {
   -          // invalidate the current block
   -          this.currentPlainBlockIndex = -1;
   -        }
    
          } else if (available() > 0) {
            decryptBlock(blockIndex(plainStreamPosition));
   @@ -157,10 +153,6 @@ public class AesGcmInputStream extends 
SeekableInputStream {
        }
    
        this.plainStreamPosition = newPos;
   -    if (blockIndex(plainStreamPosition) != currentPlainBlockIndex) {
   -      // invalidate the current block
   -      this.currentPlainBlockIndex = -1;
   -    }
      }
    
      @Override
   @@ -177,10 +169,6 @@ public class AesGcmInputStream extends 
SeekableInputStream {
        }
    
        this.plainStreamPosition += n;
   -    if (blockIndex(plainStreamPosition) != currentPlainBlockIndex) {
   -      // invalidate the current block
   -      this.currentPlainBlockIndex = -1;
   -    }
    
        return n;
      }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to