mikemccand commented on code in PR #12506: URL: https://github.com/apache/lucene/pull/12506#discussion_r1368976156
########## lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java: ########## @@ -46,6 +65,7 @@ protected Allocator(int blockSize) { public abstract void recycleByteBlocks(byte[][] blocks, int start, int end); + // TODO: This is not used. Can we remove? Review Comment: +1 to remove now! This is an internal API -- we are free to suddenly change it. ########## lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java: ########## @@ -170,42 +191,42 @@ public void reset(boolean zeroFillBuffers, boolean reuseFirst) { } /** - * Advances the pool to its next buffer. This method should be called once after the constructor - * to initialize the pool. In contrast to the constructor a {@link ByteBlockPool#reset()} call - * will advance the pool to its first buffer immediately. + * Allocates a new buffer and advances the pool to it. This method should be called once after the + * constructor to initialize the pool. In contrast to the constructor, a {@link + * ByteBlockPool#reset()} call will advance the pool to its first buffer immediately. */ public void nextBuffer() { if (1 + bufferUpto == buffers.length) { + // The buffer array is full - expand it byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length + 1, NUM_BYTES_OBJECT_REF)][]; System.arraycopy(buffers, 0, newBuffers, 0, buffers.length); buffers = newBuffers; } + // Allocate new buffer and advance the pool to it buffer = buffers[1 + bufferUpto] = allocator.getByteBlock(); bufferUpto++; - byteUpto = 0; byteOffset = Math.addExact(byteOffset, BYTE_BLOCK_SIZE); } /** - * Fill the provided {@link BytesRef} with the bytes at the specified offset/length slice. This - * will avoid copying the bytes, if the slice fits into a single block; otherwise, it uses the - * provided {@link BytesRefBuilder} to copy bytes over. + * Fill the provided {@link BytesRef} with the bytes at the specified offset and length. This will + * avoid copying the bytes if the slice fits into a single block; otherwise, it uses the provided + * {@link BytesRefBuilder} to copy bytes over. */ - void setBytesRef(BytesRefBuilder builder, BytesRef result, long offset, int length) { + void setBytesRef(BytesRefBuilder builder, BytesRef result, int offset, int length) { Review Comment: Hmm why the change from `long` -> `int`? Previously we were able to address `32 + BYTE_BLOCK_SHIFT` bits of address space using `long`? Or did that fail to work somewhere? ########## lucene/core/src/java/org/apache/lucene/util/ByteSlicePool.java: ########## @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +/** + * Class that Posting and PostingVector use to write byte streams into shared fixed-size byte[] + * arrays. The idea is to allocate slices of increasing lengths For example, the first slice is 5 Review Comment: Period after lengths -- `of increasing lengths. For example,` ########## lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java: ########## @@ -170,42 +191,42 @@ public void reset(boolean zeroFillBuffers, boolean reuseFirst) { } /** - * Advances the pool to its next buffer. This method should be called once after the constructor - * to initialize the pool. In contrast to the constructor a {@link ByteBlockPool#reset()} call - * will advance the pool to its first buffer immediately. + * Allocates a new buffer and advances the pool to it. This method should be called once after the + * constructor to initialize the pool. In contrast to the constructor, a {@link + * ByteBlockPool#reset()} call will advance the pool to its first buffer immediately. */ public void nextBuffer() { if (1 + bufferUpto == buffers.length) { + // The buffer array is full - expand it byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length + 1, NUM_BYTES_OBJECT_REF)][]; System.arraycopy(buffers, 0, newBuffers, 0, buffers.length); buffers = newBuffers; } + // Allocate new buffer and advance the pool to it buffer = buffers[1 + bufferUpto] = allocator.getByteBlock(); bufferUpto++; - byteUpto = 0; byteOffset = Math.addExact(byteOffset, BYTE_BLOCK_SIZE); } /** - * Fill the provided {@link BytesRef} with the bytes at the specified offset/length slice. This - * will avoid copying the bytes, if the slice fits into a single block; otherwise, it uses the - * provided {@link BytesRefBuilder} to copy bytes over. + * Fill the provided {@link BytesRef} with the bytes at the specified offset and length. This will + * avoid copying the bytes if the slice fits into a single block; otherwise, it uses the provided + * {@link BytesRefBuilder} to copy bytes over. */ - void setBytesRef(BytesRefBuilder builder, BytesRef result, long offset, int length) { + void setBytesRef(BytesRefBuilder builder, BytesRef result, int offset, int length) { result.length = length; - int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT); Review Comment: If we stick with `long` we should change this to `Math.toIntExact` to catch overflow? ########## lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java: ########## @@ -259,14 +280,15 @@ public void readBytes(final long offset, final byte[] bytes, int bytesOffset, in @Override public long ramBytesUsed() { long size = BASE_RAM_BYTES; - size += RamUsageEstimator.sizeOfObject(buffer); size += RamUsageEstimator.shallowSizeOf(buffers); for (byte[] buf : buffers) { - if (buf == buffer) { Review Comment: Why did the previous code special case `buffer`? Isn't `buffer` a full sized block? ########## lucene/core/src/java/org/apache/lucene/util/ByteSlicePool.java: ########## @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +/** + * Class that Posting and PostingVector use to write byte streams into shared fixed-size byte[] Review Comment: Maybe say `write interleaved byte streams`? ########## lucene/core/src/test/org/apache/lucene/util/TestByteSlicePool.java: ########## @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; + +public class TestByteSlicePool extends LuceneTestCase { + public void testAllocKnowSizeSlice() { + Counter bytesUsed = Counter.newCounter(); + ByteBlockPool blockPool = + new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed)); + blockPool.nextBuffer(); + ByteSlicePool slicePool = new ByteSlicePool(blockPool); + for (int i = 0; i < 100; i++) { + int size; + if (random().nextBoolean()) { + size = TestUtil.nextInt(random(), 100, 1000); + } else { + size = TestUtil.nextInt(random(), 50000, 100000); + } + byte[] randomData = new byte[size]; + random().nextBytes(randomData); + + int upto = slicePool.newSlice(ByteSlicePool.FIRST_LEVEL_SIZE); + + for (int offset = 0; offset < size; ) { + if ((blockPool.buffer[upto] & 16) == 0) { + blockPool.buffer[upto++] = randomData[offset++]; + } else { + int offsetAndLength = slicePool.allocKnownSizeSlice(blockPool.buffer, upto); + int sliceLength = offsetAndLength & 0xff; + upto = offsetAndLength >> 8; + assertNotEquals(0, blockPool.buffer[upto + sliceLength - 1]); + assertEquals(0, blockPool.buffer[upto]); + int writeLength = Math.min(sliceLength - 1, size - offset); + System.arraycopy(randomData, offset, blockPool.buffer, upto, writeLength); + offset += writeLength; + upto += writeLength; + } + } + } + } + + public void testAllocLargeSlice() { Review Comment: Could we maybe add a randomized test that writes a random number of random length interleaves streams, then reads them back and confirms the streams match? ########## lucene/core/src/java/org/apache/lucene/util/ByteSlicePool.java: ########## @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +/** + * Class that Posting and PostingVector use to write byte streams into shared fixed-size byte[] + * arrays. The idea is to allocate slices of increasing lengths For example, the first slice is 5 + * bytes, the next slice is 14, etc. We start by writing our bytes into the first 5 bytes. When we + * hit the end of the slice, we allocate the next slice and then write the address of the new slice + * into the last 4 bytes of the previous slice (the "forwarding address"). + * + * <p>Each slice is filled with 0's initially, and we mark the end with a non-zero byte. This way + * the methods that are writing into the slice don't need to record its length and instead allocate + * a new slice once they hit a non-zero byte. + * + * @lucene.internal + */ +public class ByteSlicePool { Review Comment: `final`? ########## lucene/core/src/test/org/apache/lucene/util/TestByteSlicePool.java: ########## @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; + +public class TestByteSlicePool extends LuceneTestCase { + public void testAllocKnowSizeSlice() { Review Comment: `Know` -> `Known`? ########## lucene/core/src/java/org/apache/lucene/util/ByteSlicePool.java: ########## @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +/** + * Class that Posting and PostingVector use to write byte streams into shared fixed-size byte[] + * arrays. The idea is to allocate slices of increasing lengths For example, the first slice is 5 + * bytes, the next slice is 14, etc. We start by writing our bytes into the first 5 bytes. When we + * hit the end of the slice, we allocate the next slice and then write the address of the new slice + * into the last 4 bytes of the previous slice (the "forwarding address"). + * + * <p>Each slice is filled with 0's initially, and we mark the end with a non-zero byte. This way + * the methods that are writing into the slice don't need to record its length and instead allocate + * a new slice once they hit a non-zero byte. + * + * @lucene.internal + */ +public class ByteSlicePool { + /** + * The underlying structure consists of fixed-size blocks. We overlay variable-length slices on + * top. Each slice is contiguous in memory, i.e. it does not strddle multiple blocks. Review Comment: `strddle` -> `straddle` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org