Jackie-Jiang commented on code in PR #12945: URL: https://github.com/apache/pinot/pull/12945#discussion_r1588226716
########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/ForwardIndexConfig.java: ########## @@ -31,30 +31,45 @@ import org.apache.pinot.spi.config.table.FieldConfig; import org.apache.pinot.spi.config.table.FieldConfig.CompressionCodec; import org.apache.pinot.spi.config.table.IndexConfig; +import org.apache.pinot.spi.utils.DataSizeUtils; public class ForwardIndexConfig extends IndexConfig { public static final int DEFAULT_RAW_WRITER_VERSION = 2; - public static final ForwardIndexConfig DISABLED = new ForwardIndexConfig(true, null, null, null, null, null); + public static final int DEFAULT_TARGET_MAX_CHUNK_SIZE = 1024 * 1024; // 1MB + public static final int DEFAULT_TARGET_DOCS_PER_CHUNK = 1000; + public static final ForwardIndexConfig DISABLED = + new ForwardIndexConfig(true, null, null, null, null, null, null, null); public static final ForwardIndexConfig DEFAULT = new Builder().build(); @Nullable private final CompressionCodec _compressionCodec; private final boolean _deriveNumDocsPerChunk; private final int _rawIndexWriterVersion; + private final String _targetMaxChunkSize; + private final int _targetDocsPerChunk; @Nullable private final ChunkCompressionType _chunkCompressionType; @Nullable private final DictIdCompressionType _dictIdCompressionType; public ForwardIndexConfig(@Nullable Boolean disabled, @Nullable CompressionCodec compressionCodec, - @Nullable Boolean deriveNumDocsPerChunk, @Nullable Integer rawIndexWriterVersion) { + @Nullable Boolean deriveNumDocsPerChunk, @Nullable Integer rawIndexWriterVersion, + @Nullable String targetMaxChunkSize, @Nullable Integer targetDocsPerChunk) { super(disabled); _deriveNumDocsPerChunk = Boolean.TRUE.equals(deriveNumDocsPerChunk); _rawIndexWriterVersion = rawIndexWriterVersion == null ? DEFAULT_RAW_WRITER_VERSION : rawIndexWriterVersion; _compressionCodec = compressionCodec; + if (targetMaxChunkSize != null && !(_deriveNumDocsPerChunk || _rawIndexWriterVersion == 4)) { + throw new IllegalStateException( + "targetMaxChunkSize should only be used when deriveNumDocsPerChunk is true or rawIndexWriterVersion is 4"); + } + _targetMaxChunkSize = + targetMaxChunkSize == null ? DataSizeUtils.fromBytes(DEFAULT_TARGET_MAX_CHUNK_SIZE) : targetMaxChunkSize; Review Comment: (minor) Make a constant for `DataSizeUtils.fromBytes(DEFAULT_TARGET_MAX_CHUNK_SIZE)` (maybe having both `DEFAULT_TARGET_MAX_CHUNK_SIZE` and `DEFAULT_TARGET_MAX_CHUNK_SIZE_BYTES`) ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/ForwardIndexConfig.java: ########## @@ -31,30 +31,45 @@ import org.apache.pinot.spi.config.table.FieldConfig; import org.apache.pinot.spi.config.table.FieldConfig.CompressionCodec; import org.apache.pinot.spi.config.table.IndexConfig; +import org.apache.pinot.spi.utils.DataSizeUtils; public class ForwardIndexConfig extends IndexConfig { public static final int DEFAULT_RAW_WRITER_VERSION = 2; - public static final ForwardIndexConfig DISABLED = new ForwardIndexConfig(true, null, null, null, null, null); + public static final int DEFAULT_TARGET_MAX_CHUNK_SIZE = 1024 * 1024; // 1MB + public static final int DEFAULT_TARGET_DOCS_PER_CHUNK = 1000; + public static final ForwardIndexConfig DISABLED = + new ForwardIndexConfig(true, null, null, null, null, null, null, null); public static final ForwardIndexConfig DEFAULT = new Builder().build(); @Nullable private final CompressionCodec _compressionCodec; private final boolean _deriveNumDocsPerChunk; private final int _rawIndexWriterVersion; + private final String _targetMaxChunkSize; Review Comment: Consider parsing `_targetMaxChunkSizeBytes` upfront to avoid illegal size ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/fwd/ForwardIndexUtils.java: ########## @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.segment.creator.impl.fwd; + +public class ForwardIndexUtils { + private static final int TARGET_MIN_CHUNK_SIZE = 4 * 1024; + + private ForwardIndexUtils() { + } + + /** + * Get the dynamic target chunk size based on the maximum length of the values, target number of documents per chunk. + * + * If targetDocsPerChunk is negative, the target chunk size is the targetMaxChunkSizeBytes and chunk size + * shall not be dynamically chosen + * @param maxLength max length of the values + * @param targetDocsPerChunk target number of documents to store per chunk + * @param targetMaxChunkSizeBytes target max chunk size in bytes + */ + public static int getDynamicTargetChunkSize(int maxLength, int targetDocsPerChunk, int targetMaxChunkSizeBytes) { + if (targetDocsPerChunk < 0 || (long) maxLength * targetDocsPerChunk > Integer.MAX_VALUE) { + return targetMaxChunkSizeBytes; Review Comment: Maybe also put a lower bound to this? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org