sajjad-moradi commented on code in PR #10186: URL: https://github.com/apache/pinot/pull/10186#discussion_r1088497455
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/mutable/MutableSegmentImpl.java: ########## @@ -1243,18 +1246,60 @@ private boolean isAggregateMetricsEnabled() { // NOTE: Okay for single-writer @SuppressWarnings("NonAtomicOperationOnVolatileField") - private static class NumValuesInfo { + private static class ValuesInfo { volatile int _numValues = 0; volatile int _maxNumValuesPerMVEntry = -1; + volatile int _varByteMVMaxRowLengthInBytes = -1; - void updateSVEntry() { + void updateSVNumValues() { _numValues++; } - void updateMVEntry(int numValuesInMVEntry) { + void updateMVNumValues(int numValuesInMVEntry) { _numValues += numValuesInMVEntry; _maxNumValuesPerMVEntry = Math.max(_maxNumValuesPerMVEntry, numValuesInMVEntry); } + + /** + * When an MV VarByte column is created with noDict, the realtime segment is still created with a dictionary. + * When the realtime segment is converted to offline segment, the offline segment creates a noDict column. + * MultiValueVarByteRawIndexCreator requires the maxRowLengthInBytes. Refer to OSS issue + * https://github.com/apache/pinot/issues/10127 for more details. + */ + void updateVarByteMVMaxRowLengthInBytes(Object entry, DataType dataType) { + // MV support for BigDecimal is not available. 
+ if (dataType != STRING && dataType != BYTES) { + return; + } + + Object[] values = (Object[]) entry; + int rowLength = 0; + + switch (dataType) { + case STRING: { + for (Object obj : values) { + String value = (String) obj; + int length = value.getBytes(UTF_8).length; + rowLength += length; + } + + _varByteMVMaxRowLengthInBytes = Math.max(_varByteMVMaxRowLengthInBytes, rowLength); + break; + } + case BYTES: { + for (Object obj : values) { + ByteArray value = new ByteArray((byte[]) obj); + int length = value.length(); + rowLength += length; + } + + _varByteMVMaxRowLengthInBytes = Math.max(_varByteMVMaxRowLengthInBytes, rowLength); + break; + } + default: + throw new IllegalStateException("Invalid type=" + dataType); Review Comment: I suggest moving the switch-case statement inside the for-loop, so that only the per-value length computation differs between STRING and BYTES, while the row-length accumulation and the single `_varByteMVMaxRowLengthInBytes = Math.max(...)` update are shared instead of being duplicated in each case. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org