Jackie-Jiang commented on code in PR #10990: URL: https://github.com/apache/pinot/pull/10990#discussion_r1269931272
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -619,4 +645,52 @@ public void close() creators.addAll(_dictionaryCreatorMap.values()); FileUtils.close(creators); } + + /** + * Returns the original string if its length is within the allowed limit. + * If the string's length exceeds the limit, + * it returns a truncated version of the string with maintaining min or max value. + * + */ + @VisibleForTesting + static String getValueWithinLengthLimit(String value, boolean isMax, DataType dataType) { + int length = value.length(); + + // if length is less, no need of trimming the value. + if (length < METADATA_PROPERTY_LENGTH_LIMIT) { Review Comment: (minor) ```suggestion if (length <= METADATA_PROPERTY_LENGTH_LIMIT) { ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -605,6 +599,38 @@ static boolean isValidPropertyValue(String value) { return value.indexOf(',') == -1; } + /** + * Helper method to get the valid value for setting min/max. + */ + @VisibleForTesting + static String getValidPropertyValue(String value, boolean isMax, DataType dataType) { + String valueWithinLengthLimit = getValueWithinLengthLimit(value, isMax, dataType); + int length = valueWithinLengthLimit.length(); + + if (length > 0) { + // check and replace first character if it's a white space + if (Character.isWhitespace(valueWithinLengthLimit.charAt(0))) { + String unicodeValue = "\\u" + String.format("%04x", (int) valueWithinLengthLimit.charAt(0)); + valueWithinLengthLimit = unicodeValue + valueWithinLengthLimit.substring(1); + } + + // check and replace last character if it's a white space + if (Character.isWhitespace(valueWithinLengthLimit.charAt(valueWithinLengthLimit.length() - 1))) { + String unicodeValue = "\\u" + + String.format("%04x", (int) valueWithinLengthLimit.charAt(valueWithinLengthLimit.length() - 1)); + valueWithinLengthLimit = + valueWithinLengthLimit.substring(0, valueWithinLengthLimit.length() - 1) + unicodeValue; + } + } + + // removing the ',' from the value if it's present. + if (length > 0 && valueWithinLengthLimit.contains(",")) { + valueWithinLengthLimit = valueWithinLengthLimit.replace(",", "\\,"); + } Review Comment: (minor) Move this into the previous if branch (`length > 0` is duplicated) ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -619,4 +645,52 @@ public void close() creators.addAll(_dictionaryCreatorMap.values()); FileUtils.close(creators); } + + /** + * Returns the original string if its length is within the allowed limit. + * If the string's length exceeds the limit, + * it returns a truncated version of the string with maintaining min or max value. + * + */ + @VisibleForTesting + static String getValueWithinLengthLimit(String value, boolean isMax, DataType dataType) { + int length = value.length(); + + // if length is less, no need of trimming the value. + if (length < METADATA_PROPERTY_LENGTH_LIMIT) { + return value; + } + + String alteredValue; + // For Numeric Data Type(INT, LONG, DOUBLE, FLOAT) value longer than METADATA_PROPERTY_LENGTH_LIMIT is not possible. + switch (dataType) { + case STRING: + case JSON: + if (isMax) { + alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT - 1) + + (char) (value.charAt(METADATA_PROPERTY_LENGTH_LIMIT - 1) + 1); + } else { + // property type is min value take first METADATA_PROPERTY_LENGTH_LIMIT - 1 characters + // and decrement the last character value by 1. + alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT - 1) + + (char) (value.charAt(METADATA_PROPERTY_LENGTH_LIMIT - 1) - 1); + } + break; + case BYTES: + byte[] shortByteValue = Arrays.copyOf(BytesUtils.toBytes(value), + (METADATA_PROPERTY_LENGTH_LIMIT / 2) + 1); + char ch; Review Comment: Why converting it to `char`? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -619,4 +645,52 @@ public void close() creators.addAll(_dictionaryCreatorMap.values()); FileUtils.close(creators); } + + /** + * Returns the original string if its length is within the allowed limit. + * If the string's length exceeds the limit, + * it returns a truncated version of the string with maintaining min or max value. + * + */ + @VisibleForTesting + static String getValueWithinLengthLimit(String value, boolean isMax, DataType dataType) { + int length = value.length(); + + // if length is less, no need of trimming the value. + if (length < METADATA_PROPERTY_LENGTH_LIMIT) { + return value; + } + + String alteredValue; + // For Numeric Data Type(INT, LONG, DOUBLE, FLOAT) value longer than METADATA_PROPERTY_LENGTH_LIMIT is not possible. + switch (dataType) { + case STRING: + case JSON: + if (isMax) { + alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT - 1) + + (char) (value.charAt(METADATA_PROPERTY_LENGTH_LIMIT - 1) + 1); + } else { + // property type is min value take first METADATA_PROPERTY_LENGTH_LIMIT - 1 characters + // and decrement the last character value by 1. + alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT - 1) + + (char) (value.charAt(METADATA_PROPERTY_LENGTH_LIMIT - 1) - 1); + } + break; + case BYTES: + byte[] shortByteValue = Arrays.copyOf(BytesUtils.toBytes(value), + (METADATA_PROPERTY_LENGTH_LIMIT / 2) + 1); Review Comment: Why `+1`? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -619,4 +645,52 @@ public void close() creators.addAll(_dictionaryCreatorMap.values()); FileUtils.close(creators); } + + /** + * Returns the original string if its length is within the allowed limit. + * If the string's length exceeds the limit, + * it returns a truncated version of the string with maintaining min or max value. + * + */ + @VisibleForTesting + static String getValueWithinLengthLimit(String value, boolean isMax, DataType dataType) { + int length = value.length(); + + // if length is less, no need of trimming the value. + if (length < METADATA_PROPERTY_LENGTH_LIMIT) { + return value; + } + + String alteredValue; + // For Numeric Data Type(INT, LONG, DOUBLE, FLOAT) value longer than METADATA_PROPERTY_LENGTH_LIMIT is not possible. + switch (dataType) { + case STRING: + case JSON: + if (isMax) { + alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT - 1) + + (char) (value.charAt(METADATA_PROPERTY_LENGTH_LIMIT - 1) + 1); + } else { + // property type is min value take first METADATA_PROPERTY_LENGTH_LIMIT - 1 characters + // and decrement the last character value by 1. + alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT - 1) + + (char) (value.charAt(METADATA_PROPERTY_LENGTH_LIMIT - 1) - 1); + } + break; + case BYTES: + byte[] shortByteValue = Arrays.copyOf(BytesUtils.toBytes(value), + (METADATA_PROPERTY_LENGTH_LIMIT / 2) + 1); + char ch; + if (isMax) { + ch = (char) ((char) shortByteValue[METADATA_PROPERTY_LENGTH_LIMIT / 2] + 1); + } else { + ch = (char) ((char) shortByteValue[METADATA_PROPERTY_LENGTH_LIMIT / 2] - 1); Review Comment: For min, we can just remove the trailing bytes ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -619,4 +645,52 @@ public void close() creators.addAll(_dictionaryCreatorMap.values()); FileUtils.close(creators); } + + /** + * Returns the original string if its length is within the allowed limit. + * If the string's length exceeds the limit, + * it returns a truncated version of the string with maintaining min or max value. + * + */ + @VisibleForTesting + static String getValueWithinLengthLimit(String value, boolean isMax, DataType dataType) { + int length = value.length(); + + // if length is less, no need of trimming the value. + if (length < METADATA_PROPERTY_LENGTH_LIMIT) { + return value; + } + + String alteredValue; + // For Numeric Data Type(INT, LONG, DOUBLE, FLOAT) value longer than METADATA_PROPERTY_LENGTH_LIMIT is not possible. + switch (dataType) { + case STRING: + case JSON: + if (isMax) { + alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT - 1) + + (char) (value.charAt(METADATA_PROPERTY_LENGTH_LIMIT - 1) + 1); Review Comment: Is it possible that this `+1` can result in an invalid char? ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -619,4 +645,52 @@ public void close() creators.addAll(_dictionaryCreatorMap.values()); FileUtils.close(creators); } + + /** + * Returns the original string if its length is within the allowed limit. + * If the string's length exceeds the limit, + * it returns a truncated version of the string with maintaining min or max value. + * + */ + @VisibleForTesting + static String getValueWithinLengthLimit(String value, boolean isMax, DataType dataType) { + int length = value.length(); + + // if length is less, no need of trimming the value. + if (length < METADATA_PROPERTY_LENGTH_LIMIT) { + return value; + } + + String alteredValue; + // For Numeric Data Type(INT, LONG, DOUBLE, FLOAT) value longer than METADATA_PROPERTY_LENGTH_LIMIT is not possible. + switch (dataType) { + case STRING: + case JSON: + if (isMax) { + alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT - 1) + + (char) (value.charAt(METADATA_PROPERTY_LENGTH_LIMIT - 1) + 1); + } else { + // property type is min value take first METADATA_PROPERTY_LENGTH_LIMIT - 1 characters + // and decrement the last character value by 1. + alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT - 1) + + (char) (value.charAt(METADATA_PROPERTY_LENGTH_LIMIT - 1) - 1); Review Comment: Here we should be able to just do `value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT)` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java: ########## @@ -619,4 +645,52 @@ public void close() creators.addAll(_dictionaryCreatorMap.values()); FileUtils.close(creators); } + + /** + * Returns the original string if its length is within the allowed limit. + * If the string's length exceeds the limit, + * it returns a truncated version of the string with maintaining min or max value. + * + */ + @VisibleForTesting + static String getValueWithinLengthLimit(String value, boolean isMax, DataType dataType) { + int length = value.length(); + + // if length is less, no need of trimming the value. + if (length < METADATA_PROPERTY_LENGTH_LIMIT) { + return value; + } + + String alteredValue; + // For Numeric Data Type(INT, LONG, DOUBLE, FLOAT) value longer than METADATA_PROPERTY_LENGTH_LIMIT is not possible. + switch (dataType) { + case STRING: + case JSON: + if (isMax) { + alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT - 1) + + (char) (value.charAt(METADATA_PROPERTY_LENGTH_LIMIT - 1) + 1); + } else { + // property type is min value take first METADATA_PROPERTY_LENGTH_LIMIT - 1 characters + // and decrement the last character value by 1. + alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT - 1) + + (char) (value.charAt(METADATA_PROPERTY_LENGTH_LIMIT - 1) - 1); + } + break; + case BYTES: + byte[] shortByteValue = Arrays.copyOf(BytesUtils.toBytes(value), + (METADATA_PROPERTY_LENGTH_LIMIT / 2) + 1); + char ch; + if (isMax) { + ch = (char) ((char) shortByteValue[METADATA_PROPERTY_LENGTH_LIMIT / 2] + 1); Review Comment: This can overflow -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org