Jackie-Jiang commented on code in PR #10990:
URL: https://github.com/apache/pinot/pull/10990#discussion_r1273020617


##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -619,4 +645,57 @@ public void close()
     creators.addAll(_dictionaryCreatorMap.values());
     FileUtils.close(creators);
   }
+
+  /**
+   * Returns the original string if its length is within the allowed limit.
+   * If the string's length exceeds the limit,
+   * it returns a truncated version of the string with maintaining min or max 
value.
+   *
+   */
+  @VisibleForTesting
+  static String getValueWithinLengthLimit(String value, boolean isMax, 
DataType dataType) {
+    int length = value.length();
+
+    // if length is less, no need of trimming the value.
+    if (length <= METADATA_PROPERTY_LENGTH_LIMIT) {
+      return value;
+    }
+
+    String alteredValue;
+    // For Numeric Data Type(INT, LONG, DOUBLE, FLOAT) value longer than 
METADATA_PROPERTY_LENGTH_LIMIT is not possible.
+    switch (dataType) {
+      case STRING:
+      case JSON:
+        if (isMax) {
+          int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT - 1;
+          // determining the index for the character having value less than 
'\uFFFF'
+          while (trimIndexValue < value.length() && 
value.charAt(trimIndexValue) == '\uFFFF') {
+            trimIndexValue++;
+          }
+          alteredValue = value.substring(0, trimIndexValue) + '\uFFFF'; // 
assigning the '\uFFFF' to make the value max.
+        } else {
+          alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT);
+        }
+        break;
+      case BYTES:
+        if (isMax) {
+          byte[] valueInByteArray = BytesUtils.toBytes(value);
+          int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT / 2 - 1;
+          // determining the index for the byte having value less than 0xFF
+          while (trimIndexValue < value.length() && 
(valueInByteArray[trimIndexValue] & 0xff) == 0xFF) {

Review Comment:
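   Cannot use `value.length()` here: `value` is the hex string, while the loop indexes the decoded byte array, so the bound must be `valueInByteArray.length`.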
   ```suggestion
             while (trimIndexValue < valueInByteArray.length && valueInByteArray[trimIndexValue] == (byte) 0xFF) {
   ```



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -619,4 +645,57 @@ public void close()
     creators.addAll(_dictionaryCreatorMap.values());
     FileUtils.close(creators);
   }
+
+  /**
+   * Returns the original string if its length is within the allowed limit.
+   * If the string's length exceeds the limit,
+   * it returns a truncated version of the string with maintaining min or max 
value.
+   *
+   */
+  @VisibleForTesting
+  static String getValueWithinLengthLimit(String value, boolean isMax, 
DataType dataType) {
+    int length = value.length();
+
+    // if length is less, no need of trimming the value.
+    if (length <= METADATA_PROPERTY_LENGTH_LIMIT) {
+      return value;
+    }
+
+    String alteredValue;
+    // For Numeric Data Type(INT, LONG, DOUBLE, FLOAT) value longer than 
METADATA_PROPERTY_LENGTH_LIMIT is not possible.
+    switch (dataType) {
+      case STRING:
+      case JSON:
+        if (isMax) {
+          int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT - 1;
+          // determining the index for the character having value less than 
'\uFFFF'
+          while (trimIndexValue < value.length() && 
value.charAt(trimIndexValue) == '\uFFFF') {
+            trimIndexValue++;
+          }
+          alteredValue = value.substring(0, trimIndexValue) + '\uFFFF'; // 
assigning the '\uFFFF' to make the value max.
+        } else {
+          alteredValue = value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT);
+        }
+        break;
+      case BYTES:
+        if (isMax) {
+          byte[] valueInByteArray = BytesUtils.toBytes(value);
+          int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT / 2 - 1;
+          // determining the index for the byte having value less than 0xFF
+          while (trimIndexValue < value.length() && 
(valueInByteArray[trimIndexValue] & 0xff) == 0xFF) {
+            trimIndexValue++;
+          }
+          byte[] shortByteValue = Arrays.copyOf(valueInByteArray, 
trimIndexValue + 1);
+          shortByteValue[trimIndexValue] = (byte) 0xFF; // assigning the 0xFF 
to make the value max.
+          alteredValue = BytesUtils.toHexString(shortByteValue);

Review Comment:
   ```suggestion
             if (trimIndexValue == valueInByteArray.length) {
               alteredValue = value;
             } else {
               byte[] shortByteValue = Arrays.copyOf(valueInByteArray, 
trimIndexValue + 1);
               shortByteValue[trimIndexValue] = (byte) 0xFF; // assigning the 
0xFF to make the value max.
               alteredValue = BytesUtils.toHexString(shortByteValue);
             }
   ```



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -619,4 +645,57 @@ public void close()
     creators.addAll(_dictionaryCreatorMap.values());
     FileUtils.close(creators);
   }
+
+  /**
+   * Returns the original string if its length is within the allowed limit.
+   * If the string's length exceeds the limit,
+   * it returns a truncated version of the string with maintaining min or max 
value.
+   *
+   */
+  @VisibleForTesting
+  static String getValueWithinLengthLimit(String value, boolean isMax, 
DataType dataType) {
+    int length = value.length();
+
+    // if length is less, no need of trimming the value.
+    if (length <= METADATA_PROPERTY_LENGTH_LIMIT) {
+      return value;
+    }
+
+    String alteredValue;
+    // For Numeric Data Type(INT, LONG, DOUBLE, FLOAT) value longer than 
METADATA_PROPERTY_LENGTH_LIMIT is not possible.
+    switch (dataType) {
+      case STRING:
+      case JSON:
+        if (isMax) {
+          int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT - 1;
+          // determining the index for the character having value less than 
'\uFFFF'
+          while (trimIndexValue < value.length() && 
value.charAt(trimIndexValue) == '\uFFFF') {
+            trimIndexValue++;
+          }
+          alteredValue = value.substring(0, trimIndexValue) + '\uFFFF'; // 
assigning the '\uFFFF' to make the value max.

Review Comment:
   (minor)
   ```suggestion
             if (trimIndexValue == length) {
               alteredValue = value;
             } else {
               alteredValue = value.substring(0, trimIndexValue) + '\uFFFF'; // 
assigning the '\uFFFF' to make the value max.
             }
   ```



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -619,4 +645,57 @@ public void close()
     creators.addAll(_dictionaryCreatorMap.values());
     FileUtils.close(creators);
   }
+
+  /**
+   * Returns the original string if its length is within the allowed limit.
+   * If the string's length exceeds the limit,
+   * it returns a truncated version of the string with maintaining min or max 
value.
+   *
+   */
+  @VisibleForTesting
+  static String getValueWithinLengthLimit(String value, boolean isMax, 
DataType dataType) {
+    int length = value.length();
+
+    // if length is less, no need of trimming the value.
+    if (length <= METADATA_PROPERTY_LENGTH_LIMIT) {
+      return value;
+    }
+
+    String alteredValue;
+    // For Numeric Data Type(INT, LONG, DOUBLE, FLOAT) value longer than 
METADATA_PROPERTY_LENGTH_LIMIT is not possible.
+    switch (dataType) {
+      case STRING:
+      case JSON:
+        if (isMax) {
+          int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT - 1;
+          // determining the index for the character having value less than 
'\uFFFF'
+          while (trimIndexValue < value.length() && 
value.charAt(trimIndexValue) == '\uFFFF') {

Review Comment:
   (minor)
   ```suggestion
             while (trimIndexValue < length && value.charAt(trimIndexValue) == 
'\uFFFF') {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to