This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new dbbf44ce15 Add splitPartWithLimit and splitPartFromEnd UDFs (#12437)
dbbf44ce15 is described below

commit dbbf44ce153bbe72fb3161f1977d0af1b403e06e
Author: deemoliu <qiao...@uber.com>
AuthorDate: Thu Apr 18 15:54:59 2024 -0700

    Add splitPartWithLimit and splitPartFromEnd UDFs (#12437)
---
 .../common/function/scalar/StringFunctions.java    | 26 ++++++++++--
 .../function/scalar/StringFunctionsTest.java       | 48 ++++++++++++++++++++++
 2 files changed, 71 insertions(+), 3 deletions(-)

diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
index 8ce77e8ccb..374917ec99 100644
--- 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
+++ 
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
@@ -49,7 +49,6 @@ public class StringFunctions {
   private final static Pattern LTRIM = Pattern.compile("^\\s+");
   private final static Pattern RTRIM = Pattern.compile("\\s+$");
 
-
   /**
    * @see StringUtils#reverse(String)
    * @param input
@@ -585,14 +584,35 @@ public class StringFunctions {
    * TODO: Revisit if index should be one-based (both Presto and Postgres use 
one-based index, which starts with 1)
    * @param input
    * @param delimiter
-   * @param index
+   * @param index zero-based index of the part to return; a negative value counts 
from the end (-1 is the last part).
    * @return splits string on specified delimiter and returns String at 
specified index from the split.
    */
   @ScalarFunction(names = {"splitPart", "split_part"})
   public static String splitPart(String input, String delimiter, int index) {
     String[] splitString = StringUtils.splitByWholeSeparator(input, delimiter);
-    if (index < splitString.length) {
+    if (index >= 0 && index < splitString.length) {
+      return splitString[index];
+    } else if (index < 0 && index >= -splitString.length) {
+      return splitString[splitString.length + index];
+    } else {
+      return "null";
+    }
+  }
+
+  /**
+   * @param input the input String to be split into parts.
+   * @param delimiter the specified delimiter to split the input string.
+   * @param limit the maximum number of parts the input string can be split 
into.
+   * @param index the index of the split part to return; a negative value counts from the end.
+   * @return splits string on the delimiter with the limit count and returns 
String at specified index from the split.
+   */
+  @ScalarFunction
+  public static String splitPart(String input, String delimiter, int limit, 
int index) {
+    String[] splitString = StringUtils.splitByWholeSeparator(input, delimiter, 
limit);
+    if (index >= 0 && index < splitString.length) {
       return splitString[index];
+    } else if (index < 0 && index >= -splitString.length) {
+      return splitString[splitString.length + index];
     } else {
       return "null";
     }
diff --git 
a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
 
b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
index 9129ccdc37..d75b8ada43 100644
--- 
a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
+++ 
b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
@@ -26,6 +26,47 @@ import static org.testng.Assert.assertEquals;
 
 public class StringFunctionsTest {
 
+  @DataProvider(name = "splitPartTestCases")
+  public static Object[][] splitPartTestCases() {
+    return new Object[][]{
+        {"org.apache.pinot.common.function", ".", 0, 100, "org", "org"},
+        {"org.apache.pinot.common.function", ".", 10, 100, "null", "null"},
+        {"org.apache.pinot.common.function", ".", 1, 0, "apache", "apache"},
+        {"org.apache.pinot.common.function", ".", 1, 1, "apache", "null"},
+        {"org.apache.pinot.common.function", ".", 0, 1, "org", 
"org.apache.pinot.common.function"},
+        {"org.apache.pinot.common.function", ".", 1, 2, "apache", 
"apache.pinot.common.function"},
+        {"org.apache.pinot.common.function", ".", 2, 3, "pinot", 
"pinot.common.function"},
+        {"org.apache.pinot.common.function", ".", 3, 4, "common", 
"common.function"},
+        {"org.apache.pinot.common.function", ".", 4, 5, "function", 
"function"},
+        {"org.apache.pinot.common.function", ".", 5, 6, "null", "null"},
+        {"org.apache.pinot.common.function", ".", 3, 3, "common", "null"},
+        {"+++++", "+", 0, 100, "", ""},
+        {"+++++", "+", 1, 100, "null", "null"},
+        // Note: splitPart splits with the limit first, then looks up the part by 
index from the start (index >= 0) or from the end (index < 0).
+        {"org.apache.pinot.common.function", ".", -1, 100, "function", 
"function"},
+        {"org.apache.pinot.common.function", ".", -10, 100, "null", "null"},
+        {"org.apache.pinot.common.function", ".", -2, 0, "common", "common"}, 
// Case: limit=0 means no limit is applied.
+        {"org.apache.pinot.common.function", ".", -1, 1, "function", 
"org.apache.pinot.common.function"},
+        {"org.apache.pinot.common.function", ".", -2, 1, "common", "null"},
+        {"org.apache.pinot.common.function", ".", -1, 2, "function", 
"apache.pinot.common.function"},
+        {"org.apache.pinot.common.function", ".", -2, 2, "common", "org"},
+        {"org.apache.pinot.common.function", ".", -1, 3, "function", 
"pinot.common.function"},
+        {"org.apache.pinot.common.function", ".", -3, 3, "pinot", "org"},
+        {"org.apache.pinot.common.function", ".", -4, 3, "apache", "null"},
+        {"org.apache.pinot.common.function", ".", -1, 4, "function", 
"common.function"},
+        {"org.apache.pinot.common.function", ".", -3, 4, "pinot", "apache"},
+        {"org.apache.pinot.common.function", ".", -4, 4, "apache", "org"},
+        {"org.apache.pinot.common.function", ".", -1, 5, "function", 
"function"},
+        {"org.apache.pinot.common.function", ".", -5, 5, "org", "org"},
+        {"org.apache.pinot.common.function", ".", -6, 5, "null", "null"},
+        {"org.apache.pinot.common.function", ".", -1, 6, "function", 
"function"},
+        {"org.apache.pinot.common.function", ".", -5, 6, "org", "org"},
+        {"org.apache.pinot.common.function", ".", -6, 6, "null", "null"},
+        {"+++++", "+", -1, 100, "", ""},
+        {"+++++", "+", -2, 100, "null", "null"},
+    };
+  }
+
   @DataProvider(name = "isJson")
   public static Object[][] isJsonTestCases() {
     return new Object[][]{
@@ -40,4 +81,11 @@ public class StringFunctionsTest {
   public void testIsJson(String input, boolean expectedValue) {
     assertEquals(StringFunctions.isJson(input), expectedValue);
   }
+
+  @Test(dataProvider = "splitPartTestCases")
+  public void testSplitPart(String input, String delimiter, int index, int 
limit, String expectedToken,
+      String expectedTokenWithLimitCounts) {
+    assertEquals(StringFunctions.splitPart(input, delimiter, index), 
expectedToken);
+    assertEquals(StringFunctions.splitPart(input, delimiter, limit, index), 
expectedTokenWithLimitCounts);
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to