This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new dbbf44ce15 Add splitPartWithLimit and splitPartFromEnd UDFs (#12437) dbbf44ce15 is described below commit dbbf44ce153bbe72fb3161f1977d0af1b403e06e Author: deemoliu <qiao...@uber.com> AuthorDate: Thu Apr 18 15:54:59 2024 -0700 Add splitPartWithLimit and splitPartFromEnd UDFs (#12437) --- .../common/function/scalar/StringFunctions.java | 26 ++++++++++-- .../function/scalar/StringFunctionsTest.java | 48 ++++++++++++++++++++++ 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java index 8ce77e8ccb..374917ec99 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java @@ -49,7 +49,6 @@ public class StringFunctions { private final static Pattern LTRIM = Pattern.compile("^\\s+"); private final static Pattern RTRIM = Pattern.compile("\\s+$"); - /** * @see StringUtils#reverse(String) * @param input @@ -585,14 +584,35 @@ public class StringFunctions { * TODO: Revisit if index should be one-based (both Presto and Postgres use one-based index, which starts with 1) * @param input * @param delimiter - * @param index + * @param index we allow negative value for index which indicates the index from the end. * @return splits string on specified delimiter and returns String at specified index from the split. */ @ScalarFunction(names = {"splitPart", "split_part"}) public static String splitPart(String input, String delimiter, int index) { String[] splitString = StringUtils.splitByWholeSeparator(input, delimiter); - if (index < splitString.length) { + if (index >= 0 && index < splitString.length) { + return splitString[index]; + } else if (index < 0 && index >= -splitString.length) { + return splitString[splitString.length + index]; + } else { + return "null"; + } + } + + /** + * @param input the input String to be split into parts. + * @param delimiter the specified delimiter to split the input string. + * @param limit the max count of parts that the input string can be splitted into. + * @param index the specified index for the splitted parts to be returned. + * @return splits string on the delimiter with the limit count and returns String at specified index from the split. + */ + @ScalarFunction + public static String splitPart(String input, String delimiter, int limit, int index) { + String[] splitString = StringUtils.splitByWholeSeparator(input, delimiter, limit); + if (index >= 0 && index < splitString.length) { return splitString[index]; + } else if (index < 0 && index >= -splitString.length) { + return splitString[splitString.length + index]; } else { return "null"; } diff --git a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java index 9129ccdc37..d75b8ada43 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java @@ -26,6 +26,47 @@ import static org.testng.Assert.assertEquals; public class StringFunctionsTest { + @DataProvider(name = "splitPartTestCases") + public static Object[][] splitPartTestCases() { + return new Object[][]{ + {"org.apache.pinot.common.function", ".", 0, 100, "org", "org"}, + {"org.apache.pinot.common.function", ".", 10, 100, "null", "null"}, + {"org.apache.pinot.common.function", ".", 1, 0, "apache", "apache"}, + {"org.apache.pinot.common.function", ".", 1, 1, "apache", "null"}, + {"org.apache.pinot.common.function", ".", 0, 1, "org", "org.apache.pinot.common.function"}, + {"org.apache.pinot.common.function", ".", 1, 2, "apache", "apache.pinot.common.function"}, + {"org.apache.pinot.common.function", ".", 2, 3, "pinot", "pinot.common.function"}, + {"org.apache.pinot.common.function", ".", 3, 4, "common", "common.function"}, + {"org.apache.pinot.common.function", ".", 4, 5, "function", "function"}, + {"org.apache.pinot.common.function", ".", 5, 6, "null", "null"}, + {"org.apache.pinot.common.function", ".", 3, 3, "common", "null"}, + {"+++++", "+", 0, 100, "", ""}, + {"+++++", "+", 1, 100, "null", "null"}, + // note that splitPart will split with limit first, then lookup by index from START or END. + {"org.apache.pinot.common.function", ".", -1, 100, "function", "function"}, + {"org.apache.pinot.common.function", ".", -10, 100, "null", "null"}, + {"org.apache.pinot.common.function", ".", -2, 0, "common", "common"}, // Case: limit=0 is not taking effect. + {"org.apache.pinot.common.function", ".", -1, 1, "function", "org.apache.pinot.common.function"}, + {"org.apache.pinot.common.function", ".", -2, 1, "common", "null"}, + {"org.apache.pinot.common.function", ".", -1, 2, "function", "apache.pinot.common.function"}, + {"org.apache.pinot.common.function", ".", -2, 2, "common", "org"}, + {"org.apache.pinot.common.function", ".", -1, 3, "function", "pinot.common.function"}, + {"org.apache.pinot.common.function", ".", -3, 3, "pinot", "org"}, + {"org.apache.pinot.common.function", ".", -4, 3, "apache", "null"}, + {"org.apache.pinot.common.function", ".", -1, 4, "function", "common.function"}, + {"org.apache.pinot.common.function", ".", -3, 4, "pinot", "apache"}, + {"org.apache.pinot.common.function", ".", -4, 4, "apache", "org"}, + {"org.apache.pinot.common.function", ".", -1, 5, "function", "function"}, + {"org.apache.pinot.common.function", ".", -5, 5, "org", "org"}, + {"org.apache.pinot.common.function", ".", -6, 5, "null", "null"}, + {"org.apache.pinot.common.function", ".", -1, 6, "function", "function"}, + {"org.apache.pinot.common.function", ".", -5, 6, "org", "org"}, + {"org.apache.pinot.common.function", ".", -6, 6, "null", "null"}, + {"+++++", "+", -1, 100, "", ""}, + {"+++++", "+", -2, 100, "null", "null"}, + }; + } + @DataProvider(name = "isJson") public static Object[][] isJsonTestCases() { return new Object[][]{ @@ -40,4 +81,11 @@ public class StringFunctionsTest { public void testIsJson(String input, boolean expectedValue) { assertEquals(StringFunctions.isJson(input), expectedValue); } + + @Test(dataProvider = "splitPartTestCases") + public void testSplitPart(String input, String delimiter, int index, int limit, String expectedToken, + String expectedTokenWithLimitCounts) { + assertEquals(StringFunctions.splitPart(input, delimiter, index), expectedToken); + assertEquals(StringFunctions.splitPart(input, delimiter, limit, index), expectedTokenWithLimitCounts); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org