Jackie-Jiang commented on code in PR #12392: URL: https://github.com/apache/pinot/pull/12392#discussion_r1572757028
########## pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java: ########## @@ -581,6 +584,111 @@ public static String[] split(String input, String delimiter, int limit) { return StringUtils.splitByWholeSeparator(input, delimiter, limit); } + /** + * @param input an input string for prefix strings generations. + * @param maxlength the max length of the prefix strings for the string. + * @return generate an array of prefix strings of the string that are shorter than the specified length. + */ + @ScalarFunction + public static String[] prefixes(String input, int maxlength) { + int arrLength = Math.min(maxlength, input.length()); + String[] prefixArr = new String[arrLength]; + for (int prefixIdx = 1; prefixIdx <= arrLength; prefixIdx++) { + prefixArr[prefixIdx - 1] = input.substring(0, prefixIdx); + } + return prefixArr; + } + + /** + * @param input an input string for prefix strings generations. + * @param maxlength the max length of the prefix strings for the string. + * @param prefix the prefix to be prepended to prefix strings generated. e.g. '^' for regex matching + * @return generate an array of prefix matchers of the string that are shorter than the specified length. + */ + @ScalarFunction(nullableParameters = true, names = {"prefix"}) Review Comment: Do you want to alias it to `prefix`? I don't think this is really `prefix` ########## pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java: ########## @@ -581,6 +584,111 @@ public static String[] split(String input, String delimiter, int limit) { return StringUtils.splitByWholeSeparator(input, delimiter, limit); } + /** + * @param input an input string for prefix strings generations. + * @param maxlength the max length of the prefix strings for the string. + * @return generate an array of prefix strings of the string that are shorter than the specified length. + */ + @ScalarFunction + public static String[] prefixes(String input, int maxlength) { + int arrLength = Math.min(maxlength, input.length()); + String[] prefixArr = new String[arrLength]; + for (int prefixIdx = 1; prefixIdx <= arrLength; prefixIdx++) { + prefixArr[prefixIdx - 1] = input.substring(0, prefixIdx); + } + return prefixArr; + } + + /** + * @param input an input string for prefix strings generations. + * @param maxlength the max length of the prefix strings for the string. + * @param prefix the prefix to be prepended to prefix strings generated. e.g. '^' for regex matching + * @return generate an array of prefix matchers of the string that are shorter than the specified length. + */ + @ScalarFunction(nullableParameters = true, names = {"prefix"}) + public static String[] prefixesWithPrefix(String input, int maxlength, @Nullable String prefix) { + if (prefix == null) { + return prefixes(input, maxlength); + } + int arrLength = Math.min(maxlength, input.length()); + String[] prefixArr = new String[arrLength]; + for (int prefixIdx = 1; prefixIdx <= arrLength; prefixIdx++) { + prefixArr[prefixIdx - 1] = prefix + input.substring(0, prefixIdx); + } + return prefixArr; + } + + /** + * @param input an input string for suffix strings generations. + * @param maxlength the max length of the suffix strings for the string. + * @return generate an array of suffix strings of the string that are shorter than the specified length. + */ + @ScalarFunction + public static String[] suffixes(String input, int maxlength) { + int arrLength = Math.min(maxlength, input.length()); + String[] suffixArr = new String[arrLength]; + for (int suffixIdx = 1; suffixIdx <= arrLength; suffixIdx++) { + suffixArr[suffixIdx - 1] = input.substring(input.length() - suffixIdx); + } + return suffixArr; + } + + /** + * @param input an input string for suffix strings generations. + * @param maxlength the max length of the suffix strings for the string. + * @param suffix the suffix string to be appended for suffix strings generated. e.g. '$' for regex matching. + * @return generate an array of suffix matchers of the string that are shorter than the specified length. + */ + @ScalarFunction(nullableParameters = true, names = {"suffix"}) + public static String[] suffixesWithSuffix(String input, int maxlength, @Nullable String suffix) { + if (suffix == null) { + return suffixes(input, maxlength); + } + int arrLength = Math.min(maxlength, input.length()); + String[] suffixArr = new String[arrLength]; + for (int suffixIdx = 1; suffixIdx <= arrLength; suffixIdx++) { + suffixArr[suffixIdx - 1] = input.substring(input.length() - suffixIdx) + suffix; + } + return suffixArr; + } + + /** + * @param input an input string for ngram generations. + * @param length the max length of the ngram for the string. + * @return generate an array of unique ngram of the string that length are exactly matching the specified length. + */ + @ScalarFunction + public static String[] uniqueNgrams(String input, int length) { + if (length == 0 || length > input.length()) { + return new String[0]; + } + ObjectSet<String> ngramSet = new ObjectLinkedOpenHashSet<>(); + for (int i = 0; i < input.length() - length + 1; i++) { + ngramSet.add(input.substring(i, i + length)); + } + return ngramSet.toArray(new String[0]); + } + + /** + * @param input an input string for ngram generations. + * @param minGram the min length of the ngram for the string. + * @param maxGram the max length of the ngram for the string. + * @return generate an array of ngram of the string that length are within the specified range [minGram, maxGram]. + */ + @ScalarFunction + public static String[] uniqueNgrams(String input, int minGram, int maxGram) { + ObjectSet<String> ngramSet = new ObjectLinkedOpenHashSet<>(); Review Comment: Same here ########## pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java: ########## @@ -581,6 +584,111 @@ public static String[] split(String input, String delimiter, int limit) { return StringUtils.splitByWholeSeparator(input, delimiter, limit); } + /** + * @param input an input string for prefix strings generations. + * @param maxlength the max length of the prefix strings for the string. + * @return generate an array of prefix strings of the string that are shorter than the specified length. + */ + @ScalarFunction + public static String[] prefixes(String input, int maxlength) { + int arrLength = Math.min(maxlength, input.length()); + String[] prefixArr = new String[arrLength]; + for (int prefixIdx = 1; prefixIdx <= arrLength; prefixIdx++) { + prefixArr[prefixIdx - 1] = input.substring(0, prefixIdx); + } + return prefixArr; + } + + /** + * @param input an input string for prefix strings generations. + * @param maxlength the max length of the prefix strings for the string. + * @param prefix the prefix to be prepended to prefix strings generated. e.g. '^' for regex matching + * @return generate an array of prefix matchers of the string that are shorter than the specified length. + */ + @ScalarFunction(nullableParameters = true, names = {"prefix"}) + public static String[] prefixesWithPrefix(String input, int maxlength, @Nullable String prefix) { + if (prefix == null) { + return prefixes(input, maxlength); + } + int arrLength = Math.min(maxlength, input.length()); + String[] prefixArr = new String[arrLength]; + for (int prefixIdx = 1; prefixIdx <= arrLength; prefixIdx++) { + prefixArr[prefixIdx - 1] = prefix + input.substring(0, prefixIdx); + } + return prefixArr; + } + + /** + * @param input an input string for suffix strings generations. + * @param maxlength the max length of the suffix strings for the string. + * @return generate an array of suffix strings of the string that are shorter than the specified length. + */ + @ScalarFunction + public static String[] suffixes(String input, int maxlength) { + int arrLength = Math.min(maxlength, input.length()); + String[] suffixArr = new String[arrLength]; + for (int suffixIdx = 1; suffixIdx <= arrLength; suffixIdx++) { + suffixArr[suffixIdx - 1] = input.substring(input.length() - suffixIdx); + } + return suffixArr; + } + + /** + * @param input an input string for suffix strings generations. + * @param maxlength the max length of the suffix strings for the string. + * @param suffix the suffix string to be appended for suffix strings generated. e.g. '$' for regex matching. + * @return generate an array of suffix matchers of the string that are shorter than the specified length. + */ + @ScalarFunction(nullableParameters = true, names = {"suffix"}) Review Comment: Same here -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org