This is an automated email from the ASF dual-hosted git repository. siddteotia pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 2cd03495ff [multistage] initial commit to support all scalar/transform/filter match (#9707) 2cd03495ff is described below commit 2cd03495ffe64a790069cc35c9711d4b4ccc496b Author: Rong Rong <ro...@apache.org> AuthorDate: Mon Nov 21 00:04:13 2022 -0800 [multistage] initial commit to support all scalar/transform/filter match (#9707) * initial commit to support all scalar/transform/filter match * fix test issue * also fix the rest of the list * typo * address comments and fix tests * add more tests Co-authored-by: Rong Rong <ro...@startree.ai> --- .../pinot/common/function/FunctionRegistry.java | 57 ++++++++-- .../common/function/TransformFunctionType.java | 20 +++- .../function/scalar/ArithmeticFunctions.java | 14 +-- .../function/scalar/ComparisonFunctions.java | 10 +- .../common/function/scalar/DateTimeFunctions.java | 120 ++++----------------- .../common/function/scalar/ObjectFunctions.java | 8 +- .../common/function/scalar/StringFunctions.java | 8 +- .../function/FunctionDefinitionRegistryTest.java | 51 +++++++++ .../function/DateTimeTransformFunctionTest.java | 20 ++-- .../function/ExtractTransformFunctionTest.java | 2 +- .../query/parser/CalciteRexExpressionParser.java | 43 ++++++-- .../pinot/query/runtime/QueryRunnerTest.java | 32 ++++-- 12 files changed, 229 insertions(+), 156 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java b/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java index 82a16720e8..86f7434204 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java @@ -96,24 +96,31 @@ public class FunctionRegistry { */ public static void registerFunction(Method method, boolean nullableParameters) { registerFunction(method.getName(), method, nullableParameters); - - // Calcite ScalarFunctionImpl doesn't allow customized named functions. TODO: fix me. - if (method.getAnnotation(Deprecated.class) == null) { - FUNCTION_MAP.put(method.getName(), ScalarFunctionImpl.create(method)); - } } /** * Registers a method with the given function name. */ public static void registerFunction(String functionName, Method method, boolean nullableParameters) { + registerFunctionInfoMap(functionName, method, nullableParameters); + registerCalciteNamedFunctionMap(functionName, method, nullableParameters); + } + + private static void registerFunctionInfoMap(String functionName, Method method, boolean nullableParameters) { FunctionInfo functionInfo = new FunctionInfo(method, method.getDeclaringClass(), nullableParameters); String canonicalName = canonicalize(functionName); Map<Integer, FunctionInfo> functionInfoMap = FUNCTION_INFO_MAP.computeIfAbsent(canonicalName, k -> new HashMap<>()); - Preconditions.checkState(functionInfoMap.put(method.getParameterCount(), functionInfo) == null, + FunctionInfo existFunctionInfo = functionInfoMap.put(method.getParameterCount(), functionInfo); + Preconditions.checkState(existFunctionInfo == null || existFunctionInfo.getMethod() == functionInfo.getMethod(), "Function: %s with %s parameters is already registered", functionName, method.getParameterCount()); } + private static void registerCalciteNamedFunctionMap(String functionName, Method method, boolean nullableParameters) { + if (method.getAnnotation(Deprecated.class) == null) { + FUNCTION_MAP.put(functionName, ScalarFunctionImpl.create(method)); + } + } + public static Map<String, List<Function>> getRegisteredCalciteFunctionMap() { return FUNCTION_MAP.map(); } @@ -147,4 +154,42 @@ public class FunctionRegistry { private static String canonicalize(String functionName) { return StringUtils.remove(functionName, '_').toLowerCase(); } + + /** + * Placeholders for scalar function, they register and represents the signature for transform and filter predicate + * so that v2 engine can understand and plan them correctly. + */ + private static class PlaceholderScalarFunctions { + + @ScalarFunction(names = {"jsonExtractScalar", "json_extract_scalar"}) + public static Object jsonExtractScalar(String jsonFieldName, String jsonPath, String resultsType) { + throw new UnsupportedOperationException("Placeholder scalar function, should not reach here"); + } + + @ScalarFunction(names = {"jsonExtractScalar", "json_extract_scalar"}) + public static Object jsonExtractScalar(String jsonFieldName, String jsonPath, String resultsType, + Object defaultValue) { + throw new UnsupportedOperationException("Placeholder scalar function, should not reach here"); + } + + @ScalarFunction(names = {"jsonExtractKey", "json_extract_key"}) + public static String jsonExtractKey(String jsonFieldName, String jsonPath) { + throw new UnsupportedOperationException("Placeholder scalar function, should not reach here"); + } + + @ScalarFunction(names = {"textContains", "text_contains"}) + public static boolean textContains(String text, String pattern) { + throw new UnsupportedOperationException("Placeholder scalar function, should not reach here"); + } + + @ScalarFunction(names = {"textMatch", "text_match"}) + public static boolean textMatch(String text, String pattern) { + throw new UnsupportedOperationException("Placeholder scalar function, should not reach here"); + } + + @ScalarFunction(names = {"jsonMatch", "json_match"}) + public static boolean jsonMatch(String text, String pattern) { + throw new UnsupportedOperationException("Placeholder scalar function, should not reach here"); + } + } } diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/TransformFunctionType.java b/pinot-common/src/main/java/org/apache/pinot/common/function/TransformFunctionType.java index 3f6ab6a144..6c61fc89f1 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/TransformFunctionType.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/TransformFunctionType.java @@ -25,7 +25,7 @@ import java.util.List; public enum TransformFunctionType { - // Aggregation functions for single-valued columns + // arithmetic functions for single-valued columns ADD("add", "plus"), SUB("sub", "minus"), MULT("mult", "times"), @@ -48,6 +48,7 @@ public enum TransformFunctionType { LEAST("least"), GREATEST("greatest"), + // predicate functions EQUALS("equals"), NOT_EQUALS("not_equals"), GREATER_THAN("greater_than"), @@ -68,10 +69,17 @@ public enum TransformFunctionType { OR("or"), NOT("not"), // NOT operator doesn't cover the transform for NOT IN and NOT LIKE - CAST("cast"), + // CASE WHEN function parsed as 'CASE_WHEN' CASE("case"), + + // date type conversion functions + CAST("cast"), + + // string functions JSONEXTRACTSCALAR("jsonExtractScalar"), JSONEXTRACTKEY("jsonExtractKey"), + + // date time functions TIMECONVERT("timeConvert"), DATETIMECONVERT("dateTimeConvert"), DATETRUNC("dateTrunc"), @@ -87,6 +95,10 @@ public enum TransformFunctionType { MINUTE("minute"), SECOND("second"), MILLISECOND("millisecond"), + + EXTRACT("extract"), + + // array functions // The only column accepted by "cardinality" function is multi-value array, thus putting "cardinality" as alias. // TODO: once we support other types of multiset, we should make CARDINALITY its own function ARRAYLENGTH("arrayLength", "cardinality"), @@ -96,12 +108,12 @@ public enum TransformFunctionType { ARRAYSUM("arraySum"), VALUEIN("valueIn"), MAPVALUE("mapValue"), + + // special functions INIDSET("inIdSet"), LOOKUP("lookUp"), GROOVY("groovy"), - EXTRACT("extract"), - // Regexp functions REGEXP_EXTRACT("regexpExtract"), diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java index e6d73b0cc4..1a851aa404 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ArithmeticFunctions.java @@ -30,27 +30,27 @@ public class ArithmeticFunctions { private ArithmeticFunctions() { } - @ScalarFunction + @ScalarFunction(names = {"add", "plus"}) public static double plus(double a, double b) { return a + b; } - @ScalarFunction + @ScalarFunction(names = {"sub", "minus"}) public static double minus(double a, double b) { return a - b; } - @ScalarFunction + @ScalarFunction(names = {"mult", "times"}) public static double times(double a, double b) { return a * b; } - @ScalarFunction + @ScalarFunction(names = {"div", "divide"}) public static double divide(double a, double b) { return a / b; } - @ScalarFunction + @ScalarFunction(names = {"div", "divide"}) public static double divide(double a, double b, double defaultValue) { return (b == 0) ? defaultValue : a / b; } @@ -135,14 +135,14 @@ public class ArithmeticFunctions { // Big Decimal Implementation has been used here to avoid overflows // when multiplying by Math.pow(10, scale) for rounding - @ScalarFunction + @ScalarFunction(names = {"roundDecimal", "round_decimal"}) public static double roundDecimal(double a, int scale) { return BigDecimal.valueOf(a).setScale(scale, RoundingMode.HALF_UP).doubleValue(); } // TODO: The function should ideally be named 'round' // but it is not possible because of existing DateTimeFunction with same name. - @ScalarFunction + @ScalarFunction(names = {"roundDecimal", "round_decimal"}) public static double roundDecimal(double a) { return Math.round(a); } diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ComparisonFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ComparisonFunctions.java index e27ff13d6f..c94d8c3fdb 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ComparisonFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ComparisonFunctions.java @@ -28,27 +28,27 @@ public class ComparisonFunctions { private ComparisonFunctions() { } - @ScalarFunction + @ScalarFunction(names = {"greater_than", "greaterThan"}) public static boolean greaterThan(double a, double b) { return a > b; } - @ScalarFunction + @ScalarFunction(names = {"greater_than_or_equal", "greaterThanOrEqual"}) public static boolean greaterThanOrEqual(double a, double b) { return a >= b; } - @ScalarFunction + @ScalarFunction(names = {"less_than", "lessThan"}) public static boolean lessThan(double a, double b) { return a < b; } - @ScalarFunction + @ScalarFunction(names = {"less_than_or_equal", "lessThanOrEqual"}) public static boolean lessThanOrEqual(double a, double b) { return a <= b; } - @ScalarFunction + @ScalarFunction(names = {"not_equals", "notEquals"}) public static boolean notEquals(double a, double b) { return Math.abs(a - b) >= DOUBLE_COMPARISON_TOLERANCE; } diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/DateTimeFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/DateTimeFunctions.java index f350288602..47f849e28b 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/DateTimeFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/DateTimeFunctions.java @@ -355,7 +355,7 @@ public class DateTimeFunctions { /** * Returns the year of the ISO week from the given epoch millis in UTC timezone. */ - @ScalarFunction + @ScalarFunction(names = {"yearOfWeek", "year_of_week", "yow"}) public static int yearOfWeek(long millis) { return new DateTime(millis, DateTimeZone.UTC).getWeekyear(); } @@ -363,33 +363,17 @@ public class DateTimeFunctions { /** * Returns the year of the ISO week from the given epoch millis and timezone id. */ - @ScalarFunction + @ScalarFunction(names = {"yearOfWeek", "year_of_week", "yow"}) public static int yearOfWeek(long millis, String timezoneId) { return new DateTime(millis, DateTimeZone.forID(timezoneId)).getWeekyear(); } - /** - * An alias for yearOfWeek(). - */ - @ScalarFunction - public static int yow(long millis) { - return yearOfWeek(millis); - } - - /** - * An alias for yearOfWeek(). - */ - @ScalarFunction - public static int yow(long millis, String timezoneId) { - return yearOfWeek(millis, timezoneId); - } - /** * Returns the quarter of the year from the given epoch millis in UTC timezone. The value ranges from 1 to 4. */ @ScalarFunction public static int quarter(long millis) { - return (month(millis) - 1) / 3 + 1; + return (monthOfYear(millis) - 1) / 3 + 1; } /** @@ -397,61 +381,45 @@ public class DateTimeFunctions { */ @ScalarFunction public static int quarter(long millis, String timezoneId) { - return (month(millis, timezoneId) - 1) / 3 + 1; + return (monthOfYear(millis, timezoneId) - 1) / 3 + 1; } /** * Returns the month of the year from the given epoch millis in UTC timezone. The value ranges from 1 to 12. */ - @ScalarFunction - public static int month(long millis) { + @ScalarFunction(names = {"month", "month_of_year", "monthOfYear"}) + public static int monthOfYear(long millis) { return new DateTime(millis, DateTimeZone.UTC).getMonthOfYear(); } /** * Returns the month of the year from the given epoch millis and timezone id. The value ranges from 1 to 12. */ - @ScalarFunction - public static int month(long millis, String timezoneId) { + @ScalarFunction(names = {"month", "month_of_year", "monthOfYear"}) + public static int monthOfYear(long millis, String timezoneId) { return new DateTime(millis, DateTimeZone.forID(timezoneId)).getMonthOfYear(); } /** * Returns the ISO week of the year from the given epoch millis in UTC timezone.The value ranges from 1 to 53. */ - @ScalarFunction - public static int week(long millis) { + @ScalarFunction(names = {"weekOfYear", "week_of_year", "week"}) + public static int weekOfYear(long millis) { return new DateTime(millis, DateTimeZone.UTC).getWeekOfWeekyear(); } /** * Returns the ISO week of the year from the given epoch millis and timezone id. The value ranges from 1 to 53. */ - @ScalarFunction - public static int week(long millis, String timezoneId) { - return new DateTime(millis, DateTimeZone.forID(timezoneId)).getWeekOfWeekyear(); - } - - /** - * An alias for week(). - */ - @ScalarFunction - public static int weekOfYear(long millis) { - return week(millis); - } - - /** - * An alias for week(). - */ - @ScalarFunction + @ScalarFunction(names = {"weekOfYear", "week_of_year", "week"}) public static int weekOfYear(long millis, String timezoneId) { - return week(millis, timezoneId); + return new DateTime(millis, DateTimeZone.forID(timezoneId)).getWeekOfWeekyear(); } /** * Returns the day of the year from the given epoch millis in UTC timezone. The value ranges from 1 to 366. */ - @ScalarFunction + @ScalarFunction(names = {"dayOfYear", "day_of_year", "doy"}) public static int dayOfYear(long millis) { return new DateTime(millis, DateTimeZone.UTC).getDayOfYear(); } @@ -459,64 +427,32 @@ public class DateTimeFunctions { /** * Returns the day of the year from the given epoch millis and timezone id. The value ranges from 1 to 366. */ - @ScalarFunction + @ScalarFunction(names = {"dayOfYear", "day_of_year", "doy"}) public static int dayOfYear(long millis, String timezoneId) { return new DateTime(millis, DateTimeZone.forID(timezoneId)).getDayOfYear(); } - /** - * An alias for dayOfYear(). - */ - @ScalarFunction - public static int doy(long millis) { - return dayOfYear(millis); - } - - /** - * An alias for dayOfYear(). - */ - @ScalarFunction - public static int doy(long millis, String timezoneId) { - return dayOfYear(millis, timezoneId); - } - /** * Returns the day of the month from the given epoch millis in UTC timezone. The value ranges from 1 to 31. */ - @ScalarFunction - public static int day(long millis) { + @ScalarFunction(names = {"day", "dayOfMonth", "day_of_month"}) + public static int dayOfMonth(long millis) { return new DateTime(millis, DateTimeZone.UTC).getDayOfMonth(); } /** * Returns the day of the month from the given epoch millis and timezone id. The value ranges from 1 to 31. */ - @ScalarFunction - public static int day(long millis, String timezoneId) { - return new DateTime(millis, DateTimeZone.forID(timezoneId)).getDayOfMonth(); - } - - /** - * An alias for day(). - */ - @ScalarFunction - public static int dayOfMonth(long millis) { - return day(millis); - } - - /** - * An alias for day(). - */ - @ScalarFunction + @ScalarFunction(names = {"day", "dayOfMonth", "day_of_month"}) public static int dayOfMonth(long millis, String timezoneId) { - return day(millis, timezoneId); + return new DateTime(millis, DateTimeZone.forID(timezoneId)).getDayOfMonth(); } /** * Returns the day of the week from the given epoch millis in UTC timezone. The value ranges from 1 (Monday) to 7 * (Sunday). */ - @ScalarFunction + @ScalarFunction(names = {"dayOfWeek", "day_of_week", "dow"}) public static int dayOfWeek(long millis) { return new DateTime(millis, DateTimeZone.UTC).getDayOfWeek(); } @@ -525,27 +461,11 @@ public class DateTimeFunctions { * Returns the day of the week from the given epoch millis and timezone id. The value ranges from 1 (Monday) to 7 * (Sunday). */ - @ScalarFunction + @ScalarFunction(names = {"dayOfWeek", "day_of_week", "dow"}) public static int dayOfWeek(long millis, String timezoneId) { return new DateTime(millis, DateTimeZone.forID(timezoneId)).getDayOfWeek(); } - /** - * An alias for dayOfWeek(). - */ - @ScalarFunction - public static int dow(long millis) { - return dayOfWeek(millis); - } - - /** - * An alias for dayOfWeek(). - */ - @ScalarFunction - public static int dow(long millis, String timezoneId) { - return dayOfWeek(millis, timezoneId); - } - /** * Returns the hour of the day from the given epoch millis in UTC timezone. The value ranges from 0 to 23. */ diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ObjectFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ObjectFunctions.java index 944684ae1d..143edd740f 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ObjectFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/ObjectFunctions.java @@ -27,17 +27,17 @@ public class ObjectFunctions { private ObjectFunctions() { } - @ScalarFunction(nullableParameters = true) + @ScalarFunction(nullableParameters = true, names = {"isNull", "is_null"}) public static boolean isNull(@Nullable Object obj) { return obj == null; } - @ScalarFunction(nullableParameters = true) + @ScalarFunction(nullableParameters = true, names = {"isNotNull", "is_not_null"}) public static boolean isNotNull(@Nullable Object obj) { return !isNull(obj); } - @ScalarFunction(nullableParameters = true) + @ScalarFunction(nullableParameters = true, names = {"isDistinctFrom", "is_distinct_from"}) public static boolean isDistinctFrom(@Nullable Object obj1, @Nullable Object obj2) { if (obj1 == null && obj2 == null) { return false; @@ -48,7 +48,7 @@ public class ObjectFunctions { return !obj1.equals(obj2); } - @ScalarFunction(nullableParameters = true) + @ScalarFunction(nullableParameters = true, names = {"isNotDistinctFrom", "is_not_distinct_from"}) public static boolean isNotDistinctFrom(@Nullable Object obj1, @Nullable Object obj2) { return !isDistinctFrom(obj1, obj2); } diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java index d06cbb9a46..45b5767599 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java @@ -211,7 +211,7 @@ public class StringFunctions { * @param regexp * @return the matched result. */ - @ScalarFunction + @ScalarFunction(names = {"regexp_extract", "regexpExtract"}) public static String regexpExtract(String value, String regexp) { return regexpExtract(value, regexp, 0, ""); } @@ -223,7 +223,7 @@ public class StringFunctions { * @param group * @return the matched result. */ - @ScalarFunction + @ScalarFunction(names = {"regexp_extract", "regexpExtract"}) public static String regexpExtract(String value, String regexp, int group) { return regexpExtract(value, regexp, group, ""); } @@ -236,7 +236,7 @@ public class StringFunctions { * @param defaultValue the default value if no match found * @return the matched result */ - @ScalarFunction + @ScalarFunction(names = {"regexp_extract", "regexpExtract"}) public static String regexpExtract(String value, String regexp, int group, String defaultValue) { Pattern p = Pattern.compile(regexp); Matcher matcher = p.matcher(value); @@ -728,7 +728,7 @@ public class StringFunctions { return regexpReplace(inputStr, matchStr, replaceStr, matchStartPos, occurence, ""); } - @ScalarFunction + @ScalarFunction(names = {"regexpLike", "regexp_like"}) public static boolean regexpLike(String inputStr, String regexPatternStr) { Pattern pattern = Pattern.compile(regexPatternStr, Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE); return pattern.matcher(inputStr).find(); diff --git a/pinot-common/src/test/java/org/apache/pinot/common/function/FunctionDefinitionRegistryTest.java b/pinot-common/src/test/java/org/apache/pinot/common/function/FunctionDefinitionRegistryTest.java index 1dd6fe939e..f473a01011 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/function/FunctionDefinitionRegistryTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/function/FunctionDefinitionRegistryTest.java @@ -18,8 +18,14 @@ */ package org.apache.pinot.common.function; +import com.google.common.collect.ImmutableList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Pattern; import org.apache.pinot.segment.spi.AggregationFunctionType; import org.apache.pinot.spi.annotations.ScalarFunction; +import org.apache.pinot.sql.FilterKind; import org.testng.annotations.Test; import static org.testng.Assert.assertFalse; @@ -29,6 +35,17 @@ import static org.testng.Assert.assertTrue; public class FunctionDefinitionRegistryTest { + private static final int MAX_NARG = 10; + private static final List<Pattern> IGNORED_TRANSFORM_FUNCTION_SIGNATURE = ImmutableList.of( + Pattern.compile("array.*"), // array related functions are not supported at the moment + Pattern.compile("st_.*")// all ST GEO features are ignored. + ); + private static final List<String> IGNORED_FUNCTION_NAMES = ImmutableList.of( + // functions we are not supporting post transform anyway + "valuein", "mapvalue", "inidset", "lookup", "groovy", "scalar", "geotoh3", "case", "not_in", "timeconvert", + // functions not needed for register b/c they are in std sql table or they will not be composed directly. + "in", "and", "or", "not", "range", "extract" + ); @Test public void testIsAggFunc() { @@ -43,6 +60,40 @@ public class FunctionDefinitionRegistryTest { assertFalse(AggregationFunctionType.isAggregationFunction("toEpochSeconds")); } + @Test + public void testCalciteFunctionMapAllRegistered() { + Set<String> registeredCalciteFunctionNameIgnoreCase = new HashSet<>(); + for (String funcNames : FunctionRegistry.getRegisteredCalciteFunctionNames()) { + registeredCalciteFunctionNameIgnoreCase.add(funcNames.toLowerCase()); + } + for (TransformFunctionType enumType : TransformFunctionType.values()) { + if (!isIgnored(enumType.getName().toLowerCase())) { + for (String funcName : enumType.getAliases()) { + assertTrue(registeredCalciteFunctionNameIgnoreCase.contains(funcName.toLowerCase()), + "Unable to find transform function signature for: " + funcName); + } + } + } + for (FilterKind enumType : FilterKind.values()) { + if (!isIgnored(enumType.name().toLowerCase())) { + assertTrue(registeredCalciteFunctionNameIgnoreCase.contains(enumType.name().toLowerCase()), + "Unable to find filter function signature for: " + enumType.name()); + } + } + } + + private boolean isIgnored(String funcName) { + if (IGNORED_FUNCTION_NAMES.contains(funcName)) { + return true; + } + for (Pattern signature : IGNORED_TRANSFORM_FUNCTION_SIGNATURE) { + if (signature.matcher(funcName).find()) { + return true; + } + } + return false; + } + @ScalarFunction(names = {"testFunc1", "testFunc2"}) public static String testScalarFunction(long randomArg1, String randomArg2) { return null; diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/DateTimeTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/DateTimeTransformFunctionTest.java index 5cf344a984..7918f269fc 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/DateTimeTransformFunctionTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/DateTimeTransformFunctionTest.java @@ -36,15 +36,15 @@ public class DateTimeTransformFunctionTest extends BaseTransformFunctionTest { return new Object[][]{ {"year", (LongToIntFunction) DateTimeFunctions::year, DateTimeTransformFunction.Year.class}, {"yearOfWeek", (LongToIntFunction) DateTimeFunctions::yearOfWeek, DateTimeTransformFunction.YearOfWeek.class}, - {"yow", (LongToIntFunction) DateTimeFunctions::yow, DateTimeTransformFunction.YearOfWeek.class}, - {"month", (LongToIntFunction) DateTimeFunctions::month, DateTimeTransformFunction.Month.class}, - {"week", (LongToIntFunction) DateTimeFunctions::week, DateTimeTransformFunction.WeekOfYear.class}, + {"yow", (LongToIntFunction) DateTimeFunctions::yearOfWeek, DateTimeTransformFunction.YearOfWeek.class}, + {"month", (LongToIntFunction) DateTimeFunctions::monthOfYear, DateTimeTransformFunction.Month.class}, + {"week", (LongToIntFunction) DateTimeFunctions::weekOfYear, DateTimeTransformFunction.WeekOfYear.class}, {"weekOfYear", (LongToIntFunction) DateTimeFunctions::weekOfYear, DateTimeTransformFunction.WeekOfYear.class}, {"quarter", (LongToIntFunction) DateTimeFunctions::quarter, DateTimeTransformFunction.Quarter.class}, {"dayOfWeek", (LongToIntFunction) DateTimeFunctions::dayOfWeek, DateTimeTransformFunction.DayOfWeek.class}, - {"dow", (LongToIntFunction) DateTimeFunctions::dow, DateTimeTransformFunction.DayOfWeek.class}, + {"dow", (LongToIntFunction) DateTimeFunctions::dayOfWeek, DateTimeTransformFunction.DayOfWeek.class}, {"dayOfYear", (LongToIntFunction) DateTimeFunctions::dayOfYear, DateTimeTransformFunction.DayOfYear.class}, - {"doy", (LongToIntFunction) DateTimeFunctions::doy, DateTimeTransformFunction.DayOfYear.class}, + {"doy", (LongToIntFunction) DateTimeFunctions::dayOfYear, DateTimeTransformFunction.DayOfYear.class}, {"dayOfMonth", (LongToIntFunction) DateTimeFunctions::dayOfMonth, DateTimeTransformFunction.DayOfMonth.class}, {"day", (LongToIntFunction) DateTimeFunctions::dayOfMonth, DateTimeTransformFunction.DayOfMonth.class}, {"hour", (LongToIntFunction) DateTimeFunctions::hour, DateTimeTransformFunction.Hour.class}, @@ -59,15 +59,15 @@ public class DateTimeTransformFunctionTest extends BaseTransformFunctionTest { return new Object[][]{ {"year", (ZonedTimeFunction) DateTimeFunctions::year, DateTimeTransformFunction.Year.class}, {"yearOfWeek", (ZonedTimeFunction) DateTimeFunctions::yearOfWeek, DateTimeTransformFunction.YearOfWeek.class}, - {"yow", (ZonedTimeFunction) DateTimeFunctions::yow, DateTimeTransformFunction.YearOfWeek.class}, - {"month", (ZonedTimeFunction) DateTimeFunctions::month, DateTimeTransformFunction.Month.class}, - {"week", (ZonedTimeFunction) DateTimeFunctions::week, DateTimeTransformFunction.WeekOfYear.class}, + {"yow", (ZonedTimeFunction) DateTimeFunctions::yearOfWeek, DateTimeTransformFunction.YearOfWeek.class}, + {"month", (ZonedTimeFunction) DateTimeFunctions::monthOfYear, DateTimeTransformFunction.Month.class}, + {"week", (ZonedTimeFunction) DateTimeFunctions::weekOfYear, DateTimeTransformFunction.WeekOfYear.class}, {"weekOfYear", (ZonedTimeFunction) DateTimeFunctions::weekOfYear, DateTimeTransformFunction.WeekOfYear.class}, {"quarter", (ZonedTimeFunction) DateTimeFunctions::quarter, DateTimeTransformFunction.Quarter.class}, {"dayOfWeek", (ZonedTimeFunction) DateTimeFunctions::dayOfWeek, DateTimeTransformFunction.DayOfWeek.class}, - {"dow", (ZonedTimeFunction) DateTimeFunctions::dow, DateTimeTransformFunction.DayOfWeek.class}, + {"dow", (ZonedTimeFunction) DateTimeFunctions::dayOfWeek, DateTimeTransformFunction.DayOfWeek.class}, {"dayOfYear", (ZonedTimeFunction) DateTimeFunctions::dayOfYear, DateTimeTransformFunction.DayOfYear.class}, - {"doy", (ZonedTimeFunction) DateTimeFunctions::doy, DateTimeTransformFunction.DayOfYear.class}, + {"doy", (ZonedTimeFunction) DateTimeFunctions::dayOfYear, DateTimeTransformFunction.DayOfYear.class}, {"dayOfMonth", (ZonedTimeFunction) DateTimeFunctions::dayOfMonth, DateTimeTransformFunction.DayOfMonth.class}, {"day", (ZonedTimeFunction) DateTimeFunctions::dayOfMonth, DateTimeTransformFunction.DayOfMonth.class}, {"hour", (ZonedTimeFunction) DateTimeFunctions::hour, DateTimeTransformFunction.Hour.class}, diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ExtractTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ExtractTransformFunctionTest.java index 61d580c9a1..1c78b0def8 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ExtractTransformFunctionTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ExtractTransformFunctionTest.java @@ -36,7 +36,7 @@ public class ExtractTransformFunctionTest extends BaseTransformFunctionTest { return new Object[][]{ //@formatter:off {"year", (LongToIntFunction) DateTimeFunctions::year}, - {"month", (LongToIntFunction) DateTimeFunctions::month}, + {"month", (LongToIntFunction) DateTimeFunctions::monthOfYear}, {"day", (LongToIntFunction) DateTimeFunctions::dayOfMonth}, {"hour", (LongToIntFunction) DateTimeFunctions::hour}, {"minute", (LongToIntFunction) DateTimeFunctions::minute}, diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/parser/CalciteRexExpressionParser.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/parser/CalciteRexExpressionParser.java index f9021154a9..25a48bec1a 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/query/parser/CalciteRexExpressionParser.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/parser/CalciteRexExpressionParser.java @@ -19,8 +19,10 @@ package org.apache.pinot.query.parser; import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.sql.SqlKind; import org.apache.pinot.common.request.Expression; @@ -30,6 +32,7 @@ import org.apache.pinot.common.request.PinotQuery; import org.apache.pinot.common.utils.request.RequestUtils; import org.apache.pinot.query.planner.logical.RexExpression; import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.sql.FilterKind; import org.apache.pinot.sql.parsers.SqlCompilationException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,6 +47,14 @@ import org.slf4j.LoggerFactory; */ public class CalciteRexExpressionParser { private static final Logger LOGGER = LoggerFactory.getLogger(CalciteRexExpressionParser.class); + private static final Map<String, String> CANONICAL_NAME_TO_SPECIAL_KEY_MAP; + + static { + CANONICAL_NAME_TO_SPECIAL_KEY_MAP = new HashMap<>(); + for (FilterKind filterKind : FilterKind.values()) { + CANONICAL_NAME_TO_SPECIAL_KEY_MAP.put(canonicalizeFunctionNameInternal(filterKind.name()), filterKind.name()); + } + } private CalciteRexExpressionParser() { // do not instantiate. @@ -103,12 +114,12 @@ public class CalciteRexExpressionParser { Expression expression; switch (direction) { case DESCENDING: - expression = RequestUtils.getFunctionExpression("DESC"); + expression = getFunctionExpression("DESC"); expression.getFunctionCall().addToOperands(toExpression(rexNode, pinotQuery)); break; case ASCENDING: default: - expression = RequestUtils.getFunctionExpression("ASC"); + expression = getFunctionExpression("ASC"); expression.getFunctionCall().addToOperands(toExpression(rexNode, pinotQuery)); break; } @@ -118,7 +129,7 @@ public class CalciteRexExpressionParser { private static Expression convertDistinctAndSelectListToFunctionExpression(RexExpression.FunctionCall rexCall, PinotQuery pinotQuery) { String functionName = AggregationFunctionType.DISTINCT.getName(); - Expression functionExpression = RequestUtils.getFunctionExpression(functionName); + Expression functionExpression = getFunctionExpression(functionName); for (RexExpression node : rexCall.getFunctionOperands()) { Expression columnExpression = toExpression(node, pinotQuery); if (columnExpression.getType() == ExpressionType.IDENTIFIER && columnExpression.getIdentifier().getName() @@ -152,6 +163,8 @@ public class CalciteRexExpressionParser { } private static Expression rexLiteralToExpression(RexExpression.Literal rexLiteral) { + // TODO: currently literals are encoded as strings for V1, remove this and use directly literal type when it + // supports strong-type in V1. return RequestUtils.getLiteralExpression(rexLiteral.getValue()); } @@ -182,7 +195,7 @@ public class CalciteRexExpressionParser { operands.add(toExpression(childNode, pinotQuery)); } ParserUtils.validateFunction(functionName, operands); - Expression functionExpression = RequestUtils.getFunctionExpression(canonicalizeFunctionName(functionName)); + Expression functionExpression = getFunctionExpression(functionName); functionExpression.getFunctionCall().setOperands(operands); return functionExpression; } @@ -200,7 +213,7 @@ public class CalciteRexExpressionParser { operands.add(toExpression(childNode, pinotQuery)); } } - Expression andExpression = RequestUtils.getFunctionExpression(SqlKind.AND.name()); + Expression andExpression = getFunctionExpression(SqlKind.AND.name()); andExpression.getFunctionCall().setOperands(operands); return andExpression; } @@ -218,19 +231,31 @@ public class CalciteRexExpressionParser { operands.add(toExpression(childNode, pinotQuery)); } } - Expression andExpression = RequestUtils.getFunctionExpression(SqlKind.OR.name()); + Expression andExpression = getFunctionExpression(SqlKind.OR.name()); andExpression.getFunctionCall().setOperands(operands); return andExpression; } + private static Expression getFunctionExpression(String canonicalName) { + Expression expression = new Expression(ExpressionType.FUNCTION); + Function function = new Function(canonicalizeFunctionName(canonicalName)); + expression.setFunctionCall(function); + return expression; + } + + private static String canonicalizeFunctionName(String functionName) { + String canonicalizeName = canonicalizeFunctionNameInternal(functionName); + return CANONICAL_NAME_TO_SPECIAL_KEY_MAP.getOrDefault(canonicalizeName, canonicalizeName); + } + /** * Canonicalize Calcite generated Logical function names. */ - private static String canonicalizeFunctionName(String functionName) { + private static String canonicalizeFunctionNameInternal(String functionName) { if (functionName.endsWith("0")) { - return functionName.substring(0, functionName.length() - 1); + return functionName.substring(0, functionName.length() - 1).replace("_", "").toLowerCase(); } else { - return functionName; + return functionName.replace("_", "").toLowerCase(); } } } diff --git a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java index b5c707cb59..de4a1fd2b5 100644 --- a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java +++ b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/QueryRunnerTest.java @@ -223,9 +223,18 @@ public class QueryRunnerTest extends QueryRunnerTestBase { + "ON a.col1 = b.col1 AND a.col2 = b.col2", 15}, new Object[]{"SELECT dateTrunc('DAY', CAST(MAX(a.col3) AS BIGINT)) FROM a", 1}, - // test regexpLike + // ScalarFunction + // test function can be used in predicate/leaf/intermediate stage (using regexpLike) new Object[]{"SELECT a.col1, b.col1 FROM a JOIN b ON a.col3 = b.col3 WHERE regexpLike(a.col2, b.col1)", 9}, + new Object[]{"SELECT a.col1, b.col1 FROM a JOIN b ON a.col3 = b.col3 WHERE regexp_like(a.col2, b.col1)", 9}, new Object[]{"SELECT regexpLike(a.col1, b.col1) FROM a JOIN b ON a.col3 = b.col3", 39}, + new Object[]{"SELECT regexp_like(a.col1, b.col1) FROM a JOIN b ON a.col3 = b.col3", 39}, + + // test function with @ScalarFunction annotation and alias works (using round_decimal) + new Object[]{"SELECT roundDecimal(col3) FROM a", 15}, + new Object[]{"SELECT round_decimal(col3) FROM a", 15}, + new Object[]{"SELECT col1, roundDecimal(COUNT(*)) FROM a GROUP BY col1", 5}, + new Object[]{"SELECT col1, round_decimal(COUNT(*)) FROM a GROUP BY col1", 5}, }; } @@ -235,11 +244,22 @@ public class QueryRunnerTest extends QueryRunnerTestBase { // Function with incorrect argument signature should throw runtime exception when casting string to numeric new Object[]{"SELECT least(a.col2, b.col3) FROM a JOIN b ON a.col1 = b.col1", "For input string:"}, - // TODO: this error is thrown but not returned through mailbox. need another test for asserting failure - // during stage runtime init. - // standard SqlOpTable function that runs out of signature list in actual impl throws not found exception - // new Object[]{"SELECT CASE WHEN col3 > 10 THEN 1 WHEN col3 > 20 THEN 2 WHEN col3 > 30 THEN 3 " - // + "WHEN col3 > 40 THEN 4 WHEN col3 > 50 THEN 5 WHEN col3 > 60 THEN '6' ELSE 0 END FROM a", "caseWhen"}, + + // Scalar function that doesn't have a valid use should throw an exception on the leaf stage + // - predicate only functions: + new Object[]{"SELECT * FROM a WHERE textMatch(col1, 'f')", "without text index"}, + new Object[]{"SELECT * FROM a WHERE text_match(col1, 'f')", "without text index"}, + new Object[]{"SELECT * FROM a WHERE textContains(col1, 'f')", "supported only on native text index"}, + new Object[]{"SELECT * FROM a WHERE text_contains(col1, 'f')", "supported only on native text index"}, + + // - transform only functions + new Object[]{"SELECT jsonExtractKey(col1, 'path') FROM a", "was expecting (JSON String"}, + new Object[]{"SELECT json_extract_key(col1, 'path') FROM a", "was expecting (JSON String"}, + + // - PlaceholderScalarFunction registered will throw on intermediate stage, but works on leaf stage. + new Object[]{"SELECT CAST(jsonExtractScalar(col1, 'path', 'INT') AS INT) FROM a", "Illegal Json Path"}, + new Object[]{"SELECT CAST(json_extract_scalar(a.col1, b.col2, 'INT') AS INT) FROM a JOIN b ON a.col1 = b.col1", + "PlaceholderScalarFunctions.jsonExtractScalar"}, }; } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org