This is an automated email from the ASF dual-hosted git repository. liyang pushed a commit to branch kylin5 in repository https://gitbox.apache.org/repos/asf/kylin.git
commit a8ad269b3c37d8670f0c5205ecef9d60a2796df3 Author: lixiang <447399...@qq.com> AuthorDate: Fri Jul 21 15:29:49 2023 +0800 KYLIN-5766 Normalize query cache key --- .../org/apache/kylin/rest/request/SQLRequest.java | 4 +- .../apache/kylin/query/util/WhiteSpaceParser.jj | 887 +++++++++++++++++++++ .../apache/kylin/rest/service/QueryService.java | 6 + .../kylin/query/util/WhiteSpaceParserTest.java | 110 +++ 4 files changed, 1006 insertions(+), 1 deletion(-) diff --git a/src/common-service/src/main/java/org/apache/kylin/rest/request/SQLRequest.java b/src/common-service/src/main/java/org/apache/kylin/rest/request/SQLRequest.java index 58d9a82596..2bf34c75c5 100644 --- a/src/common-service/src/main/java/org/apache/kylin/rest/request/SQLRequest.java +++ b/src/common-service/src/main/java/org/apache/kylin/rest/request/SQLRequest.java @@ -77,13 +77,15 @@ public class SQLRequest implements Serializable, ProjectInsensitiveRequest, Vali @Size(max = 256) private String user_defined_tag; + private String normalizedSql; + protected volatile Object cacheKey = null; public Object getCacheKey() { if (cacheKey != null) return cacheKey; - cacheKey = Lists.newArrayList(sql.replaceAll("[ ]", " "), // + cacheKey = Lists.newArrayList(normalizedSql, // project, // offset, // limit, // diff --git a/src/query-common/src/main/codegen/javacc/org/apache/kylin/query/util/WhiteSpaceParser.jj b/src/query-common/src/main/codegen/javacc/org/apache/kylin/query/util/WhiteSpaceParser.jj new file mode 100644 index 0000000000..c07936792a --- /dev/null +++ b/src/query-common/src/main/codegen/javacc/org/apache/kylin/query/util/WhiteSpaceParser.jj @@ -0,0 +1,887 @@ +options { + STATIC = false; + IGNORE_CASE = true; + UNICODE_INPUT = true; +} + +PARSER_BEGIN(WhiteSpaceParser) +package org.apache.kylin.query.util; + +import java.io.StringReader; +import java.nio.charset.Charset; +import java.util.List; +import java.util.Locale; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Set; +import java.util.Scanner; + +import org.apache.commons.lang3.StringUtils; + +import org.apache.kylin.guava30.shaded.common.collect.ImmutableSet; +import org.apache.kylin.guava30.shaded.common.collect.Lists; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class WhiteSpaceParser { + + private static final String CEIL_FLOOR_EXCEPTION_MSG = "%s(exp to timeunit) encounters timeunit %s, but only supports %s"; + private static final String EXTRACT_FROM_EXCEPTION_MSG = "extract(timeunit from exp) encounters timeunit %s, but only supports %s"; + private static final String TIMESTAMP_DIFF_ADD_EXCEPTION_MSG = "%s(timeunit, exp1, exp2) encounters timeunit %s, but only supports %s"; + private static final String COMMA_STR = ","; + private static final String SPACE_STR = " "; + private static final char SPACE_CHAR = ' '; + private static final String QUOTE_STR = "'"; + private static final Set<String> OPERATORS = new HashSet(Arrays.asList("+", "-", "*", "/", "%", "=",">=", "<=", "!=", "<>", "||")); + + private EscapeDialect dialect = EscapeDialect.DEFAULT; + + public static void main(String[] args) throws Exception { + String sql = "select * col1 ,\ncol2,col3 from \n tableA \\r a left \t\n table\rb ( select * from t) "; + WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(new StringReader(sql)); + System.out.println("transformed sql is :" + whiteSpaceParser.parse()); + } + + private static String normalize(String str) { + if (str.equals(COMMA_STR)) { + str = str + SPACE_STR; + } else if (OPERATORS.contains(str)) { + str = SPACE_STR + str + SPACE_STR; + } + return str; + } + + public WhiteSpaceParser(EscapeDialect newDialect, String SQL) { + this(new StringReader(SQL)); + this.dialect = newDialect; + } + + private static boolean needDropPreSpace(String str) { + return COMMA_STR.equals(str) || OPERATORS.contains(str) + || (StringUtils.isBlank(str) && !SPACE_STR.equals(str)); + } + + private static boolean skipSpace(String str) { + return COMMA_STR.equals(str) || OPERATORS.contains(str); + } + + private static void appendExpression(StringBuilder sb, String innerString) { + if (sb.length() != 0) { + int last = sb.length() - 1; + if (sb.charAt(last) != SPACE_CHAR && innerString.startsWith(QUOTE_STR)) { + sb.append(SPACE_STR); + } + } + + int len = sb.length(); + if (!SPACE_STR.equals(innerString) || (len != 0 && sb.charAt(len - 1) != SPACE_CHAR)) { + String tmp = normalize(innerString); + if (skipSpace(innerString)) { + sb.append(tmp); + } else { + sb.append(tmp).append(SPACE_STR); + } + } + } + +} + +PARSER_END(WhiteSpaceParser) + +<ESCAPE, FUNCTION, TIMEUNIT> +SKIP : { + <([" ", "\t", "\n", "\r", "\f"])+ > +} + +<NILADIC> +SKIP : +{ + <PAREN: ((<SPACE>)* <LPAREN> (<SPACE>)* <RPAREN>)? > : DEFAULT +} + +< DEFAULT > +TOKEN : +{ + < LBRACE : "{" > : ESCAPE +| < RBRACE : "}" > +| < RPAREN : ")" > +| < LPAREN : "(" > +| < COMMA : "," > +| < SPACE : ([" ", "\t", "\n", "\r", "\f"])+ > +| < QUOTE: "'" > +// possible strings copied from calcite +| + < BINARY_STRING_LITERAL: ["x","X"] <QUOTE> ( (~["'"]) | ("''"))* <QUOTE> > +| + < QUOTED_STRING: <QUOTE> ( (~["'"]) | ("''"))* <QUOTE> > +| + < PREFIXED_STRING_LITERAL: ("_" <CHARSETNAME> | "N") <QUOTED_STRING> > +| + < UNICODE_STRING_LITERAL: "U" "&" <QUOTED_STRING> > +| + < #CHARSETNAME: (["a"-"z","A"-"Z","0"-"9"]) + (["a"-"z","A"-"Z","0"-"9",":",".","-","_"])* + > +| < DOUBLE_QUOTE : "\""> +| < QUOTED_IDENTIFIER: <DOUBLE_QUOTE> ( (~["\"","\n","\r"]) | ("\"\"") )+ <DOUBLE_QUOTE> > +| < HINT: "/*" (~["\n", "\r"])* "*/" > +| < CUBE_PRIORITY: "--" ([" "])* "CubePriority(" (~["\n", "\r", ")"])* ")" ([" "])* "\n" > +| < UNSIGNED_INTEGER_LITERAL: (["0"-"9"])+ > +| < APPROX_NUMERIC_LITERAL:(<UNSIGNED_INTEGER_LITERAL> | <DECIMAL_NUMERIC_LITERAL>) <EXPONENT> > +| < DECIMAL_NUMERIC_LITERAL:(["0"-"9"])+(".")?(["0"-"9"])*| "."(["0"-"9"])+> +| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ > +| < DOT : "." > +| < CEIL : "CEIL" > +| < CEIL_DATETIME : "CEIL_DATETIME" > +| < FLOOR_DATETIME : "FLOOR_DATETIME" > +| < FLOOR : "FLOOR" > +| < TO : "TO" > +| < SUBSTRING : "SUBSTRING" > +| < SUBSTR : "SUBSTR" > +| < FROM : "FROM" > +| < FOR : "FOR" > +| < CAST : "CAST" > +| < AS : "AS" > +| < TIMESTAMPADD : "TIMESTAMPADD" > +| < TIMESTAMPDIFF : "TIMESTAMPDIFF" > +| < EXTRACT : "EXTRACT" > +| < OVERLAY : "OVERLAY" > +| < PLACING : "PLACING" > +| < GROUPING : "GROUPING" > +| < SETS : "SETS" > +| < PI : "PI" > : NILADIC +| < ANY : (~[ "{", "}", " ", "(", ")", ",", "'", "\t", "\n", "\r", "\"", ".", + "=", ">", "<", "?", ":", "!", "+", "-", "*", "/", "%", "|", "^", "$" ])+ > +} + +< DEFAULT > +TOKEN : +{ + < EQ: "=" > +| < GT: ">" > +| < LT: "<" > +| < HOOK: "?" > +| < COLON: ":" > +| < LE: "<=" > +| < GE: ">=" > +| < NE: "<>" > +| < BANGEQUAL : "!=" > +| < NOT: "!" > +| < PLUS: "+" > +| < MINUS: "-" > +| < STAR: "*" > +| < SLASH: "/" > +| < MOD: "%" > +| < CONCAT: "||" > +| < VERTICAL_BAR: "|" > +| < CARET: "^" > +| < DOLLAR: "$" > +} + +< ESCAPE > +TOKEN : +{ + < FN : "fn" > : FUNCTION +| < TS : "ts" > : DEFAULT +| < D : "d" > : DEFAULT +| < DATE : "date" > : DEFAULT +| < T : "t" > : DEFAULT +| < TIME : "time" > : DEFAULT +} + +< FUNCTION > +TOKEN : +{ + < FUNCTION_NAME : <ANY> > : DEFAULT +} + + +String parse(): +{ + StringBuilder transformedStr = new StringBuilder(); + String inner; +} +{ + ( + LOOKAHEAD(2) + inner = Expression() + { + if(transformedStr.length() != 0) { + int lastIndex = transformedStr.length() - 1; + if (transformedStr.charAt(lastIndex) == SPACE_CHAR && needDropPreSpace(inner)) { + transformedStr.deleteCharAt(lastIndex); + } + } + appendExpression(transformedStr, inner); + } + )+ + <EOF> + { + return transformedStr.toString(); + } +} + +String Expression() : +{ + String innerString = ""; +} +{ + { + if (Thread.currentThread().isInterrupted()) { + throw new ParseException("EscapeParser is interrupted"); + } + } + (innerString = EscapeExpress() + | innerString = SubstringExpression() + | innerString = OverlayExpression() + | innerString = GroupingSetsExpression() + | innerString = TsDiffOrAddExpression() + | innerString = Hint() + | innerString = CubePriority() + | innerString = Numeric() + | innerString = CastExpression() + | innerString = ExtractExpression() + | innerString = CeilFloorExpress() + | innerString = CeilFloorDatetimeExpress() + | innerString = ParenExpress() + | innerString = QuotedString() + | innerString = DoubleQuotedString() + | innerString = BangEqual() + | innerString = Comma() + | innerString = Space() + | innerString = PiFunction() + | innerString = Punctuation() + | innerString = From() + | innerString = For() + | innerString = To() + | innerString = As() + | innerString = Any() + ) + { + return innerString; + } +} + +String Comma() : +{} +{ + < COMMA > + { + log.trace("meet token <COMMA>"); + return getToken(0).image; + } +} + +String Space() : +{} +{ + < SPACE > + { + log.trace("meet token <SPACE>"); + return " "; + } +} + +String Punctuation() : +{} +{ + (<DOT> | <LE> |<GE> | <NE> | <BANGEQUAL> | <EQ> | <GT> | <LT> | <NOT> | <HOOK> | <COLON> | <PLUS> + | <MINUS> | <STAR> | <SLASH> | <MOD> | <CONCAT> | <CARET> | <DOLLAR> | <VERTICAL_BAR> ) + { + log.trace("meet token {}", getToken(0).toString()); + return getToken(0).image; + } +} + +String From() : +{} +{ + < FROM > + { + log.trace("met token <FROM>"); + return getToken(0).image; + } +} + +String For() : +{} +{ + < FOR > + { + log.trace("met token <FOR>"); + return getToken(0).image; + } +} + +String To() : +{} +{ + < TO > + { + log.trace("met token <TO>"); + return getToken(0).image; + } +} + +String As() : +{} +{ + < AS > + { + log.trace("met token <AS>"); + return getToken(0).image; + } +} + +String DoubleQuotedString() : +{} +{ + <QUOTED_IDENTIFIER> + { + return dialect.transformDoubleQuoteString(getToken(0).image); + } +} + +String QuotedString() : +{ + String s; +} +{ + <BINARY_STRING_LITERAL> + { + log.trace("meet token in <BINARY_STRING_LITERAL>: " + getToken(0).image); + return getToken(0).image; + } + | <QUOTED_STRING> + { + log.trace("meet token in <QUOTED_STRING>: " + getToken(0).image); + return getToken(0).image; + } + | <PREFIXED_STRING_LITERAL> + { + log.trace("meet token in <PREFIXED_STRING_LITERAL>: " + getToken(0).image); + return getToken(0).image; + } + | <UNICODE_STRING_LITERAL> + { + log.trace("meet token in <UNICODE_STRING_LITERAL>: " + getToken(0).image); + return getToken(0).image; + } +} + +String Any() : +{} +{ + < ANY > + { + log.trace("meet token in <ANY>: " + getToken(0).image); + return getToken(0).image; + } +} + +String ParenExpress() : +{ + String innerString; + StringBuilder transformed = new StringBuilder(); +} +{ + < LPAREN > + ( + innerString = Expression() + { + if(transformed.length() != 0) { + int lastIndex = transformed.length() - 1; + if (transformed.charAt(lastIndex) == SPACE_CHAR && needDropPreSpace(innerString)) { + transformed.deleteCharAt(lastIndex); + } + innerString = normalize(innerString); + } + appendExpression(transformed, innerString); + } + )* + < RPAREN > + { + return "(" + transformed.toString() + ")"; + } +} + +String EscapeExpress() : +{ + String innerString; +} +{ + < LBRACE > + (< SPACE >)* + ( + innerString = EscapeFunction() + | innerString = EscapeTimestamp() + | innerString = EscapeDate() + | innerString = EscapeTime() + ) + (< SPACE >)* + < RBRACE > + { + return innerString; + } +} + +String CastExpression() : +{ + String function; + List<String> parameters = Lists.newArrayList(); +} +{ + <CAST> + { + function = getToken(0).image; + } + (<SPACE> )* <LPAREN> (<SPACE>)* + { + parameters.add(ParameterExpression().trim()); + } + (<SPACE>)* + <AS> + { + parameters.add(getToken(0).image); + } + (<SPACE>)* + { + String type = dialect.transformDataType(ParameterExpression().trim()); + parameters.add(type); + } + (< SPACE >)* <RPAREN> + { + return String.format(Locale.ROOT, "%s(%s)", function, String.join(SPACE_STR, parameters)); + } +} + +String ExtractExpression() : +{ + String functionName; + String parameter; + ImmutableSet<String> timeunitSet = ImmutableSet.of("YEAR", "QUARTER", "MONTH", "WEEK", + "DAY", "HOUR", "MINUTE", "SECOND", "DOW", "DOY", "EPOCH"); +} +{ + <EXTRACT> (<SPACE> )* <LPAREN> (<SPACE> )* + { + functionName = ParameterExpression().trim(); + if (!timeunitSet.contains(functionName.toUpperCase(Locale.ROOT))) { + throw new IllegalArgumentException(String.format(Locale.ROOT, EXTRACT_FROM_EXCEPTION_MSG, + functionName, String.join(", ", timeunitSet))); + } + if (functionName.equalsIgnoreCase("DAY")) { + functionName = "DAYOFMONTH"; + } else if (functionName.equalsIgnoreCase("DOW")) { + functionName = "DAYOFWEEK"; + } else if (functionName.equalsIgnoreCase("DOY")) { + functionName = "DAYOFYEAR"; + } +} + (<SPACE>)* <FROM> (<SPACE>)* + { + parameter = ParameterExpression().trim(); + } + (< SPACE >)* <RPAREN> + { + return dialect.transformFN(functionName, new String[]{ parameter }); + } +} + +String TsDiffOrAddExpression() : +{ + String functionName; + List<String> parameters = Lists.newArrayList(); + ImmutableSet<String> timeunitSet = ImmutableSet.of("YEAR", "QUARTER", "MONTH", "WEEK", + "DAY", "HOUR", "MINUTE", "SECOND", "SQL_TSI_SECOND", "SQL_TSI_MINUTE", "SQL_TSI_HOUR", + "SQL_TSI_DAY", "SQL_TSI_WEEK", "SQL_TSI_MONTH", "SQL_TSI_QUARTER","SQL_TSI_YEAR"); +} +{ + (<TIMESTAMPDIFF> | <TIMESTAMPADD>) + { + functionName = getToken(0).image; + } + (< SPACE >)* <LPAREN> (< SPACE >)* + { + String timeunit = ParameterExpression().trim(); + if (timeunit.startsWith(QUOTE_STR) && timeunit.endsWith(QUOTE_STR) && timeunit.length() > 2) { + timeunit = timeunit.substring(1, timeunit.length() - 1); + } + if (!timeunitSet.contains(timeunit.toUpperCase(Locale.ROOT))) { + throw new IllegalStateException(String.format(Locale.ROOT, TIMESTAMP_DIFF_ADD_EXCEPTION_MSG, + functionName, timeunit, String.join(", ", timeunitSet))); + } + if (timeunit.toUpperCase(Locale.ROOT).startsWith("SQL_TSI_")) { + timeunit = timeunit.toUpperCase(Locale.ROOT).replace("SQL_TSI_", ""); + } + parameters.add(dialect.transformTimeunitOfTimestampDiffOrAdd(timeunit)); + } + (< SPACE >)* <COMMA> (< SPACE >)* + { + parameters.add(ParameterExpression().trim()); + } + (< SPACE >)* <COMMA> (< SPACE >)* + { + parameters.add(ParameterExpression().trim()); + } + (< SPACE >)* <RPAREN> + { + return dialect.transformFN(functionName, parameters.toArray(new String [ 0 ])); + } +} + +String SubstringExpression(): +{ + String functionName; + List<String> parameters = Lists.newArrayList(); + List<String> params = Lists.newArrayList(); +} +{ + (< SUBSTR > | < SUBSTRING > ) + { + functionName = getToken(0).image; + } + (< SPACE >)* <LPAREN> (< SPACE >)* + { + parameters.add(ParameterExpression().trim()); + } + (< SPACE >)* (<FROM> | <COMMA>) (<SPACE> )* + { + parameters.add(ParameterExpression().trim()); + } + ( + (< SPACE >)* (< FOR > | <COMMA>) (< SPACE >)* + { + parameters.add(ParameterExpression().trim()); + } + )? + (< SPACE >)* < RPAREN > + { + return dialect.transformFN(functionName, parameters.toArray(new String [ 0 ])); + } +} + +String OverlayExpression(): +{ + String functionName; + List<String> parameters = Lists.newArrayList(); + List<String> params = Lists.newArrayList(); +} +{ + < OVERLAY> + { + functionName = getToken(0).image; + } + (< SPACE >)* <LPAREN> (< SPACE >)* + { + parameters.add(ParameterExpression().trim()); + } + (< SPACE >)* <PLACING> (< SPACE >)* + { + parameters.add(ParameterExpression().trim()); + } + (< SPACE >)* (<FROM> | <COMMA>) (<SPACE> )* + { + parameters.add(ParameterExpression().trim()); + } + ( + (< SPACE >)* (< FOR > | <COMMA>) (< SPACE >)* + { + parameters.add(ParameterExpression().trim()); + } + )? + (< SPACE >)* < RPAREN > + { + return dialect.transformFN(functionName, parameters.toArray(new String [ 0 ])); + } +} + +String GroupingSetsExpression() : +{ + String function; + List <String> parameters = Lists.newArrayList(); +} +{ + <GROUPING> + { + function = getToken(0).image; + } + ( + (<SPACE>)* + <SETS> + { + log.trace("recognized grouping sets"); + function = getToken(0).image; + } + )? + { + log.trace("recognized grouping function"); + } + (<SPACE>)* <LPAREN> (< SPACE >)* + { + parameters.add(ParameterExpression().trim()); + } + ( + (< SPACE >)* <COMMA> (< SPACE >)* + { + parameters.add(ParameterExpression().trim()); + } + )* + (<SPACE>)* <RPAREN> + { + return dialect.transformFN(function, parameters.toArray(new String [ 0 ])); + } +} + +String CeilFloorExpress() : +{ + String functionName; + List <String> parameters = Lists.newArrayList(); + ImmutableSet<String> timeunitSet = ImmutableSet.of("YEAR", "QUARTER", "MONTH", "WEEK", + "DAY", "HOUR", "MINUTE", "SECOND"); +} +{ + ( < CEIL > | < FLOOR > ) + { + functionName = getToken(0).image; + } + (< SPACE >)* <LPAREN> (< SPACE >)* + { + parameters.add(ParameterExpression().trim()); + } + (< SPACE >)* + ( + <TO> (<SPACE>)* + { + String timeUnit = ParameterExpression().trim(); + if (!timeunitSet.contains(timeUnit.toUpperCase(Locale.ROOT))) { + throw new IllegalStateException(String.format(Locale.ROOT, CEIL_FLOOR_EXCEPTION_MSG, + functionName, timeUnit, String.join(", ", timeunitSet))); + } + parameters.add(timeUnit); + } + (<SPACE>)* + )? + <RPAREN> + { + return dialect.transformFN(functionName, parameters.toArray(new String [ 0 ])); + } +} + +String CeilFloorDatetimeExpress() : +{ + String functionName; + List <String> parameters = Lists.newArrayList(); + ImmutableSet<String> timeunitSet = ImmutableSet.of("YEAR", "QUARTER", "MONTH", "WEEK", + "DAY", "HOUR", "MINUTE", "SECOND"); +} +{ + ( < CEIL_DATETIME> | < FLOOR_DATETIME> ) + { + functionName = getToken(0).image; + } + (< SPACE >)* <LPAREN> (< SPACE >)* + { + parameters.add(ParameterExpression().trim()); + } + (< SPACE >)* + ( + <COMMA> (<SPACE>)* + { + String timeUnit = StringUtils.trim(ParameterExpression()); + String unitToValidate = StringUtils.remove(timeUnit, '\''); + if (!timeunitSet.contains(StringUtils.upperCase(unitToValidate))) { + throw new IllegalStateException(String.format(Locale.ROOT, CEIL_FLOOR_EXCEPTION_MSG, + functionName, unitToValidate, String.join(", ", timeunitSet))); + } + parameters.add(timeUnit); + } + (<SPACE>)* + )? + <RPAREN> + { + return dialect.transformFN(functionName, parameters.toArray(new String [ 0 ])); + } +} + +String PiFunction() : +{ + String function; +} +{ + <PI> + { + return dialect.transformNiladicFunction("PI"); + } +} + +String Hint(): +{} +{ + <HINT> + { + log.trace("meet token <HINT>"); + return getToken(0).image; + } +} + +String CubePriority(): +{} +{ + <CUBE_PRIORITY> + { + log.trace("meet token <CubePriority>"); + return getToken(0).image; + } +} + +String Numeric(): +{} +{ + (<UNSIGNED_INTEGER_LITERAL> | <DECIMAL_NUMERIC_LITERAL> | <APPROX_NUMERIC_LITERAL>) + { + log.trace("meet token <APPROX_NUMERIC_LITERAL>"); + return getToken(0).image; + } +} + +String EscapeFunction() : +{ + String functionName; + String param; + List <String> parameters = Lists.newArrayList(); + ImmutableSet<String> tsFunctions = ImmutableSet.of("TIMESTAMPDIFF", "TIMESTAMPADD"); +} +{ + < FN >< FUNCTION_NAME > + { + functionName = getToken(0).image; + } + < LPAREN > (<SPACE>)* + ( + param = ParameterExpression() + { + if (tsFunctions.contains(functionName.trim().toUpperCase(Locale.ROOT))) { + if (param.toUpperCase(Locale.ROOT).startsWith("SQL_TSI_")) { + param = param.toUpperCase(Locale.ROOT).replace("SQL_TSI_", ""); + } + parameters.add(dialect.transformTimeunitOfTimestampDiffOrAdd(param)); + } else { + parameters.add(param); + } + } + ( + < COMMA > EscapeFunctionParameter(parameters) + )* + )? + < RPAREN > + { + return dialect.transformFN(functionName, parameters.toArray(new String [ 0 ])); + } +} + +void EscapeFunctionParameter(List<String> paramCollection) : +{ + String parameter = ""; +} +{ + ( + parameter = ParameterExpression() + ) + { + paramCollection.add(parameter.trim()); + } +} + +String ParameterExpression() : +{ + String innerString = ""; + String nextString = ""; +} +{ + ( + innerString = EscapeExpress() + | innerString = SubstringExpression() + | innerString = OverlayExpression() + | innerString = GroupingSetsExpression() + | innerString = CastExpression() + | innerString = TsDiffOrAddExpression() + | innerString = ExtractExpression() + | innerString = CeilFloorExpress() + | innerString = CeilFloorDatetimeExpress() + | innerString = Numeric() + | innerString = Hint() + | innerString = CubePriority() + | innerString = ParenExpress() + | innerString = QuotedString() + | innerString = DoubleQuotedString() + | innerString = Space() + | innerString = PiFunction() + | innerString = Punctuation() + | innerString = Any() + ) + ( + LOOKAHEAD(2) + nextString = ParameterExpression() + )? + { + return innerString + nextString; + } +} + +String EscapeTimestamp() : +{ + String timestampExpr; +} +{ + < TS > + (< SPACE >)* + ( + timestampExpr = QuotedString() + ) + { + return "TIMESTAMP " + timestampExpr; + } +} + +String EscapeDate() : +{ + String timestampExpr; +} +{ + (< DATE > | <D>) + (< SPACE >)* + ( + timestampExpr = QuotedString() + ) + { + return "DATE " + timestampExpr; + } +} + +String EscapeTime() : +{ + String timestampExpr; +} +{ + (< T > | <TIME>) + (< SPACE >)* + ( + timestampExpr = QuotedString() + ) + { + return "TIME " + timestampExpr; + } +} + +String BangEqual() : +{ +} +{ + <BANGEQUAL> + { + return "<>"; + } +} diff --git a/src/query-service/src/main/java/org/apache/kylin/rest/service/QueryService.java b/src/query-service/src/main/java/org/apache/kylin/rest/service/QueryService.java index 31f6a9f353..3d453ce642 100644 --- a/src/query-service/src/main/java/org/apache/kylin/rest/service/QueryService.java +++ b/src/query-service/src/main/java/org/apache/kylin/rest/service/QueryService.java @@ -133,6 +133,7 @@ import org.apache.kylin.query.exception.NotSupportedSQLException; import org.apache.kylin.query.exception.UserStopQueryException; import org.apache.kylin.query.relnode.ContextUtil; import org.apache.kylin.query.relnode.OLAPContext; +import org.apache.kylin.query.util.EscapeDialect; import org.apache.kylin.query.util.QueryLimiter; import org.apache.kylin.query.util.QueryModelPriorities; import org.apache.kylin.query.util.QueryParams; @@ -141,6 +142,7 @@ import org.apache.kylin.query.util.RawSql; import org.apache.kylin.query.util.RawSqlParser; import org.apache.kylin.query.util.SlowQueryDetector; import org.apache.kylin.query.util.TokenMgrError; +import org.apache.kylin.query.util.WhiteSpaceParser; import org.apache.kylin.rest.aspect.Transaction; import org.apache.kylin.rest.cluster.ClusterManager; import org.apache.kylin.rest.config.AppConfig; @@ -592,6 +594,10 @@ public class QueryService extends BasicService implements CacheSignatureQuerySup RawSql rawSql = new RawSqlParser(sqlRequest.getSql()).parse(); rawSql.autoAppendLimit(kylinConfig, sqlRequest.getLimit(), sqlRequest.getOffset()); + WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(EscapeDialect.DEFAULT, rawSql.getStatementString().trim()); + + sqlRequest.setNormalizedSql(whiteSpaceParser.parse()); + // Reset request sql for code compatibility sqlRequest.setSql(rawSql.getStatementString()); // Set user sql for log & record purpose diff --git a/src/query/src/test/java/org/apache/kylin/query/util/WhiteSpaceParserTest.java b/src/query/src/test/java/org/apache/kylin/query/util/WhiteSpaceParserTest.java new file mode 100644 index 0000000000..271828d66d --- /dev/null +++ b/src/query/src/test/java/org/apache/kylin/query/util/WhiteSpaceParserTest.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.query.util; + +import org.junit.Assert; +import org.junit.Test; + +public class WhiteSpaceParserTest { + + private final EscapeDialect dialect = EscapeDialect.DEFAULT; + + private static final String expected = "select ACCOUNT_ID, ACCOUNT_ID + 1, 'a + b', 'c* d', ACCOUNT_ID + 2, count ( * ) from KYLIN_ACCOUNT group by ACCOUNT_ID limit 400 "; + + private static final String realSqlWithBinary = "select ACCOUNT_ID, ACCOUNT_ID + 1, x'a+ b', X'c * d', ACCOUNT_ID + 2, count ( * ) from KYLIN_ACCOUNT group by ACCOUNT_ID limit 400 "; + + private static final String realSql = "select ACCOUNT_ID, ACCOUNT_ID + 1, 'a+ b', 'c * d', ACCOUNT_ID + 2, count ( * ) from KYLIN_ACCOUNT group by ACCOUNT_ID limit 400 "; + + @Test + public void testMultiWhiteSpaces() throws ParseException { + String sql = "select ACCOUNT_ID, ACCOUNT_ID + 1 ,'a + b' ,'c* d', ACCOUNT_ID +2,count(*) from KYLIN_ACCOUNT group by ACCOUNT_ID limit 400 "; + WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(dialect, sql.trim()); + String parsed = whiteSpaceParser.parse(); + Assert.assertEquals(expected, parsed); + } + + @Test + public void testNewLines() throws ParseException { + String newLines = "select ACCOUNT_ID, ACCOUNT_ID + 1 ,'a + b' ,'c* d', ACCOUNT_ID +2,count(*) from " + + "KYLIN_ACCOUNT " + "group by ACCOUNT_ID " + "limit 400 "; + testEqual(newLines); + } + + @Test + public void testCommas() throws ParseException { + String commas = "select ACCOUNT_ID , ACCOUNT_ID + 1 , 'a + b' ,'c* d', ACCOUNT_ID +2,count(*) from " + + "KYLIN_ACCOUNT " + "group by ACCOUNT_ID " + "limit 400 "; + testEqual(commas); + } + + @Test + public void testOperators() throws ParseException { + String operators = "select ACCOUNT_ID , ACCOUNT_ID + 1 , 'a + b' ,'c* d', ACCOUNT_ID+2,count(*) from " + + "KYLIN_ACCOUNT " + "group by ACCOUNT_ID " + "limit 400 "; + testEqual(operators); + } + + private void testEqual(String sql) throws ParseException { + WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(dialect, sql.trim()); + String parsed = whiteSpaceParser.parse(); + Assert.assertEquals(expected, parsed); + } + + @Test + public void testNormalEscapeString() throws ParseException { + String sql = "select ACCOUNT_ID , ACCOUNT_ID + 1 , 'a+ b' ,'c * d', ACCOUNT_ID+2,count(*) from " + + "KYLIN_ACCOUNT " + "group by ACCOUNT_ID " + "limit 400 "; + WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(dialect, sql.trim()); + String parsed = whiteSpaceParser.parse(); + Assert.assertNotEquals(expected, parsed); + Assert.assertEquals(realSql, parsed); + } + + @Test + public void testBinaryEscapeStringWithRegexWhiteSpace() throws ParseException { + String sql = "select ACCOUNT_ID , ACCOUNT_ID + 1 , x'a+ b' ,X'c * d', ACCOUNT_ID+2,count(*) from " + + "KYLIN_ACCOUNT " + "group by ACCOUNT_ID " + "limit 400 "; + String sql1 = "select ACCOUNT_ID , \n ACCOUNT_ID \r + 1 , x'a+ b' ,X'c * d',\f ACCOUNT_ID+2,count(*) from " + + "KYLIN_ACCOUNT " + "group by ACCOUNT_ID " + "limit 400 "; + String sql2 = "select ACCOUNT_ID , \n\n ACCOUNT_ID \r\r + 1 , x'a+ b' ,X'c * d',\f ACCOUNT_ID+2,count(*) from " + + "KYLIN_ACCOUNT " + "group by ACCOUNT_ID " + "limit 400 "; + String sql3 = "select \n ACCOUNT_ID , \n\n\r\t ACCOUNT_ID \r\r + 1 , x'a+ b' ,X'c * d',\f ACCOUNT_ID+2,count(*) from " + + "KYLIN_ACCOUNT " + "group by ACCOUNT_ID " + "limit 400 "; + testBinary(sql); + testBinary(sql1); + testBinary(sql2); + testBinary(sql3); + } + + private void testBinary(String sql) throws ParseException { + WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(dialect, sql.trim()); + String parsed = whiteSpaceParser.parse(); + Assert.assertNotEquals(expected, parsed); + Assert.assertEquals(realSqlWithBinary, parsed); + } + + @Test + public void testExtractEpoch() throws ParseException { + String sql = "SELECT ((EXTRACT(EPOCH FROM (DATE '2050-01-01')) / ( 60 * 60 * 24)) + (365 * 70 + 17)) AS \"TEMP(Test)(3947742720)(0)\" FROM \"TDVT\".\"CALCS\" \"CALCS\" HAVING (COUNT(1) > 0)"; + WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(dialect, sql.trim()); + String parsed = whiteSpaceParser.parse(); + String real = "SELECT (({fn EPOCH((DATE '2050-01-01' ))} / (60 * 60 * 24 ) ) + (365 * 70 + 17 ) ) AS \"TEMP(Test)(3947742720)(0)\" FROM \"TDVT\" . \"CALCS\" \"CALCS\" HAVING (COUNT (1 ) > 0 ) "; + Assert.assertEquals(real, parsed); + } +}