This is an automated email from the ASF dual-hosted git repository.

liyang pushed a commit to branch kylin5
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit a8ad269b3c37d8670f0c5205ecef9d60a2796df3
Author: lixiang <447399...@qq.com>
AuthorDate: Fri Jul 21 15:29:49 2023 +0800

    KYLIN-5766 Normalize query cache key
---
 .../org/apache/kylin/rest/request/SQLRequest.java  |   4 +-
 .../apache/kylin/query/util/WhiteSpaceParser.jj    | 887 +++++++++++++++++++++
 .../apache/kylin/rest/service/QueryService.java    |   6 +
 .../kylin/query/util/WhiteSpaceParserTest.java     | 110 +++
 4 files changed, 1006 insertions(+), 1 deletion(-)

diff --git 
a/src/common-service/src/main/java/org/apache/kylin/rest/request/SQLRequest.java
 
b/src/common-service/src/main/java/org/apache/kylin/rest/request/SQLRequest.java
index 58d9a82596..2bf34c75c5 100644
--- 
a/src/common-service/src/main/java/org/apache/kylin/rest/request/SQLRequest.java
+++ 
b/src/common-service/src/main/java/org/apache/kylin/rest/request/SQLRequest.java
@@ -77,13 +77,15 @@ public class SQLRequest implements Serializable, 
ProjectInsensitiveRequest, Vali
     @Size(max = 256)
     private String user_defined_tag;
 
+    private String normalizedSql;
+
     protected volatile Object cacheKey = null;
 
     public Object getCacheKey() {
         if (cacheKey != null)
             return cacheKey;
 
-        cacheKey = Lists.newArrayList(sql.replaceAll("[ ]", " "), //
+        cacheKey = Lists.newArrayList(normalizedSql, //
                 project, //
                 offset, //
                 limit, //
diff --git 
a/src/query-common/src/main/codegen/javacc/org/apache/kylin/query/util/WhiteSpaceParser.jj
 
b/src/query-common/src/main/codegen/javacc/org/apache/kylin/query/util/WhiteSpaceParser.jj
new file mode 100644
index 0000000000..c07936792a
--- /dev/null
+++ 
b/src/query-common/src/main/codegen/javacc/org/apache/kylin/query/util/WhiteSpaceParser.jj
@@ -0,0 +1,887 @@
+options {
+    STATIC = false;
+    IGNORE_CASE = true;
+    UNICODE_INPUT = true;
+}
+
+PARSER_BEGIN(WhiteSpaceParser)
+package org.apache.kylin.query.util;
+
+import java.io.StringReader;
+import java.nio.charset.Charset;
+import java.util.List;
+import java.util.Locale;
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.Scanner;
+
+import org.apache.commons.lang3.StringUtils;
+
+import org.apache.kylin.guava30.shaded.common.collect.ImmutableSet;
+import org.apache.kylin.guava30.shaded.common.collect.Lists;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class WhiteSpaceParser {
+
+    private static final String CEIL_FLOOR_EXCEPTION_MSG = "%s(exp to 
timeunit) encounters timeunit %s, but only supports %s";
+    private static final String EXTRACT_FROM_EXCEPTION_MSG = "extract(timeunit 
from exp) encounters timeunit %s, but only supports %s";
+    private static final String TIMESTAMP_DIFF_ADD_EXCEPTION_MSG = 
"%s(timeunit, exp1, exp2) encounters timeunit %s, but only supports %s";
+    private static final String COMMA_STR = ",";
+    private static final String SPACE_STR = " ";
+    private static final char SPACE_CHAR = ' ';
+    private static final String QUOTE_STR = "'";
+    private static final Set<String> OPERATORS = new 
HashSet(Arrays.asList("+", "-", "*", "/", "%", "=",">=", "<=", "!=", "<>", 
"||"));
+
+    private EscapeDialect dialect = EscapeDialect.DEFAULT;
+
+  public static void main(String[] args) throws Exception {
+    String sql = "select  *          col1    ,\ncol2,col3  from \n tableA  \\r 
  a left \t\n table\rb     (  select *      from  t) ";
+    WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(new 
StringReader(sql));
+    System.out.println("transformed sql is :" + whiteSpaceParser.parse());
+  }
+
+    private static String normalize(String str) {
+      if (str.equals(COMMA_STR)) {
+        str = str + SPACE_STR;
+      } else if (OPERATORS.contains(str)) {
+        str = SPACE_STR + str + SPACE_STR;
+      }
+      return str;
+    }
+
+  public WhiteSpaceParser(EscapeDialect newDialect, String SQL) {
+    this(new StringReader(SQL));
+    this.dialect = newDialect;
+  }
+
+    private static boolean needDropPreSpace(String str) {
+        return COMMA_STR.equals(str) || OPERATORS.contains(str)
+            || (StringUtils.isBlank(str) && !SPACE_STR.equals(str));
+    }
+
+     private static boolean skipSpace(String str) {
+          return COMMA_STR.equals(str) || OPERATORS.contains(str);
+      }
+
+  private static void appendExpression(StringBuilder sb, String innerString) {
+      if (sb.length() != 0) {
+          int last = sb.length() - 1;
+          if (sb.charAt(last) != SPACE_CHAR && 
innerString.startsWith(QUOTE_STR)) {
+            sb.append(SPACE_STR);
+          }
+      }
+
+      int len = sb.length();
+      if (!SPACE_STR.equals(innerString) || (len != 0 && sb.charAt(len - 1) != 
SPACE_CHAR)) {
+        String tmp = normalize(innerString);
+        if (skipSpace(innerString)) {
+          sb.append(tmp);
+        } else {
+            sb.append(tmp).append(SPACE_STR);
+        }
+      }
+  }
+
+}
+
+PARSER_END(WhiteSpaceParser)
+
+<ESCAPE, FUNCTION, TIMEUNIT>
+SKIP : {
+  <([" ", "\t", "\n", "\r", "\f"])+ >
+}
+
+<NILADIC>
+SKIP :
+{
+    <PAREN: ((<SPACE>)* <LPAREN> (<SPACE>)* <RPAREN>)? > : DEFAULT
+}
+
+< DEFAULT >
+TOKEN :
+{
+  < LBRACE : "{" > : ESCAPE
+| < RBRACE : "}" >
+| < RPAREN : ")" >
+| < LPAREN : "(" >
+| < COMMA : "," >
+| < SPACE : ([" ", "\t", "\n", "\r", "\f"])+ >
+| < QUOTE: "'" >
+// possible strings copied from calcite
+|
+    < BINARY_STRING_LITERAL: ["x","X"] <QUOTE> ( (~["'"]) | ("''"))* <QUOTE> >
+|
+    < QUOTED_STRING: <QUOTE> ( (~["'"]) | ("''"))* <QUOTE> >
+|
+    < PREFIXED_STRING_LITERAL: ("_" <CHARSETNAME> | "N") <QUOTED_STRING> >
+|
+    < UNICODE_STRING_LITERAL: "U" "&" <QUOTED_STRING> >
+|
+    < #CHARSETNAME: (["a"-"z","A"-"Z","0"-"9"])
+    (["a"-"z","A"-"Z","0"-"9",":",".","-","_"])*
+    >
+| < DOUBLE_QUOTE : "\"">
+| < QUOTED_IDENTIFIER: <DOUBLE_QUOTE> ( (~["\"","\n","\r"]) | ("\"\"") )+ 
<DOUBLE_QUOTE> >
+| < HINT: "/*" (~["\n", "\r"])* "*/" >
+| < CUBE_PRIORITY: "--" ([" "])* "CubePriority(" (~["\n", "\r", ")"])* ")" ([" 
"])* "\n" >
+| < UNSIGNED_INTEGER_LITERAL: (["0"-"9"])+ >
+| < APPROX_NUMERIC_LITERAL:(<UNSIGNED_INTEGER_LITERAL> | 
<DECIMAL_NUMERIC_LITERAL>) <EXPONENT> >
+| < DECIMAL_NUMERIC_LITERAL:(["0"-"9"])+(".")?(["0"-"9"])*| "."(["0"-"9"])+>
+| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
+| < DOT : "." >
+| < CEIL : "CEIL" >
+| < CEIL_DATETIME : "CEIL_DATETIME" >
+| < FLOOR_DATETIME : "FLOOR_DATETIME" >
+| < FLOOR : "FLOOR" >
+| < TO : "TO" >
+| < SUBSTRING : "SUBSTRING" >
+| < SUBSTR :  "SUBSTR" >
+| < FROM : "FROM" >
+| < FOR : "FOR" >
+| < CAST : "CAST" >
+| < AS : "AS" >
+| < TIMESTAMPADD : "TIMESTAMPADD" >
+| < TIMESTAMPDIFF : "TIMESTAMPDIFF" >
+| < EXTRACT : "EXTRACT" >
+| < OVERLAY : "OVERLAY" >
+| < PLACING : "PLACING" >
+| < GROUPING : "GROUPING" >
+| < SETS : "SETS" >
+| < PI : "PI" > : NILADIC
+| < ANY : (~[ "{", "}", " ", "(", ")", ",", "'", "\t", "\n", "\r", "\"", ".",
+    "=", ">", "<", "?", ":", "!", "+", "-", "*", "/", "%", "|", "^", "$" ])+ >
+}
+
+< DEFAULT >
+TOKEN :
+{
+    < EQ: "=" >
+|   < GT: ">" >
+|   < LT: "<" >
+|   < HOOK: "?" >
+|   < COLON: ":" >
+|   < LE: "<=" >
+|   < GE: ">=" >
+|   < NE: "<>" >
+|   < BANGEQUAL : "!=" >
+|   < NOT: "!" >
+|   < PLUS: "+" >
+|   < MINUS: "-" >
+|   < STAR: "*" >
+|   < SLASH: "/" >
+|   < MOD: "%" >
+|   < CONCAT: "||" >
+|   < VERTICAL_BAR: "|" >
+|   < CARET: "^" >
+|   < DOLLAR: "$" >
+}
+
+< ESCAPE >
+TOKEN :
+{
+  < FN : "fn" > : FUNCTION
+| < TS : "ts" > : DEFAULT
+| < D : "d" > : DEFAULT
+| < DATE : "date" > : DEFAULT
+| < T : "t" > : DEFAULT
+| < TIME : "time" > : DEFAULT
+}
+
+< FUNCTION >
+TOKEN :
+{
+  < FUNCTION_NAME : <ANY> > : DEFAULT
+}
+
+
+String parse():
+{
+  StringBuilder transformedStr = new StringBuilder();
+  String inner;
+}
+{
+  (
+  LOOKAHEAD(2)
+  inner = Expression()
+  {
+    if(transformedStr.length() != 0) {
+                int lastIndex = transformedStr.length() - 1;
+                if (transformedStr.charAt(lastIndex) == SPACE_CHAR && 
needDropPreSpace(inner)) {
+                 transformedStr.deleteCharAt(lastIndex);
+                }
+            }
+            appendExpression(transformedStr, inner);
+  }
+  )+
+  <EOF>
+  {
+    return transformedStr.toString();
+  }
+}
+
+String Expression() :
+{
+    String innerString = "";
+}
+{
+    {
+        if (Thread.currentThread().isInterrupted()) {
+            throw new ParseException("EscapeParser is interrupted");
+        }
+    }
+    (innerString = EscapeExpress()
+     | innerString = SubstringExpression()
+     | innerString = OverlayExpression()
+     | innerString = GroupingSetsExpression()
+     | innerString = TsDiffOrAddExpression()
+     | innerString = Hint()
+     | innerString = CubePriority()
+     | innerString = Numeric()
+     | innerString = CastExpression()
+     | innerString = ExtractExpression()
+     | innerString = CeilFloorExpress()
+     | innerString = CeilFloorDatetimeExpress()
+     | innerString = ParenExpress()
+     | innerString = QuotedString()
+     | innerString = DoubleQuotedString()
+     | innerString = BangEqual()
+     | innerString = Comma()
+     | innerString = Space()
+     | innerString = PiFunction()
+     | innerString = Punctuation()
+     | innerString = From()
+     | innerString = For()
+     | innerString = To()
+     | innerString = As()
+     | innerString = Any()
+    )
+    {
+        return innerString;
+    }
+}
+
+String Comma() :
+{}
+{
+    < COMMA >
+    {
+        log.trace("meet token <COMMA>");
+        return getToken(0).image;
+    }
+}
+
+String Space() :
+{}
+{
+    < SPACE >
+    {
+        log.trace("meet token <SPACE>");
+        return " ";
+    }
+}
+
+String Punctuation() :
+{}
+{
+    (<DOT> | <LE> |<GE> | <NE> | <BANGEQUAL> | <EQ> | <GT> | <LT> | <NOT> | 
<HOOK> | <COLON> | <PLUS>
+        | <MINUS> | <STAR> | <SLASH> | <MOD> | <CONCAT> | <CARET> | <DOLLAR> | 
<VERTICAL_BAR> )
+    {
+        log.trace("meet token {}", getToken(0).toString());
+        return getToken(0).image;
+    }
+}
+
+String From() :
+{}
+{
+    < FROM >
+    {
+        log.trace("met token <FROM>");
+        return getToken(0).image;
+    }
+}
+
+String For() :
+{}
+{
+    < FOR >
+    {
+        log.trace("met token <FOR>");
+        return getToken(0).image;
+    }
+}
+
+String To() :
+{}
+{
+    < TO >
+    {
+        log.trace("met token <TO>");
+        return getToken(0).image;
+    }
+}
+
+String As() :
+{}
+{
+    < AS >
+    {
+        log.trace("met token <AS>");
+        return getToken(0).image;
+    }
+}
+
+String DoubleQuotedString() :
+{}
+{
+    <QUOTED_IDENTIFIER>
+    {
+        return dialect.transformDoubleQuoteString(getToken(0).image);
+    }
+}
+
+String QuotedString() :
+{
+    String s;
+}
+{
+    <BINARY_STRING_LITERAL>
+    {
+        log.trace("meet token in <BINARY_STRING_LITERAL>: " + 
getToken(0).image);
+        return getToken(0).image;
+    }
+    | <QUOTED_STRING>
+    {
+        log.trace("meet token in <QUOTED_STRING>: " + getToken(0).image);
+        return getToken(0).image;
+    }
+    | <PREFIXED_STRING_LITERAL>
+    {
+        log.trace("meet token in <PREFIXED_STRING_LITERAL>: " + 
getToken(0).image);
+        return getToken(0).image;
+    }
+    | <UNICODE_STRING_LITERAL>
+    {
+        log.trace("meet token in <UNICODE_STRING_LITERAL>: " + 
getToken(0).image);
+        return getToken(0).image;
+    }
+}
+
+String Any() :
+{}
+{
+    < ANY >
+    {
+        log.trace("meet token in <ANY>: " + getToken(0).image);
+        return getToken(0).image;
+    }
+}
+
+String ParenExpress() :
+{
+    String innerString;
+    StringBuilder transformed = new StringBuilder();
+}
+{
+    < LPAREN >
+    (
+        innerString = Expression()
+        {
+        if(transformed.length() != 0) {
+            int lastIndex = transformed.length() - 1;
+            if (transformed.charAt(lastIndex) == SPACE_CHAR && 
needDropPreSpace(innerString)) {
+                transformed.deleteCharAt(lastIndex);
+            }
+            innerString = normalize(innerString);
+        }
+        appendExpression(transformed, innerString);
+        }
+    )*
+    < RPAREN >
+    {
+        return "(" + transformed.toString() + ")";
+    }
+}
+
+String EscapeExpress() :
+{
+    String innerString;
+}
+{
+    < LBRACE >
+    (< SPACE >)*
+    (
+        innerString = EscapeFunction()
+    |   innerString = EscapeTimestamp()
+    |   innerString = EscapeDate()
+    |   innerString = EscapeTime()
+    )
+    (< SPACE >)*
+    < RBRACE >
+    {
+        return innerString;
+    }
+}
+
+String CastExpression() :
+{
+    String function;
+    List<String> parameters = Lists.newArrayList();
+}
+{
+    <CAST>
+    {
+        function = getToken(0).image;
+    }
+    (<SPACE> )* <LPAREN> (<SPACE>)*
+    {
+        parameters.add(ParameterExpression().trim());
+    }
+    (<SPACE>)*
+    <AS>
+    {
+        parameters.add(getToken(0).image);
+    }
+    (<SPACE>)*
+    {
+        String type = dialect.transformDataType(ParameterExpression().trim());
+        parameters.add(type);
+    }
+    (< SPACE >)* <RPAREN>
+    {
+        return String.format(Locale.ROOT, "%s(%s)", function, 
String.join(SPACE_STR, parameters));
+    }
+}
+
+String ExtractExpression() :
+{
+    String functionName;
+    String parameter;
+    ImmutableSet<String> timeunitSet = ImmutableSet.of("YEAR", "QUARTER", 
"MONTH", "WEEK",
+        "DAY", "HOUR", "MINUTE", "SECOND", "DOW", "DOY", "EPOCH");
+}
+{
+    <EXTRACT> (<SPACE> )* <LPAREN> (<SPACE> )*
+    {
+    functionName = ParameterExpression().trim();
+    if (!timeunitSet.contains(functionName.toUpperCase(Locale.ROOT))) {
+        throw new IllegalArgumentException(String.format(Locale.ROOT, 
EXTRACT_FROM_EXCEPTION_MSG,
+            functionName, String.join(", ", timeunitSet)));
+    }
+    if (functionName.equalsIgnoreCase("DAY")) {
+        functionName = "DAYOFMONTH";
+    } else if (functionName.equalsIgnoreCase("DOW")) {
+        functionName = "DAYOFWEEK";
+    } else if (functionName.equalsIgnoreCase("DOY")) {
+        functionName = "DAYOFYEAR";
+    }
+}
+    (<SPACE>)* <FROM> (<SPACE>)*
+    {
+        parameter = ParameterExpression().trim();
+    }
+    (< SPACE >)* <RPAREN>
+    {
+        return dialect.transformFN(functionName, new String[]{ parameter });
+    }
+}
+
+String TsDiffOrAddExpression() :
+{
+    String functionName;
+    List<String> parameters = Lists.newArrayList();
+    ImmutableSet<String> timeunitSet = ImmutableSet.of("YEAR", "QUARTER", 
"MONTH", "WEEK",
+       "DAY", "HOUR", "MINUTE", "SECOND", "SQL_TSI_SECOND", "SQL_TSI_MINUTE", 
"SQL_TSI_HOUR",
+       "SQL_TSI_DAY", "SQL_TSI_WEEK", "SQL_TSI_MONTH", 
"SQL_TSI_QUARTER","SQL_TSI_YEAR");
+}
+{
+    (<TIMESTAMPDIFF> | <TIMESTAMPADD>)
+    {
+        functionName = getToken(0).image;
+    }
+    (< SPACE >)* <LPAREN> (< SPACE >)*
+    {
+        String timeunit = ParameterExpression().trim();
+        if (timeunit.startsWith(QUOTE_STR) && timeunit.endsWith(QUOTE_STR) && 
timeunit.length() > 2) {
+            timeunit = timeunit.substring(1, timeunit.length() - 1);
+        }
+        if (!timeunitSet.contains(timeunit.toUpperCase(Locale.ROOT))) {
+            throw new IllegalStateException(String.format(Locale.ROOT, 
TIMESTAMP_DIFF_ADD_EXCEPTION_MSG,
+                functionName, timeunit, String.join(", ", timeunitSet)));
+        }
+        if (timeunit.toUpperCase(Locale.ROOT).startsWith("SQL_TSI_")) {
+            timeunit = timeunit.toUpperCase(Locale.ROOT).replace("SQL_TSI_", 
"");
+        }
+        
parameters.add(dialect.transformTimeunitOfTimestampDiffOrAdd(timeunit));
+    }
+    (< SPACE >)* <COMMA> (< SPACE >)*
+    {
+        parameters.add(ParameterExpression().trim());
+    }
+    (< SPACE >)* <COMMA> (< SPACE >)*
+    {
+        parameters.add(ParameterExpression().trim());
+    }
+    (< SPACE >)* <RPAREN>
+    {
+        return dialect.transformFN(functionName, parameters.toArray(new String 
[ 0 ]));
+    }
+}
+
+String SubstringExpression():
+{
+    String functionName;
+    List<String> parameters = Lists.newArrayList();
+    List<String> params = Lists.newArrayList();
+}
+{
+    (< SUBSTR > | < SUBSTRING > )
+    {
+        functionName = getToken(0).image;
+    }
+    (< SPACE >)* <LPAREN> (< SPACE >)*
+    {
+       parameters.add(ParameterExpression().trim());
+    }
+    (< SPACE >)* (<FROM> | <COMMA>) (<SPACE> )*
+    {
+        parameters.add(ParameterExpression().trim());
+    }
+    (
+        (< SPACE >)* (< FOR > | <COMMA>) (< SPACE >)*
+        {
+           parameters.add(ParameterExpression().trim());
+        }
+    )?
+    (< SPACE >)* < RPAREN >
+    {
+        return dialect.transformFN(functionName, parameters.toArray(new String 
[ 0 ]));
+    }
+}
+
+String OverlayExpression():
+{
+    String functionName;
+    List<String> parameters = Lists.newArrayList();
+    List<String> params = Lists.newArrayList();
+}
+{
+    < OVERLAY>
+    {
+        functionName = getToken(0).image;
+    }
+    (< SPACE >)* <LPAREN> (< SPACE >)*
+    {
+       parameters.add(ParameterExpression().trim());
+    }
+    (< SPACE >)* <PLACING> (< SPACE >)*
+    {
+        parameters.add(ParameterExpression().trim());
+    }
+    (< SPACE >)* (<FROM> | <COMMA>) (<SPACE> )*
+    {
+        parameters.add(ParameterExpression().trim());
+    }
+    (
+        (< SPACE >)* (< FOR > | <COMMA>) (< SPACE >)*
+        {
+           parameters.add(ParameterExpression().trim());
+        }
+    )?
+    (< SPACE >)* < RPAREN >
+    {
+        return dialect.transformFN(functionName, parameters.toArray(new String 
[ 0 ]));
+    }
+}
+
+String GroupingSetsExpression() :
+{
+    String function;
+    List <String> parameters = Lists.newArrayList();
+}
+{
+    <GROUPING>
+    {
+        function = getToken(0).image;
+    }
+    (
+        (<SPACE>)*
+        <SETS>
+        {
+            log.trace("recognized grouping sets");
+            function = getToken(0).image;
+        }
+    )?
+    {
+        log.trace("recognized grouping function");
+    }
+    (<SPACE>)* <LPAREN> (< SPACE >)*
+    {
+        parameters.add(ParameterExpression().trim());
+    }
+    (
+        (< SPACE >)* <COMMA> (< SPACE >)*
+        {
+            parameters.add(ParameterExpression().trim());
+        }
+    )*
+    (<SPACE>)* <RPAREN>
+    {
+        return dialect.transformFN(function, parameters.toArray(new String [ 0 
]));
+    }
+}
+
+String CeilFloorExpress() :
+{
+     String functionName;
+     List <String> parameters = Lists.newArrayList();
+     ImmutableSet<String> timeunitSet = ImmutableSet.of("YEAR", "QUARTER", 
"MONTH", "WEEK",
+                           "DAY", "HOUR", "MINUTE", "SECOND");
+}
+{
+    ( < CEIL > | < FLOOR > )
+    {
+        functionName = getToken(0).image;
+    }
+    (< SPACE >)* <LPAREN> (< SPACE >)*
+    {
+       parameters.add(ParameterExpression().trim());
+    }
+    (< SPACE >)*
+    (
+        <TO> (<SPACE>)*
+        {
+            String timeUnit = ParameterExpression().trim();
+            if (!timeunitSet.contains(timeUnit.toUpperCase(Locale.ROOT))) {
+                throw new IllegalStateException(String.format(Locale.ROOT, 
CEIL_FLOOR_EXCEPTION_MSG,
+                    functionName, timeUnit, String.join(", ", timeunitSet)));
+            }
+            parameters.add(timeUnit);
+        }
+        (<SPACE>)*
+    )?
+    <RPAREN>
+    {
+        return dialect.transformFN(functionName, parameters.toArray(new String 
[ 0 ]));
+    }
+}
+
+String CeilFloorDatetimeExpress() :
+{
+    String functionName;
+    List <String> parameters = Lists.newArrayList();
+    ImmutableSet<String> timeunitSet = ImmutableSet.of("YEAR", "QUARTER", 
"MONTH", "WEEK",
+                       "DAY", "HOUR", "MINUTE", "SECOND");
+}
+{
+    ( < CEIL_DATETIME> | < FLOOR_DATETIME> )
+    {
+        functionName = getToken(0).image;
+    }
+    (< SPACE >)* <LPAREN> (< SPACE >)*
+    {
+       parameters.add(ParameterExpression().trim());
+    }
+     (< SPACE >)*
+     (
+         <COMMA> (<SPACE>)*
+         {
+             String timeUnit = StringUtils.trim(ParameterExpression());
+             String unitToValidate = StringUtils.remove(timeUnit, '\'');
+             if (!timeunitSet.contains(StringUtils.upperCase(unitToValidate))) 
{
+                 throw new IllegalStateException(String.format(Locale.ROOT, 
CEIL_FLOOR_EXCEPTION_MSG,
+                     functionName, unitToValidate, String.join(", ", 
timeunitSet)));
+             }
+             parameters.add(timeUnit);
+         }
+         (<SPACE>)*
+     )?
+     <RPAREN>
+    {
+        return dialect.transformFN(functionName, parameters.toArray(new String 
[ 0 ]));
+    }
+}
+
+String PiFunction() :
+{
+    String function;
+}
+{
+    <PI>
+    {
+        return dialect.transformNiladicFunction("PI");
+    }
+}
+
+String Hint():
+{}
+{
+    <HINT>
+    {
+        log.trace("meet token <HINT>");
+        return getToken(0).image;
+    }
+}
+
+String CubePriority():
+{}
+{
+    <CUBE_PRIORITY>
+    {
+        log.trace("meet token <CubePriority>");
+        return getToken(0).image;
+    }
+}
+
+String Numeric():
+{}
+{
+    (<UNSIGNED_INTEGER_LITERAL> | <DECIMAL_NUMERIC_LITERAL> | 
<APPROX_NUMERIC_LITERAL>)
+    {
+        log.trace("meet token <APPROX_NUMERIC_LITERAL>");
+        return getToken(0).image;
+    }
+}
+
+String EscapeFunction() :
+{
+    String functionName;
+    String param;
+    List <String> parameters = Lists.newArrayList();
+    ImmutableSet<String> tsFunctions = ImmutableSet.of("TIMESTAMPDIFF", 
"TIMESTAMPADD");
+}
+{
+    < FN >< FUNCTION_NAME >
+    {
+        functionName = getToken(0).image;
+    }
+    < LPAREN > (<SPACE>)*
+    (
+        param = ParameterExpression()
+        {
+            if 
(tsFunctions.contains(functionName.trim().toUpperCase(Locale.ROOT))) {
+                if (param.toUpperCase(Locale.ROOT).startsWith("SQL_TSI_")) {
+                    param = param.toUpperCase(Locale.ROOT).replace("SQL_TSI_", 
"");
+                }
+                
parameters.add(dialect.transformTimeunitOfTimestampDiffOrAdd(param));
+            } else {
+                parameters.add(param);
+            }
+        }
+        (
+            < COMMA > EscapeFunctionParameter(parameters)
+        )*
+    )?
+    < RPAREN >
+    {
+        return dialect.transformFN(functionName, parameters.toArray(new String 
[ 0 ]));
+    }
+}
+
+void EscapeFunctionParameter(List<String> paramCollection) :
+{
+    String parameter = "";
+}
+{
+    (
+        parameter = ParameterExpression()
+    )
+    {
+        paramCollection.add(parameter.trim());
+    }
+}
+
+String ParameterExpression() :
+{
+    String innerString = "";
+    String nextString = "";
+}
+{
+    (
+        innerString = EscapeExpress()
+    | innerString = SubstringExpression()
+    | innerString = OverlayExpression()
+    | innerString = GroupingSetsExpression()
+    | innerString = CastExpression()
+    | innerString = TsDiffOrAddExpression()
+    | innerString = ExtractExpression()
+    | innerString = CeilFloorExpress()
+    | innerString = CeilFloorDatetimeExpress()
+    | innerString = Numeric()
+    | innerString = Hint()
+    | innerString = CubePriority()
+    | innerString = ParenExpress()
+    | innerString = QuotedString()
+    | innerString = DoubleQuotedString()
+    | innerString = Space()
+    | innerString = PiFunction()
+    | innerString = Punctuation()
+    | innerString = Any()
+    )
+    (
+        LOOKAHEAD(2)
+        nextString = ParameterExpression()
+    )?
+    {
+        return innerString + nextString;
+    }
+}
+
+String EscapeTimestamp() :
+{
+    String timestampExpr;
+}
+{
+    < TS >
+    (< SPACE >)*
+    (
+        timestampExpr = QuotedString()
+    )
+    {
+        return "TIMESTAMP " + timestampExpr;
+    }
+}
+
+String EscapeDate() :
+{
+    String timestampExpr;
+}
+{
+    (< DATE > | <D>)
+    (< SPACE >)*
+    (
+        timestampExpr = QuotedString()
+    )
+    {
+        return "DATE " + timestampExpr;
+    }
+}
+
+String EscapeTime() :
+{
+    String timestampExpr;
+}
+{
+    (< T > | <TIME>)
+    (< SPACE >)*
+    (
+        timestampExpr = QuotedString()
+    )
+    {
+        return "TIME " + timestampExpr;
+    }
+}
+
+String BangEqual() :
+{
+}
+{
+    <BANGEQUAL>
+    {
+        return "<>";
+    }
+}
diff --git 
a/src/query-service/src/main/java/org/apache/kylin/rest/service/QueryService.java
 
b/src/query-service/src/main/java/org/apache/kylin/rest/service/QueryService.java
index 31f6a9f353..3d453ce642 100644
--- 
a/src/query-service/src/main/java/org/apache/kylin/rest/service/QueryService.java
+++ 
b/src/query-service/src/main/java/org/apache/kylin/rest/service/QueryService.java
@@ -133,6 +133,7 @@ import 
org.apache.kylin.query.exception.NotSupportedSQLException;
 import org.apache.kylin.query.exception.UserStopQueryException;
 import org.apache.kylin.query.relnode.ContextUtil;
 import org.apache.kylin.query.relnode.OLAPContext;
+import org.apache.kylin.query.util.EscapeDialect;
 import org.apache.kylin.query.util.QueryLimiter;
 import org.apache.kylin.query.util.QueryModelPriorities;
 import org.apache.kylin.query.util.QueryParams;
@@ -141,6 +142,7 @@ import org.apache.kylin.query.util.RawSql;
 import org.apache.kylin.query.util.RawSqlParser;
 import org.apache.kylin.query.util.SlowQueryDetector;
 import org.apache.kylin.query.util.TokenMgrError;
+import org.apache.kylin.query.util.WhiteSpaceParser;
 import org.apache.kylin.rest.aspect.Transaction;
 import org.apache.kylin.rest.cluster.ClusterManager;
 import org.apache.kylin.rest.config.AppConfig;
@@ -592,6 +594,10 @@ public class QueryService extends BasicService implements 
CacheSignatureQuerySup
             RawSql rawSql = new RawSqlParser(sqlRequest.getSql()).parse();
             rawSql.autoAppendLimit(kylinConfig, sqlRequest.getLimit(), 
sqlRequest.getOffset());
 
+            WhiteSpaceParser whiteSpaceParser = new 
WhiteSpaceParser(EscapeDialect.DEFAULT, rawSql.getStatementString().trim());
+
+            sqlRequest.setNormalizedSql(whiteSpaceParser.parse());
+
             // Reset request sql for code compatibility
             sqlRequest.setSql(rawSql.getStatementString());
             // Set user sql for log & record purpose
diff --git 
a/src/query/src/test/java/org/apache/kylin/query/util/WhiteSpaceParserTest.java 
b/src/query/src/test/java/org/apache/kylin/query/util/WhiteSpaceParserTest.java
new file mode 100644
index 0000000000..271828d66d
--- /dev/null
+++ 
b/src/query/src/test/java/org/apache/kylin/query/util/WhiteSpaceParserTest.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.query.util;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class WhiteSpaceParserTest {
+
+    private final EscapeDialect dialect = EscapeDialect.DEFAULT;
+
+    private static final String expected = "select ACCOUNT_ID, ACCOUNT_ID + 1, 
'a     +    b', 'c* d', ACCOUNT_ID + 2, count ( * ) from KYLIN_ACCOUNT group by 
ACCOUNT_ID limit 400 ";
+
+    private static final String realSqlWithBinary = "select ACCOUNT_ID, 
ACCOUNT_ID + 1, x'a+ b', X'c *     d', ACCOUNT_ID + 2, count ( * ) from 
KYLIN_ACCOUNT group by ACCOUNT_ID limit 400 ";
+
+    private static final String realSql = "select ACCOUNT_ID, ACCOUNT_ID + 1, 
'a+ b', 'c * d', ACCOUNT_ID + 2, count ( * ) from KYLIN_ACCOUNT group by 
ACCOUNT_ID limit 400 ";
+
+    @Test
+    public void testMultiWhiteSpaces() throws ParseException {
+        String sql = "select   ACCOUNT_ID, ACCOUNT_ID + 1 ,'a     +    b'  
,'c* d', ACCOUNT_ID +2,count(*) from KYLIN_ACCOUNT group  by  ACCOUNT_ID  limit 
 400   ";
+        WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(dialect, 
sql.trim());
+        String parsed = whiteSpaceParser.parse();
+        Assert.assertEquals(expected, parsed);
+    }
+
+    @Test
+    public void testNewLines() throws ParseException {
+        String newLines = "select   ACCOUNT_ID, ACCOUNT_ID + 1 ,'a     +    b' 
 ,'c* d', ACCOUNT_ID +2,count(*) from "
+                + "KYLIN_ACCOUNT " + "group   by  ACCOUNT_ID  " + "limit  400  
 ";
+        testEqual(newLines);
+    }
+
+    @Test
+    public void testCommas() throws ParseException {
+        String commas = "select   ACCOUNT_ID   ,     ACCOUNT_ID + 1  ,  'a     
+    b'  ,'c* d', ACCOUNT_ID +2,count(*) from "
+                + "KYLIN_ACCOUNT " + "group   by  ACCOUNT_ID  " + "limit  400  
 ";
+        testEqual(commas);
+    }
+
+    @Test
+    public void testOperators() throws ParseException {
+        String operators = "select   ACCOUNT_ID   ,     ACCOUNT_ID     +    1  
,  'a     +    b'  ,'c* d', ACCOUNT_ID+2,count(*) from "
+                + "KYLIN_ACCOUNT " + "group   by  ACCOUNT_ID  " + "limit  400  
 ";
+        testEqual(operators);
+    }
+
+    private void testEqual(String sql) throws ParseException {
+        WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(dialect, 
sql.trim());
+        String parsed = whiteSpaceParser.parse();
+        Assert.assertEquals(expected, parsed);
+    }
+
+    @Test
+    public void testNormalEscapeString() throws ParseException {
+        String sql = "select   ACCOUNT_ID   ,     ACCOUNT_ID     +    1  ,  
'a+ b'  ,'c * d', ACCOUNT_ID+2,count(*) from "
+                + "KYLIN_ACCOUNT " + "group   by  ACCOUNT_ID  " + "limit  400  
 ";
+        WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(dialect, 
sql.trim());
+        String parsed = whiteSpaceParser.parse();
+        Assert.assertNotEquals(expected, parsed);
+        Assert.assertEquals(realSql, parsed);
+    }
+
+    @Test
+    public void testBinaryEscapeStringWithRegexWhiteSpace() throws 
ParseException {
+        String sql = "select   ACCOUNT_ID   ,     ACCOUNT_ID     +    1  ,  
x'a+ b'  ,X'c *     d', ACCOUNT_ID+2,count(*) from "
+                + "KYLIN_ACCOUNT " + "group   by  ACCOUNT_ID  " + "limit  400  
 ";
+        String sql1 = "select   ACCOUNT_ID   ,  \n   ACCOUNT_ID   \r  +    1  
,  x'a+ b'  ,X'c *     d',\f ACCOUNT_ID+2,count(*) from "
+                + "KYLIN_ACCOUNT " + "group   by  ACCOUNT_ID  " + "limit  400  
 ";
+        String sql2 = "select   ACCOUNT_ID   ,  \n\n   ACCOUNT_ID   \r\r  +    
1  ,  x'a+ b'  ,X'c *     d',\f ACCOUNT_ID+2,count(*) from "
+                + "KYLIN_ACCOUNT " + "group   by  ACCOUNT_ID  " + "limit  400  
 ";
+        String sql3 = "select \n  ACCOUNT_ID   ,  \n\n\r\t   ACCOUNT_ID   \r\r 
 +    1  ,  x'a+ b'  ,X'c *     d',\f ACCOUNT_ID+2,count(*) from "
+                + "KYLIN_ACCOUNT " + "group   by  ACCOUNT_ID  " + "limit  400  
 ";
+        testBinary(sql);
+        testBinary(sql1);
+        testBinary(sql2);
+        testBinary(sql3);
+    }
+
+    private void testBinary(String sql) throws ParseException {
+        WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(dialect, 
sql.trim());
+        String parsed = whiteSpaceParser.parse();
+        Assert.assertNotEquals(expected, parsed);
+        Assert.assertEquals(realSqlWithBinary, parsed);
+    }
+
+    @Test
+    public void testExtractEpoch() throws ParseException {
+        String sql = "SELECT ((EXTRACT(EPOCH FROM (DATE '2050-01-01')) / ( 60 
* 60 * 24)) + (365 * 70 + 17)) AS \"TEMP(Test)(3947742720)(0)\" FROM 
\"TDVT\".\"CALCS\" \"CALCS\" HAVING (COUNT(1) > 0)";
+        WhiteSpaceParser whiteSpaceParser = new WhiteSpaceParser(dialect, 
sql.trim());
+        String parsed = whiteSpaceParser.parse();
+        String real = "SELECT (({fn EPOCH((DATE '2050-01-01' ))} /  (60 *  60 
*  24 ) ) +  (365 *  70 +  17 ) ) AS \"TEMP(Test)(3947742720)(0)\" FROM 
\"TDVT\" . \"CALCS\" \"CALCS\" HAVING (COUNT (1 ) > 0 ) ";
+        Assert.assertEquals(real, parsed);
+    }
+}

Reply via email to