Jackie-Jiang commented on code in PR #15498: URL: https://github.com/apache/pinot/pull/15498#discussion_r2037964294
########## pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java: ########## @@ -39,6 +39,49 @@ public static void validateFunction(String canonicalName, List<Expression> opera } } + /** + * Sanitize the sql string for parsing by normalizing whitespace which can + * cause performance issues with regex based parsing. + * This method replaces multiple consecutive whitespace characters with a single space. + * + * @param sql The raw SQL string to sanitize. May be null. + * @return A sanitized SQL string with normalized whitespace and no trailing spaces, + * or {@code null} if the input was {@code null}. + */ + public static String sanitizeSqlForParsing(String sql) { + if (sql == null) { + return null; + } + + // 1. Remove excessive whitespace + + int length = sql.length(); + StringBuilder builder = new StringBuilder(length); + boolean inWhitespaceBlock = false; + + for (int charIndex = 0; charIndex < length; charIndex++) { + char currentChar = sql.charAt(charIndex); + + if (Character.isWhitespace(currentChar)) { + if (currentChar == '\n' || currentChar == '\r') { + builder.append(currentChar); // preserve line breaks + inWhitespaceBlock = false; // reset space block + } else if (!inWhitespaceBlock) { + builder.append(' '); + inWhitespaceBlock = true; + } + } else { + builder.append(currentChar); + inWhitespaceBlock = false; // reset space block + } + } + + // Likewise extend for other improvements + + return builder.toString().trim(); + } + + Review Comment: (nit) Remove extra empty line ########## pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java: ########## @@ -39,6 +39,49 @@ public static void validateFunction(String canonicalName, List<Expression> opera } } + /** + * Sanitize the sql string for parsing by normalizing whitespace which can + * cause performance issues with regex based parsing. + * This method replaces multiple consecutive whitespace characters with a single space. + * + * @param sql The raw SQL string to sanitize. May be null. + * @return A sanitized SQL string with normalized whitespace and no trailing spaces, + * or {@code null} if the input was {@code null}. + */ + public static String sanitizeSqlForParsing(String sql) { + if (sql == null) { + return null; + } Review Comment: (minor) No need to handle `null`, and we can assume the input is not null ########## pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java: ########## @@ -39,6 +39,49 @@ public static void validateFunction(String canonicalName, List<Expression> opera } } + /** + * Sanitize the sql string for parsing by normalizing whitespace which can + * cause performance issues with regex based parsing. + * This method replaces multiple consecutive whitespace characters with a single space. + * + * @param sql The raw SQL string to sanitize. May be null. + * @return A sanitized SQL string with normalized whitespace and no trailing spaces, + * or {@code null} if the input was {@code null}. + */ + public static String sanitizeSqlForParsing(String sql) { + if (sql == null) { + return null; + } + + // 1. Remove excessive whitespace + + int length = sql.length(); + StringBuilder builder = new StringBuilder(length); + boolean inWhitespaceBlock = false; + + for (int charIndex = 0; charIndex < length; charIndex++) { + char currentChar = sql.charAt(charIndex); + + if (Character.isWhitespace(currentChar)) { + if (currentChar == '\n' || currentChar == '\r') { + builder.append(currentChar); // preserve line breaks + inWhitespaceBlock = false; // reset space block + } else if (!inWhitespaceBlock) { + builder.append(' '); + inWhitespaceBlock = true; + } + } else { + builder.append(currentChar); + inWhitespaceBlock = false; // reset space block + } + } + + // Likewise extend for other improvements + + return builder.toString().trim(); Review Comment: This can create extra garbage. It should be okay to have one extra space in the end. We should check the length of the builder, and only construct the string when the length changes -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org