This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 290914c4f2 Normalize excessive whitespaces in sql to avoid regex performance issues (#15498) 290914c4f2 is described below commit 290914c4f2b907bcd52a3527d28a99787651da70 Author: Jitendra Kumar <14930630+jitendrak...@users.noreply.github.com> AuthorDate: Mon Apr 21 22:42:21 2025 +0530 Normalize excessive whitespaces in sql to avoid regex performance issues (#15498) --- .../apache/pinot/sql/parsers/CalciteSqlParser.java | 2 + .../org/apache/pinot/sql/parsers/ParserUtils.java | 21 +++++++ .../apache/pinot/sql/parsers/ParserUtilsTest.java | 67 ++++++++++++++++++++++ 3 files changed, 90 insertions(+) diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java index e28708faea..1742b406f7 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java @@ -100,6 +100,8 @@ public class CalciteSqlParser { throws SqlCompilationException { long parseStartTimeNs = System.nanoTime(); + sql = ParserUtils.sanitizeSql(sql); + // extract and remove OPTIONS string List<String> options = extractOptionsFromSql(sql); if (!options.isEmpty()) { diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java index efaf195756..df8527ee4c 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java @@ -39,6 +39,27 @@ public class ParserUtils { } } + /** + * Sanitize the sql string for parsing by normalizing whitespace + * which is likely to cause performance issues with regex parsing. + * @param sql string to sanitize + * @return sanitized sql string + */ + public static String sanitizeSql(String sql) { + + // 1. Remove trailing whitespaces + + int endIndex = sql.length() - 1; + while (endIndex >= 0 && Character.isWhitespace(sql.charAt(endIndex))) { + endIndex--; + } + sql = sql.substring(0, endIndex + 1); + + // Likewise extend for other improvements + + return sql; + } + private static void validateJsonExtractScalarFunction(List<Expression> operands) { // Check that there are 3 or 4 arguments int numOperands = operands.size(); diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/ParserUtilsTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/ParserUtilsTest.java new file mode 100644 index 0000000000..7a3393c9b2 --- /dev/null +++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/ParserUtilsTest.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.sql.parsers; + +import org.testng.Assert; +import org.testng.annotations.Test; + +public class ParserUtilsTest { + + @Test + public void testRemoveExcessiveWhiteSpace() { + + testRemoveExcessiveWhiteSpace( + "SELECT * FROM mytable " + " ".repeat(20000), + "SELECT * FROM mytable" + ); + + testRemoveExcessiveWhiteSpace( + "SELECT * FROM " + " ".repeat(20000) + " mytable", + "SELECT * FROM " + " ".repeat(20000) + " mytable" + ); + + testRemoveExcessiveWhiteSpace( + "SELECT * " + " ".repeat(20000) + "FROM mytable " + " ".repeat(20000), + "SELECT * " + " ".repeat(20000) + "FROM mytable" + ); + + testRemoveExcessiveWhiteSpace( + "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)" + " ".repeat(20000), + "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)" + ); + + testRemoveExcessiveWhiteSpace( + "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b) /* comment */" + " ".repeat(20000), + "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b) /* comment */" + ); + + testRemoveExcessiveWhiteSpace( + "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)" + " ".repeat(20000) + " /* comment */", + "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)" + " ".repeat(20000) + " /* comment */" + ); + } + + private void testRemoveExcessiveWhiteSpace( + String sqlWithExcessiveWhitespace, + String expectedSqlAfterSanitization + ) { + String sanitizedSql = ParserUtils.sanitizeSql(sqlWithExcessiveWhitespace); + Assert.assertEquals(sanitizedSql, expectedSqlAfterSanitization); + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org