This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 290914c4f2 Normalize excessive whitespaces in sql to avoid regex performance issues (#15498)
290914c4f2 is described below

commit 290914c4f2b907bcd52a3527d28a99787651da70
Author: Jitendra Kumar <14930630+jitendrak...@users.noreply.github.com>
AuthorDate: Mon Apr 21 22:42:21 2025 +0530

    Normalize excessive whitespaces in sql to avoid regex performance issues (#15498)
---
 .../apache/pinot/sql/parsers/CalciteSqlParser.java |  2 +
 .../org/apache/pinot/sql/parsers/ParserUtils.java  | 21 +++++++
 .../apache/pinot/sql/parsers/ParserUtilsTest.java  | 67 ++++++++++++++++++++++
 3 files changed, 90 insertions(+)

diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
index e28708faea..1742b406f7 100644
--- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
+++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
@@ -100,6 +100,8 @@ public class CalciteSqlParser {
       throws SqlCompilationException {
     long parseStartTimeNs = System.nanoTime();
 
+    sql = ParserUtils.sanitizeSql(sql);
+
     // extract and remove OPTIONS string
     List<String> options = extractOptionsFromSql(sql);
     if (!options.isEmpty()) {
diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java
index efaf195756..df8527ee4c 100644
--- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java
+++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/ParserUtils.java
@@ -39,6 +39,27 @@ public class ParserUtils {
     }
   }
 
+  /**
+   * Sanitizes the sql string for parsing by stripping trailing whitespace, which is
+   * likely to cause performance issues with regex parsing. Other whitespace is kept as is.
+   * @param sql string to sanitize; must not be null
+   * @return the sql string without trailing whitespace (empty if it was all whitespace)
+   */
+  public static String sanitizeSql(String sql) {
+
+    // 1. Remove trailing whitespace (as defined by Character.isWhitespace)
+    //    by scanning backwards from the last character.
+    int endIndex = sql.length() - 1;
+    while (endIndex >= 0 && Character.isWhitespace(sql.charAt(endIndex))) {
+      endIndex--;
+    }
+    sql = sql.substring(0, endIndex + 1);
+
+    // Further sanitization steps can be added here.
+
+    return sql;
+  }
+
   private static void validateJsonExtractScalarFunction(List<Expression> 
operands) {
     // Check that there are 3 or 4 arguments
     int numOperands = operands.size();
diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/ParserUtilsTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/ParserUtilsTest.java
new file mode 100644
index 0000000000..7a3393c9b2
--- /dev/null
+++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/ParserUtilsTest.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.sql.parsers;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class ParserUtilsTest {
+  // Tests for ParserUtils.sanitizeSql: only trailing whitespace is expected to be removed.
+  @Test
+  public void testRemoveExcessiveWhiteSpace() {
+    // Trailing whitespace after the statement is removed.
+    testRemoveExcessiveWhiteSpace(
+      "SELECT * FROM mytable " + " ".repeat(20000),
+      "SELECT * FROM mytable"
+    );
+    // Whitespace in the middle of the statement is kept as is.
+    testRemoveExcessiveWhiteSpace(
+      "SELECT * FROM " + " ".repeat(20000) + " mytable",
+      "SELECT * FROM " + " ".repeat(20000) + " mytable"
+    );
+    // Interior whitespace is kept while trailing whitespace is removed.
+    testRemoveExcessiveWhiteSpace(
+      "SELECT * " + " ".repeat(20000) + "FROM mytable " + " ".repeat(20000),
+      "SELECT * " + " ".repeat(20000) + "FROM mytable"
+    );
+    // Trailing whitespace after an options clause is removed.
+    testRemoveExcessiveWhiteSpace(
+      "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)" + " ".repeat(20000),
+      "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)"
+    );
+    // Trailing whitespace after a closing comment is removed.
+    testRemoveExcessiveWhiteSpace(
+      "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b) /* comment */" + " ".repeat(20000),
+      "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b) /* comment */"
+    );
+    // Whitespace before a trailing comment is not trailing, so the input is returned unchanged.
+    testRemoveExcessiveWhiteSpace(
+      "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)" + " ".repeat(20000) + " /* comment */",
+      "SELECT * FROM mytable" + " ".repeat(20000) + " options(a=b)" + " ".repeat(20000) + " /* comment */"
+    );
+  }
+  // Asserts that sanitizing the input yields exactly the expected string.
+  private void testRemoveExcessiveWhiteSpace(
+      String sqlWithExcessiveWhitespace,
+      String expectedSqlAfterSanitization
+  ) {
+    String sanitizedSql = ParserUtils.sanitizeSql(sqlWithExcessiveWhitespace);
+    Assert.assertEquals(sanitizedSql, expectedSqlAfterSanitization);
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to