ankitsultana commented on code in PR #15498:
URL: https://github.com/apache/pinot/pull/15498#discussion_r2033918326


##########
pinot-common/src/main/java/org/apache/pinot/sql/parsers/QueryOptionParser.java:
##########
@@ -0,0 +1,241 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.sql.parsers;
+
+import com.google.common.base.Preconditions;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.commons.math3.util.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class QueryOptionParser {
+
+    private static final int LENGTH_OF_OPTION = "option".length();
+    private static final Logger LOGGER = 
LoggerFactory.getLogger(QueryOptionParser.class);
+
+    /** Half-open interval representing the option expression. */
+    @Nullable private Pair<Integer, Integer> _optionIndex;
+
+    private QueryOptionParser() {
+    }
+
+    /**
+     * Takes in the SQL query with or without options, stores the options in 
the map that is passed,
+     * and returns the SQL query without the options. We simply put each 
key-value pair to the map,
+     * overwriting any existing values. Option expression can only exist at 
the end of the query.
+     */
+    public static String extractQueryOptions(String sql, Map<String, String> 
sink, boolean stripQueryOptions) {
+        return new QueryOptionParser().extractQueryOptionsInternal(sql, sink, 
stripQueryOptions);
+    }
+
+    /**
+     * Removes query options from the sql, and returns the options in a map. 
Algorithm is:
+     *
+     * <ol>
+     *   <li>Convert to lowercase
+     *   <li>Find the last occurrence of the option keyword
+     *   <li>If it exists, try to extract key-value pairs, and strip the 
option expression from the
+     *       query
+     * </ol>
+     */
+    private String extractQueryOptionsInternal(String sql, Map<String, String> 
sink, boolean stripQueryOptions) {
+        String lowercaseSql = sql.toLowerCase(Locale.ENGLISH);
+        int lastIndexOfOptionKeyword = lowercaseSql.lastIndexOf("option");
+        if (lastIndexOfOptionKeyword == -1) {
+            return sql;
+        }
+        try {
+            Map<String, String> keyValuePairs = 
extractKeyValuePairs(lowercaseSql, lastIndexOfOptionKeyword);
+            sink.putAll(keyValuePairs);
+        } catch (Throwable ignored) {
+        }
+        if (_optionIndex == null) {
+            return sql;
+        }
+        return stripQueryOptions ? stripSubstring(sql, _optionIndex) : sql;
+    }
+
+    /**
+     * Given a lower case query, and an index representing the start of the 
"option" keyword, extracts
+     * all query option key-values that follow. This is O(n), and does 
allocations only for the option
+     * key-value pairs.
+     */
+    private Map<String, String> extractKeyValuePairs(String lowercaseSql, int 
startIndexOfOption) {
+        Map<String, String> result = new HashMap<>();
+        // Jump past the "option" keyword
+        int currentIndex = startIndexOfOption + LENGTH_OF_OPTION;
+        currentIndex = consumeCharacter(lowercaseSql, currentIndex, '(');
+        currentIndex = skipWhitespace(lowercaseSql, currentIndex);
+        if (currentIndex < lowercaseSql.length() && 
lowercaseSql.charAt(currentIndex) == ')') {
+            LOGGER.warn("[query-option-parser] Found empty option expression: 
'option()'");
+            _optionIndex = new Pair<>(startIndexOfOption, currentIndex + 1);
+            return result;
+        } else if (currentIndex < lowercaseSql.length() && 
lowercaseSql.charAt(currentIndex) == ',') {
+            LOGGER.warn("[query-option-parser] Found illegal empty option 
expression: 'option(,)'");
+            currentIndex++;
+            currentIndex = skipWhitespace(lowercaseSql, currentIndex);
+            Preconditions.checkState(
+                    currentIndex < lowercaseSql.length() && 
lowercaseSql.charAt(currentIndex) == ')');
+            _optionIndex = new Pair<>(startIndexOfOption, currentIndex + 1);
+            return result;
+        }
+        // loop-invariant: we progress by >3 characters in each iteration, 
because the consume methods
+        // consume at least
+        // 1 character.
+        while (currentIndex < lowercaseSql.length()) {
+
+            StringBuilder keyBuffer = new StringBuilder();
+            currentIndex = consumeToken(lowercaseSql, keyBuffer, currentIndex);
+
+            currentIndex = consumeCharacter(lowercaseSql, currentIndex, '=');
+
+            StringBuilder valueBuffer = new StringBuilder();
+            currentIndex = consumeToken(lowercaseSql, valueBuffer, 
currentIndex);
+
+            currentIndex = skipWhitespace(lowercaseSql, currentIndex);
+
+            Preconditions.checkState(currentIndex < lowercaseSql.length(), 
"Expected to read ')' or ','");
+            Preconditions.checkState(keyBuffer.length() > 0, "Empty keys 
should not be possible");
+            Preconditions.checkState(valueBuffer.length() > 0, "Empty values 
should not be possible");
+
+            result.put(keyBuffer.toString(), valueBuffer.toString());
+
+            if (lowercaseSql.charAt(currentIndex) == ',') {
+                currentIndex++;
+                skipWhitespace(lowercaseSql, currentIndex);
+            }
+            Preconditions.checkState(currentIndex < lowercaseSql.length(), 
"Expected to read ')' or ','");
+            if (lowercaseSql.charAt(currentIndex) == ')') {
+                currentIndex++;
+                break;
+            }
+        }
+        _optionIndex = new Pair<>(startIndexOfOption, currentIndex);
+        return result;
+    }
+
+    /**
+     * Starts at the given index in the input string, skips whitespaces, and 
returns the index if the
+     * expected character is found. Returns the index after the expected 
character.
+     *
+     * <p><b>Invariant:</b> Either progresses by at least 1 character, or 
throws, terminating
+     * execution.
+     */
+    static int consumeCharacter(String input, int currentIndex, char 
expectedCharacter) {
+        currentIndex = skipWhitespace(input, currentIndex);
+        if (currentIndex < input.length() && input.charAt(currentIndex) == 
expectedCharacter) {
+            return ++currentIndex;
+        }
+        // No message required since this is to terminate execution
+        throw new IllegalArgumentException();
+    }
+
+    /**
+     * Starts at the given index in the input string, skips whitespaces, and 
consumes a key or value
+     * token. Valid characters are: [a-zA-Z0-9_]. Returns the index after the 
last character of the
+     * token.
+     *
+     * <p><b>Invariant:</b> Either progresses by at least 1 character, or 
throws, terminating
+     * execution.
+     */
+    static int consumeToken(String input, StringBuilder tokenBuffer, int 
currentIndex) {
+        currentIndex = skipWhitespace(input, currentIndex);
+        if (isQuoteCharacter(input.charAt(currentIndex))) {
+            return consumeQuotedToken(input, tokenBuffer, currentIndex);
+        }
+        return consumeUnquotedToken(input, tokenBuffer, currentIndex);
+    }
+
+
+    /**
+     * Returns true if the character is a single or double quote.
+     */
+    static boolean isQuoteCharacter(char c) {
+        return (c == '\'' || c == '"');
+    }
+
+    /**
+     * Consumes a quoted token starting at the opening quote character.
+     * Ensures the token ends with a matching quote.
+     */
+    static int consumeQuotedToken(String input, StringBuilder tokenBuffer, int 
currentIndex) {
+
+        char quote = input.charAt(currentIndex);
+        Preconditions.checkState(isQuoteCharacter(quote),
+                "Expected the first character to be an opening quote 
character");
+
+        tokenBuffer.append(quote);
+        currentIndex++;
+
+        while (currentIndex < input.length() && input.charAt(currentIndex) != 
quote) {
+            tokenBuffer.append(input.charAt(currentIndex));
+            currentIndex++;
+        }
+
+        Preconditions.checkState(currentIndex < input.length() && 
input.charAt(currentIndex) == quote,
+                "Expected input to end with the same quote character as it 
started with");
+
+        tokenBuffer.append(quote);
+        currentIndex++;
+
+        return currentIndex;
+    }
+
+    /**
+     * Consumes an unquoted token consisting of valid characters.
+     * Fails if the token is empty.
+     */
+    static int consumeUnquotedToken(String input, StringBuilder tokenBuffer, 
int currentIndex) {

Review Comment:
   can you update this based on Pinot code style? (e.g. indent size should be 2)
   
   you can refer to 
https://docs.pinot.apache.org/developers/developers-and-contributors/code-setup#intellij



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to