This is an automated email from the ASF dual-hosted git repository. siddteotia pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 22c81de538 Enhance range index rule and flag invalid queries (#8956) 22c81de538 is described below commit 22c81de5383b19671213e8377445e1b3c79342ec Author: Jia Guo <jia...@linkedin.com> AuthorDate: Wed Jun 22 23:55:10 2022 -0700 Enhance range index rule and flag invalid queries (#8956) * Enhance range index rule and flag invalid queries * Enhance range index rule and flag invalid queries --- .../controller/recommender/io/InputManager.java | 17 +++ .../recommender/rules/impl/RangeIndexRule.java | 1 + .../rules/io/params/RangeIndexRuleParams.java | 11 ++ .../rules/io/params/RecommenderConstants.java | 4 + .../controller/recommender/TestConfigEngine.java | 14 ++ .../InvalidColumnInFilterInput.json | 170 +++++++++++++++++++++ 6 files changed, 217 insertions(+) diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java index c6bbea84e4..fb74b85a48 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java @@ -66,6 +66,7 @@ import org.slf4j.LoggerFactory; import static java.lang.Math.max; import static org.apache.pinot.controller.recommender.rules.io.params.RecommenderConstants.*; +import static org.apache.pinot.controller.recommender.rules.io.params.RecommenderConstants.FlagQueryRuleParams.ERROR_INVALID_COLUMN; import static org.apache.pinot.controller.recommender.rules.io.params.RecommenderConstants.FlagQueryRuleParams.ERROR_INVALID_QUERY; @@ -170,6 +171,22 @@ public class InputManager { PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(queryString); _queryOptimizer.optimize(pinotQuery, _schema); QueryContext queryContext = QueryContextConverterUtils.getQueryContext(pinotQuery); + + // Flag the queries 
whose filter columns do not appear in the schema + // to exclude user input like select i from tableName where a = xyz and t > 500 + Set<String> filterColumns = new HashSet<>(); + if (queryContext.getFilter() != null) { + // get the filter column names, excluding literals, etc + queryContext.getFilter().getColumns(filterColumns); + // remove those that appear in the schema + filterColumns.removeAll(_colNameToIntMap.keySet()); + // flag if there are columns left + if (!filterColumns.isEmpty()) { + invalidQueries.add(queryString); + _overWrittenConfigs.getFlaggedQueries().add(queryString, ERROR_INVALID_COLUMN + filterColumns); + } + } + _parsedQueries.put(queryString, Triple.of(_queryWeightMap.get(queryString), CalciteSqlCompiler.convertToBrokerRequest(pinotQuery), queryContext)); diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java index e393f5f4fe..17424923cb 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java @@ -71,6 +71,7 @@ public class RangeIndexRule extends AbstractRule { // As currently, only numeric columns are selected in range index creation, we will skip non numeric columns if (((weights[i] / totalWeight.get()) > _params._thresholdMinPercentRangeIndex) && !_output.getIndexConfig() .getSortedColumn().equals(colName) && !_output.getIndexConfig().getInvertedIndexColumns().contains(colName) + && _input.getCardinality(colName) > _params._thresholdMinCardinalityRangeIndex && _input.getFieldType(colName).isNumeric()) { _output.getIndexConfig().getRangeIndexColumns().add(colName); } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java index 5831930982..da9abcb213 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java @@ -28,6 +28,17 @@ import com.fasterxml.jackson.annotation.Nulls; public class RangeIndexRuleParams { public Double _thresholdMinPercentRangeIndex = RecommenderConstants.RangeIndexRule.DEFAULT_THRESHOLD_MIN_PERCENT_RANGE_INDEX; + public Double _thresholdMinCardinalityRangeIndex = + RecommenderConstants.RangeIndexRule.DEFAULT_THRESHOLD_MIN_CARDINALITY_RANGE_INDEX; + + public Double getThresholdMinCardinalityRangeIndex() { + return _thresholdMinCardinalityRangeIndex; + } + + @JsonSetter(value = "THRESHOLD_MIN_CARDINALITY_RANGE_INDEX", nulls = Nulls.SKIP) + public void setThresholdMinCardinalityRangeIndex(Double thresholdMinCardinalityRangeIndex) { + _thresholdMinCardinalityRangeIndex = thresholdMinCardinalityRangeIndex; + } public Double getThresholdMinPercentRangeIndex() { return _thresholdMinPercentRangeIndex; diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java index 05e3d0219a..2cda521399 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java @@ -71,6 +71,7 @@ public class RecommenderConstants { public static class RangeIndexRule { public static final double DEFAULT_THRESHOLD_MIN_PERCENT_RANGE_INDEX = 0.4; + public static final double DEFAULT_THRESHOLD_MIN_CARDINALITY_RANGE_INDEX = 2; } public static class 
NoDictionaryOnHeapDictionaryJointRule { @@ -94,6 +95,9 @@ public class RecommenderConstants { public static final String WARNING_TOO_LONG_LIMIT = "Warning: Please verify if you need to pull out huge number of records for this query. Consider using smaller" + " limit than " + DEFAULT_THRESHOLD_MAX_LIMIT_SIZE; + + public static final String ERROR_INVALID_COLUMN = + "ERROR: Query is filtering on columns not appearing in schema: "; public static final String ERROR_INVALID_QUERY = "Error: Invalid query syntax. Please fix the query"; } diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java b/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java index 7f96d8378b..26d33570b0 100644 --- a/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java +++ b/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java @@ -132,6 +132,20 @@ public class TestConfigEngine { assertEquals(output.getIndexConfig().getSortedColumn(), "c"); } + @Test + void testInvalidColumnInFilterRule() + throws InvalidInputException, IOException { + loadInput("recommenderInput/InvalidColumnInFilterInput.json"); + ConfigManager output = new ConfigManager(); + AbstractRule abstractRule = + RulesToExecute.RuleFactory.getRule(RulesToExecute.Rule.InvertedSortedIndexJointRule, _input, output); + abstractRule.run(); + assertEquals(output.getIndexConfig().getInvertedIndexColumns().toString(), "[]"); + assertEquals(_input.getOverWrittenConfigs().getFlaggedQueries().getFlaggedQueries().toString(), + "{select i from tableName where a = xyz and t > 500=ERROR: " + + "Query is filtering on columns not appearing in schema: [xyz]}"); + } + @Test void testSortedInvertedIndexJointRuleWithMetricAndDateTimeColumn() throws InvalidInputException, IOException { diff --git a/pinot-controller/src/test/resources/recommenderInput/InvalidColumnInFilterInput.json 
b/pinot-controller/src/test/resources/recommenderInput/InvalidColumnInFilterInput.json new file mode 100644 index 0000000000..0e45462f10 --- /dev/null +++ b/pinot-controller/src/test/resources/recommenderInput/InvalidColumnInFilterInput.json @@ -0,0 +1,170 @@ +{ + "schema":{ + "schemaName": "tableSchema", + "dimensionFieldSpecs": [ + { + "name": "a", + "dataType": "INT", + "cardinality":20, + "numValuesPerEntry":1 + }, + { + "name": "b", + "dataType": "DOUBLE", + "cardinality":6, + "singleValueField": false, + "numValuesPerEntry":1.5 + }, + { + "name": "c", + "dataType": "FLOAT", + "cardinality":7, + "numValuesPerEntry":1 + }, + { + "name": "d", + "dataType": "STRING", + "cardinality":41, + "singleValueField": false, + "numValuesPerEntry":2, + "averageLength" : 27 + }, + { + "name": "e", + "dataType": "LONG", + "cardinality":18, + "singleValueField": false, + "numValuesPerEntry":4 + }, + { + "name": "f", + "dataType": "DOUBLE", + "cardinality":13, + "singleValueField": false, + "numValuesPerEntry":3 + }, + { + "name": "g", + "dataType": "STRING", + "cardinality":6, + "singleValueField": false, + "numValuesPerEntry":2, + "averageLength" : 100 + }, + { + "name": "h", + "dataType": "BYTES", + "cardinality":12, + "numValuesPerEntry":1, + "averageLength" : 10 + }, + { + "name": "i", + "dataType": "STRING", + "singleValueField": false, + "cardinality":7, + "numValuesPerEntry":1, + "averageLength" : 25 + }, + { + "name": "j", + "dataType": "DOUBLE", + "cardinality":4, + "numValuesPerEntry":1.00000001 + }, + { + "name": "ja", + "dataType": "BOOLEAN" + }, + { + "name": "jb", + "dataType": "BOOLEAN", + "numValuesPerEntry": 3 + } + ], + "metricFieldSpecs": [ + { + "name": "k", + "dataType": "DOUBLE", + "cardinality":10000, + "numValuesPerEntry":1, + "averageLength" : 100 + }, + { + "name": "l", + "dataType": "DOUBLE", + "cardinality":10000, + "numValuesPerEntry":1, + "averageLength" : 10 + }, + { + "name": "m", + "dataType": "BYTES", + "cardinality":10000, + 
"numValuesPerEntry":1, + "averageLength" : 25 + }, + { + "name": "n", + "dataType": "DOUBLE", + "cardinality":10000, + "numValuesPerEntry":1 + }, + { + "name": "o", + "dataType": "DOUBLE", + "cardinality":10000, + "numValuesPerEntry":1, + "averageLength" : 25 + }, + { + "name": "p", + "dataType": "DOUBLE", + "cardinality":10000, + "numValuesPerEntry":1 + } + ], + "timeFieldSpec": { + "incomingGranularitySpec": { + "dataType": "INT", + "name": "t", + "timeType": "DAYS", + "cardinality":10000, + "numValuesPerEntry":1 + } + } + }, + "queriesWithWeights":{ + "select i from tableName where a = xyz and t > 500": 1, + "select i from tableName where a = 43 and t > 500": 1, + "select i from tableName where a = 'xyz' and t > 500": 1, + "select i from tableName where a = b and t > 500": 1 + }, + "qps": 15000, + "numMessagesPerSecInKafkaTopic":1000, + "numRecordsPerPush":1000000000, + "tableType": "HYBRID", + "latencySLA": 500, + + "rulesToExecute": { + "recommendInvertedSortedIndexJoint": true + }, + "partitionRuleParams": { + "THRESHOLD_MAX_LATENCY_SLA_PARTITION": 1001 + }, + "bloomFilterRuleParams": { + "THRESHOLD_MIN_PERCENT_EQ_BLOOMFILTER" : 0.51 + }, + "invertedSortedIndexJointRuleParams": { + "THRESHOLD_RATIO_MIN_GAIN_DIFF_BETWEEN_ITERATION" : 0.06 + }, + "noDictionaryOnHeapDictionaryJointRuleParams": { + "THRESHOLD_MIN_PERCENT_DICTIONARY_STORAGE_SAVE" : 0.96 + }, + "overWrittenConfigs": { + "indexConfig": { + "invertedIndexColumns": ["a","b"], + "rangeIndexColumns": ["f"] + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org