This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 22c81de538 Enhance range index rule and flag invalid queries (#8956)
22c81de538 is described below

commit 22c81de5383b19671213e8377445e1b3c79342ec
Author: Jia Guo <jia...@linkedin.com>
AuthorDate: Wed Jun 22 23:55:10 2022 -0700

    Enhance range index rule and flag invalid queries (#8956)
    
    * Enhance range index rule and flag invalid queries
    
    * Enhance range index rule and flag invalid queries
---
 .../controller/recommender/io/InputManager.java    |  17 +++
 .../recommender/rules/impl/RangeIndexRule.java     |   1 +
 .../rules/io/params/RangeIndexRuleParams.java      |  11 ++
 .../rules/io/params/RecommenderConstants.java      |   4 +
 .../controller/recommender/TestConfigEngine.java   |  14 ++
 .../InvalidColumnInFilterInput.json                | 170 +++++++++++++++++++++
 6 files changed, 217 insertions(+)

diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java
index c6bbea84e4..fb74b85a48 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java
@@ -66,6 +66,7 @@ import org.slf4j.LoggerFactory;
 
 import static java.lang.Math.max;
 import static 
org.apache.pinot.controller.recommender.rules.io.params.RecommenderConstants.*;
+import static 
org.apache.pinot.controller.recommender.rules.io.params.RecommenderConstants.FlagQueryRuleParams.ERROR_INVALID_COLUMN;
 import static 
org.apache.pinot.controller.recommender.rules.io.params.RecommenderConstants.FlagQueryRuleParams.ERROR_INVALID_QUERY;
 
 
@@ -170,6 +171,22 @@ public class InputManager {
         PinotQuery pinotQuery = 
CalciteSqlParser.compileToPinotQuery(queryString);
         _queryOptimizer.optimize(pinotQuery, _schema);
         QueryContext queryContext = 
QueryContextConverterUtils.getQueryContext(pinotQuery);
+
+        // Flag queries whose filter references columns that do not appear in the schema,
+        // to exclude user input like: select i from tableName where a = xyz and t > 500
+        Set<String> filterColumns = new HashSet<>();
+        if (queryContext.getFilter() != null) {
+          // collect the column names used in the filter, excluding literals, etc.
+          queryContext.getFilter().getColumns(filterColumns);
+          // remove those that appear in the schema
+          filterColumns.removeAll(_colNameToIntMap.keySet());
+          // flag if there are columns left
+          if (!filterColumns.isEmpty()) {
+            invalidQueries.add(queryString);
+            _overWrittenConfigs.getFlaggedQueries().add(queryString, 
ERROR_INVALID_COLUMN + filterColumns);
+          }
+        }
+
         _parsedQueries.put(queryString,
             Triple.of(_queryWeightMap.get(queryString), 
CalciteSqlCompiler.convertToBrokerRequest(pinotQuery),
                 queryContext));
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java
index e393f5f4fe..17424923cb 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/impl/RangeIndexRule.java
@@ -71,6 +71,7 @@ public class RangeIndexRule extends AbstractRule {
       // As currently, only numeric columns are selected in range index 
creation, we will skip non numeric columns
       if (((weights[i] / totalWeight.get()) > 
_params._thresholdMinPercentRangeIndex) && !_output.getIndexConfig()
           .getSortedColumn().equals(colName) && 
!_output.getIndexConfig().getInvertedIndexColumns().contains(colName)
+          && _input.getCardinality(colName) > 
_params._thresholdMinCardinalityRangeIndex
           && _input.getFieldType(colName).isNumeric()) {
         _output.getIndexConfig().getRangeIndexColumns().add(colName);
       }
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java
index 5831930982..da9abcb213 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RangeIndexRuleParams.java
@@ -28,6 +28,17 @@ import com.fasterxml.jackson.annotation.Nulls;
 public class RangeIndexRuleParams {
   public Double _thresholdMinPercentRangeIndex =
       
RecommenderConstants.RangeIndexRule.DEFAULT_THRESHOLD_MIN_PERCENT_RANGE_INDEX;
+  public Double _thresholdMinCardinalityRangeIndex =
+      
RecommenderConstants.RangeIndexRule.DEFAULT_THRESHOLD_MIN_CARDINALITY_RANGE_INDEX;
+
+  public Double getThresholdMinCardinalityRangeIndex() {
+    return _thresholdMinCardinalityRangeIndex;
+  }
+
+  @JsonSetter(value = "THRESHOLD_MIN_CARDINALITY_RANGE_INDEX", nulls = 
Nulls.SKIP)
+  public void setThresholdMinCardinalityRangeIndex(Double 
thresholdMinCardinalityRangeIndex) {
+    _thresholdMinCardinalityRangeIndex = thresholdMinCardinalityRangeIndex;
+  }
 
   public Double getThresholdMinPercentRangeIndex() {
     return _thresholdMinPercentRangeIndex;
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java
index 05e3d0219a..2cda521399 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/rules/io/params/RecommenderConstants.java
@@ -71,6 +71,7 @@ public class RecommenderConstants {
 
   public static class RangeIndexRule {
     public static final double DEFAULT_THRESHOLD_MIN_PERCENT_RANGE_INDEX = 0.4;
+    public static final double DEFAULT_THRESHOLD_MIN_CARDINALITY_RANGE_INDEX = 
2;
   }
 
   public static class NoDictionaryOnHeapDictionaryJointRule {
@@ -94,6 +95,9 @@ public class RecommenderConstants {
     public static final String WARNING_TOO_LONG_LIMIT =
         "Warning: Please verify if you need to pull out huge number of records 
for this query. Consider using smaller"
             + " limit than " + DEFAULT_THRESHOLD_MAX_LIMIT_SIZE;
+
+    public static final String ERROR_INVALID_COLUMN =
+        "ERROR: Query is filtering on columns not appearing in schema: ";
     public static final String ERROR_INVALID_QUERY = "Error: Invalid query 
syntax. Please fix the query";
   }
 
diff --git 
a/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java
 
b/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java
index 7f96d8378b..26d33570b0 100644
--- 
a/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java
+++ 
b/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java
@@ -132,6 +132,20 @@ public class TestConfigEngine {
     assertEquals(output.getIndexConfig().getSortedColumn(), "c");
   }
 
+  @Test
+  void testInvalidColumnInFilterRule()
+      throws InvalidInputException, IOException {
+    loadInput("recommenderInput/InvalidColumnInFilterInput.json");
+    ConfigManager output = new ConfigManager();
+    AbstractRule abstractRule =
+        
RulesToExecute.RuleFactory.getRule(RulesToExecute.Rule.InvertedSortedIndexJointRule,
 _input, output);
+    abstractRule.run();
+    assertEquals(output.getIndexConfig().getInvertedIndexColumns().toString(), 
"[]");
+    
assertEquals(_input.getOverWrittenConfigs().getFlaggedQueries().getFlaggedQueries().toString(),
+        "{select i from tableName where a = xyz and t > 500=ERROR: "
+            + "Query is filtering on columns not appearing in schema: [xyz]}");
+  }
+
   @Test
   void testSortedInvertedIndexJointRuleWithMetricAndDateTimeColumn()
       throws InvalidInputException, IOException {
diff --git 
a/pinot-controller/src/test/resources/recommenderInput/InvalidColumnInFilterInput.json
 
b/pinot-controller/src/test/resources/recommenderInput/InvalidColumnInFilterInput.json
new file mode 100644
index 0000000000..0e45462f10
--- /dev/null
+++ 
b/pinot-controller/src/test/resources/recommenderInput/InvalidColumnInFilterInput.json
@@ -0,0 +1,170 @@
+{
+  "schema":{
+    "schemaName": "tableSchema",
+    "dimensionFieldSpecs": [
+      {
+        "name": "a",
+        "dataType": "INT",
+        "cardinality":20,
+        "numValuesPerEntry":1
+      },
+      {
+        "name": "b",
+        "dataType": "DOUBLE",
+        "cardinality":6,
+        "singleValueField": false,
+        "numValuesPerEntry":1.5
+      },
+      {
+        "name": "c",
+        "dataType": "FLOAT",
+        "cardinality":7,
+        "numValuesPerEntry":1
+      },
+      {
+        "name": "d",
+        "dataType": "STRING",
+        "cardinality":41,
+        "singleValueField": false,
+        "numValuesPerEntry":2,
+        "averageLength" : 27
+      },
+      {
+        "name": "e",
+        "dataType": "LONG",
+        "cardinality":18,
+        "singleValueField": false,
+        "numValuesPerEntry":4
+      },
+      {
+        "name": "f",
+        "dataType": "DOUBLE",
+        "cardinality":13,
+        "singleValueField": false,
+        "numValuesPerEntry":3
+      },
+      {
+        "name": "g",
+        "dataType": "STRING",
+        "cardinality":6,
+        "singleValueField": false,
+        "numValuesPerEntry":2,
+        "averageLength" : 100
+      },
+      {
+        "name": "h",
+        "dataType": "BYTES",
+        "cardinality":12,
+        "numValuesPerEntry":1,
+        "averageLength" : 10
+      },
+      {
+        "name": "i",
+        "dataType": "STRING",
+        "singleValueField": false,
+        "cardinality":7,
+        "numValuesPerEntry":1,
+        "averageLength" : 25
+      },
+      {
+        "name": "j",
+        "dataType": "DOUBLE",
+        "cardinality":4,
+        "numValuesPerEntry":1.00000001
+      },
+      {
+        "name": "ja",
+        "dataType": "BOOLEAN"
+      },
+      {
+        "name": "jb",
+        "dataType": "BOOLEAN",
+        "numValuesPerEntry": 3
+      }
+    ],
+    "metricFieldSpecs": [
+      {
+        "name": "k",
+        "dataType": "DOUBLE",
+        "cardinality":10000,
+        "numValuesPerEntry":1,
+        "averageLength" : 100
+      },
+      {
+        "name": "l",
+        "dataType": "DOUBLE",
+        "cardinality":10000,
+        "numValuesPerEntry":1,
+        "averageLength" : 10
+      },
+      {
+        "name": "m",
+        "dataType": "BYTES",
+        "cardinality":10000,
+        "numValuesPerEntry":1,
+        "averageLength" : 25
+      },
+      {
+        "name": "n",
+        "dataType": "DOUBLE",
+        "cardinality":10000,
+        "numValuesPerEntry":1
+      },
+      {
+        "name": "o",
+        "dataType": "DOUBLE",
+        "cardinality":10000,
+        "numValuesPerEntry":1,
+        "averageLength" : 25
+      },
+      {
+        "name": "p",
+        "dataType": "DOUBLE",
+        "cardinality":10000,
+        "numValuesPerEntry":1
+      }
+    ],
+    "timeFieldSpec": {
+      "incomingGranularitySpec": {
+        "dataType": "INT",
+        "name": "t",
+        "timeType": "DAYS",
+        "cardinality":10000,
+        "numValuesPerEntry":1
+      }
+    }
+  },
+  "queriesWithWeights":{
+    "select i from tableName where a = xyz and t > 500": 1,
+    "select i from tableName where a = 43 and t > 500": 1,
+    "select i from tableName where a = 'xyz' and t > 500": 1,
+    "select i from tableName where a = b and t > 500": 1
+  },
+  "qps": 15000,
+  "numMessagesPerSecInKafkaTopic":1000,
+  "numRecordsPerPush":1000000000,
+  "tableType": "HYBRID",
+  "latencySLA": 500,
+
+  "rulesToExecute": {
+    "recommendInvertedSortedIndexJoint": true
+  },
+  "partitionRuleParams": {
+    "THRESHOLD_MAX_LATENCY_SLA_PARTITION": 1001
+  },
+  "bloomFilterRuleParams": {
+    "THRESHOLD_MIN_PERCENT_EQ_BLOOMFILTER" : 0.51
+  },
+  "invertedSortedIndexJointRuleParams": {
+    "THRESHOLD_RATIO_MIN_GAIN_DIFF_BETWEEN_ITERATION" : 0.06
+  },
+  "noDictionaryOnHeapDictionaryJointRuleParams": {
+    "THRESHOLD_MIN_PERCENT_DICTIONARY_STORAGE_SAVE" : 0.96
+  },
+  "overWrittenConfigs": {
+    "indexConfig": {
+      "invertedIndexColumns": ["a","b"],
+      "rangeIndexColumns": ["f"]
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to