siddharthteotia commented on a change in pull request #7568: URL: https://github.com/apache/pinot/pull/7568#discussion_r745782668
########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/BaseOperator.java ########## @@ -52,4 +54,25 @@ public final T nextBlock() { // Make it protected because we should always call nextBlock() protected abstract T getNextBlock(); + + // Enforcing sub-class to implement the getOperatorName(), as they can just return a static final, + // as opposed to this super class calling getClass().getSimpleName(). + public abstract String getOperatorName(); + + public abstract String getExplainPlanName(); + + @Override + public List<Operator> getChildOperators() { + return new ArrayList<>(); Review comment: Collections.emptyList() might be better ? ########## File path: pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/LiteralOnlyBrokerRequestTest.java ########## @@ -177,4 +177,55 @@ public void testBrokerRequestHandlerWithAsFunction() Assert.assertEquals(brokerResponse.getResultTable().getRows().get(0)[1], 1577836800000L); Assert.assertEquals(brokerResponse.getTotalDocs(), 0); } + + /** Tests for EXPLAIN PLAN for literal only queries. 
*/ + @Test + public void testExplainPlanLiteralOnly() + throws Exception { + SingleConnectionBrokerRequestHandler requestHandler = + new SingleConnectionBrokerRequestHandler(new PinotConfiguration(), null, ACCESS_CONTROL_FACTORY, null, null, + new BrokerMetrics("", PinotMetricUtils.getPinotMetricsRegistry(), true, Collections.emptySet()), null); + + // Test 1: select constant + JsonNode request = new ObjectMapper().readTree("{\"sql\":\"EXPLAIN PLAN FOR SELECT 1.5, 'test'\"}"); + RequestStatistics requestStats = new RequestStatistics(); + BrokerResponseNative brokerResponse = requestHandler.handleRequest(request, null, requestStats); + + checkExplainResultSchema(brokerResponse.getResultTable().getDataSchema(), + new String[]{"Operator", "Operator_Id", "Parent_Id"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING, DataSchema.ColumnDataType.INT, + DataSchema.ColumnDataType.INT}); + + Assert.assertEquals(brokerResponse.getResultTable().getRows().size(), 1); + Assert.assertEquals(brokerResponse.getResultTable().getRows().get(0), + new Object[]{"SELECT(selectList:literal)", 0, -1}); + Assert.assertEquals(brokerResponse.getTotalDocs(), 0); + + // Test 2: invoke compile time function -> literal only + long currentTsMin = System.currentTimeMillis(); + request = new ObjectMapper().readTree( + "{\"sql\":\"EXPLAIN PLAN FOR SELECT 6+8 as addition, fromDateTime('2020-01-01 UTC', 'yyyy-MM-dd z') as " + + "firstDayOf2020\"}"); + requestStats = new RequestStatistics(); + brokerResponse = requestHandler.handleRequest(request, null, requestStats); + + checkExplainResultSchema(brokerResponse.getResultTable().getDataSchema(), + new String[]{"Operator", "Operator_Id", "Parent_Id"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING, DataSchema.ColumnDataType.INT, + DataSchema.ColumnDataType.INT}); + + Assert.assertEquals(brokerResponse.getResultTable().getRows().size(), 1); + Assert.assertEquals(brokerResponse.getResultTable().getRows().get(0), + new 
Object[]{"SELECT(selectList:literal)", 0, -1}); Review comment: This generic selectList:literal plan does not look correct, especially for this query, for a couple of reasons - We are selecting 2 columns 6+8 (14) and fromDateTime value. The plan only shows one column - This is a combination of both compile time function invocation followed by literal only. The plan doesn't reflect that ########## File path: pinot-common/src/main/java/org/apache/pinot/common/request/Selection.java ########## @@ -124,15 +124,15 @@ public short getThriftFieldId() { public static final java.util.Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { java.util.Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new java.util.EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SELECTION_COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("selectionColumns", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, + tmpMap.put(_Fields.SELECTION_COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("selectionColumns", org.apache.thrift.TFieldRequirementType.OPTIONAL, Review comment: Was the current (before this PR) formatting incorrect for these files in `common/request`? ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/DocIdSetOperator.java ########## @@ -34,6 +37,7 @@ */ public class DocIdSetOperator extends BaseOperator<DocIdSetBlock> { private static final String OPERATOR_NAME = "DocIdSetOperator"; + private static final String EXPLAIN_NAME = null; Review comment: Why null? Is it because we want to skip this operator? 
########## File path: pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/LiteralOnlyBrokerRequestTest.java ########## @@ -177,4 +177,55 @@ public void testBrokerRequestHandlerWithAsFunction() Assert.assertEquals(brokerResponse.getResultTable().getRows().get(0)[1], 1577836800000L); Assert.assertEquals(brokerResponse.getTotalDocs(), 0); } + + /** Tests for EXPLAIN PLAN for literal only queries. */ + @Test + public void testExplainPlanLiteralOnly() + throws Exception { + SingleConnectionBrokerRequestHandler requestHandler = + new SingleConnectionBrokerRequestHandler(new PinotConfiguration(), null, ACCESS_CONTROL_FACTORY, null, null, + new BrokerMetrics("", PinotMetricUtils.getPinotMetricsRegistry(), true, Collections.emptySet()), null); + + // Test 1: select constant + JsonNode request = new ObjectMapper().readTree("{\"sql\":\"EXPLAIN PLAN FOR SELECT 1.5, 'test'\"}"); + RequestStatistics requestStats = new RequestStatistics(); + BrokerResponseNative brokerResponse = requestHandler.handleRequest(request, null, requestStats); + + checkExplainResultSchema(brokerResponse.getResultTable().getDataSchema(), + new String[]{"Operator", "Operator_Id", "Parent_Id"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING, DataSchema.ColumnDataType.INT, + DataSchema.ColumnDataType.INT}); + + Assert.assertEquals(brokerResponse.getResultTable().getRows().size(), 1); + Assert.assertEquals(brokerResponse.getResultTable().getRows().get(0), + new Object[]{"SELECT(selectList:literal)", 0, -1}); Review comment: It should show 1.5 imo ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/BaseOperator.java ########## @@ -52,4 +54,25 @@ public final T nextBlock() { // Make it protected because we should always call nextBlock() protected abstract T getNextBlock(); + + // Enforcing sub-class to implement the getOperatorName(), as they can just return a static final, + // as opposed to this super class calling getClass().getSimpleName(). 
+ public abstract String getOperatorName(); Review comment: I thought there was another method we discussed w.r.t whether a node should be skipped or not ? ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/ProjectionOperator.java ########## @@ -68,6 +72,36 @@ public String getOperatorName() { return OPERATOR_NAME; } + @Override + public String getExplainPlanName() { + return EXPLAIN_NAME; + } + + @Override + public List<Operator> getChildOperators() { + return Collections.singletonList(_docIdSetOperator); + } + + @Override + public String toExplainString() { + StringBuilder stringBuilder = new StringBuilder(getExplainPlanName()).append('('); + if (_dataSourceMap.keySet().isEmpty()) { + // count aggregation function has empty input expressions + stringBuilder.append("ALL"); Review comment: I don't follow this. `PROJECT ALL` makes sense when we are projecting / selecting all columns. However, this comment is implying that we will do `PROJECT ALL` for count aggregation function which doesn't sound right ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/BaseOperator.java ########## @@ -52,4 +54,25 @@ public final T nextBlock() { // Make it protected because we should always call nextBlock() protected abstract T getNextBlock(); + + // Enforcing sub-class to implement the getOperatorName(), as they can just return a static final, + // as opposed to this super class calling getClass().getSimpleName(). + public abstract String getOperatorName(); + + public abstract String getExplainPlanName(); + + @Override + public List<Operator> getChildOperators() { + return new ArrayList<>(); Review comment: I also feel that not making this abstract has the potential for problems if someone who is writing a new operator misses out on implementing this interface (because compiler won't complain) but the operator has one or more child operators. 
So I would suggest to make this abstract and force every operator to implement this even if it has to return empty list ########## File path: pinot-core/src/test/java/org/apache/pinot/queries/ExplainPlanQueriesTest.java ########## @@ -0,0 +1,510 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.queries; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import org.apache.commons.io.FileUtils; +import org.apache.pinot.common.response.broker.BrokerResponseNative; +import org.apache.pinot.common.response.broker.ResultTable; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader; +import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl; +import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; +import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader; +import org.apache.pinot.segment.spi.ImmutableSegment; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig; +import org.apache.pinot.spi.config.table.IndexingConfig; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.data.readers.GenericRow; +import org.apache.pinot.spi.utils.ReadMode; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + + +public class ExplainPlanQueriesTest extends BaseQueriesTest { + private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "ExplainPlanQueriesTest"); + private static final String RAW_TABLE_NAME = "testTable"; + private static final String SEGMENT_NAME = "testSegment"; + private static final int NUM_RECORDS = 10; + + private final static String COL1_NO_INDEX = "noIndexCol1"; + private final static String COL2_NO_INDEX = "noIndexCol2"; + private final static String COL3_NO_INDEX = "noIndexCol3"; + private final static String COL1_INVERTED_INDEX 
= "invertedIndexCol1"; + private final static String COL2_INVERTED_INDEX = "invertedIndexCol2"; + private final static String COL3_INVERTED_INDEX = "invertedIndexCol3"; + private final static String COL1_RANGE_INDEX = "rangeIndexCol1"; + private final static String COL2_RANGE_INDEX = "rangeIndexCol2"; + private final static String COL3_RANGE_INDEX = "rangeIndexCol3"; + private final static String COL1_SORTED_INDEX = "sortedIndexCol1"; + private final static String COL1_JSON_INDEX = "jsonIndexCol1"; + private final static String COL1_TEXT_INDEX = "textIndexCol1"; + + private static final Schema SCHEMA = new Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME) + .addSingleValueDimension(COL1_NO_INDEX, FieldSpec.DataType.INT) + .addSingleValueDimension(COL2_NO_INDEX, FieldSpec.DataType.INT) + .addSingleValueDimension(COL3_NO_INDEX, FieldSpec.DataType.INT) + .addSingleValueDimension(COL1_INVERTED_INDEX, FieldSpec.DataType.DOUBLE) + .addSingleValueDimension(COL2_INVERTED_INDEX, FieldSpec.DataType.INT) + .addSingleValueDimension(COL3_INVERTED_INDEX, FieldSpec.DataType.STRING) + .addSingleValueDimension(COL1_RANGE_INDEX, FieldSpec.DataType.DOUBLE) + .addSingleValueDimension(COL2_RANGE_INDEX, FieldSpec.DataType.INT) + .addSingleValueDimension(COL3_RANGE_INDEX, FieldSpec.DataType.INT) + .addSingleValueDimension(COL1_SORTED_INDEX, FieldSpec.DataType.DOUBLE) + .addSingleValueDimension(COL1_JSON_INDEX, FieldSpec.DataType.JSON) + .addSingleValueDimension(COL1_TEXT_INDEX, FieldSpec.DataType.STRING).build(); + + private static final DataSchema DATA_SCHEMA = new DataSchema(new String[]{"Operator", "Operator_Id", "Parent_Id"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.STRING, DataSchema.ColumnDataType.INT, + DataSchema.ColumnDataType.INT}); + + private static final TableConfig TABLE_CONFIG = + new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).build(); + + private IndexSegment _indexSegment; + private List<IndexSegment> _indexSegments; + + 
@Override + protected String getFilter() { + return ""; + } + + @Override + protected IndexSegment getIndexSegment() { + return _indexSegment; + } + + @Override + protected List<IndexSegment> getIndexSegments() { + return _indexSegments; + } + + GenericRow createRecord(double doubleValue, int intValue, String stringValue, String jsonValue) { + GenericRow record = new GenericRow(); + record.putValue(COL1_NO_INDEX, intValue); + record.putValue(COL2_NO_INDEX, intValue); + record.putValue(COL3_NO_INDEX, intValue); + + record.putValue(COL1_INVERTED_INDEX, doubleValue); + record.putValue(COL2_INVERTED_INDEX, intValue); + record.putValue(COL3_INVERTED_INDEX, intValue); + + record.putValue(COL1_RANGE_INDEX, doubleValue); + record.putValue(COL2_RANGE_INDEX, intValue); + record.putValue(COL3_RANGE_INDEX, intValue); + + record.putValue(COL1_JSON_INDEX, jsonValue); + record.putValue(COL1_TEXT_INDEX, stringValue); + + return record; + } + + @BeforeClass + public void setUp() + throws Exception { + FileUtils.deleteDirectory(INDEX_DIR); + + List<GenericRow> records = new ArrayList<>(NUM_RECORDS); + records.add(createRecord(1.1, 2, "daffy", "{\"first\": \"daffy\", \"last\": \"duck\"}")); + + IndexingConfig indexingConfig = TABLE_CONFIG.getIndexingConfig(); + + List<String> invertedIndexColumns = Arrays.asList(COL1_INVERTED_INDEX, COL2_INVERTED_INDEX, COL3_INVERTED_INDEX); + indexingConfig.setInvertedIndexColumns(invertedIndexColumns); + + List<String> rangeIndexColumns = Arrays.asList(COL1_RANGE_INDEX, COL2_RANGE_INDEX, COL3_RANGE_INDEX); + indexingConfig.setRangeIndexColumns(rangeIndexColumns); + + List<String> jsonIndexColumns = Arrays.asList(COL1_JSON_INDEX); + indexingConfig.setJsonIndexColumns(jsonIndexColumns); + + SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(TABLE_CONFIG, SCHEMA); + segmentGeneratorConfig.setTableName(RAW_TABLE_NAME); + segmentGeneratorConfig.setSegmentName(SEGMENT_NAME); + segmentGeneratorConfig.setOutDir(INDEX_DIR.getPath()); 
+ + SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl(); + driver.init(segmentGeneratorConfig, new GenericRowRecordReader(records)); + driver.build(); + + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(); + indexLoadingConfig.setTableConfig(TABLE_CONFIG); + indexLoadingConfig.setInvertedIndexColumns(new HashSet<>(invertedIndexColumns)); + indexLoadingConfig.setRangeIndexColumns(new HashSet<>(rangeIndexColumns)); + indexLoadingConfig.setJsonIndexColumns(new HashSet<String>(jsonIndexColumns)); + indexLoadingConfig.setReadMode(ReadMode.mmap); + + ImmutableSegment immutableSegment = + ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig); + _indexSegment = immutableSegment; + _indexSegments = Arrays.asList(immutableSegment); + } + + /** Checks the correctness of EXPLAIN PLAN output. */ + private void check(String query, ResultTable expected) { + BrokerResponseNative response = getBrokerResponseForSqlQuery(query); + ResultTable actual = response.getResultTable(); + + System.out.println(resultTableToString("Expected", expected)); + System.out.println(resultTableToString("Actual", actual)); + + Assert.assertEquals(actual.getDataSchema(), expected.getDataSchema()); + Assert.assertEquals(actual.getRows().size(), expected.getRows().size()); + + List<Object[]> expectedRows = expected.getRows(); + List<Object[]> actualRows = actual.getRows(); + for (int i = 0; i < expectedRows.size(); i++) { + Object[] expectedRow = expectedRows.get(i); + Object[] actualRow = actualRows.get(i); + Assert.assertEquals(actualRow.length, expectedRow.length); + + for (int j = 0; j < actualRow.length; j++) { + Assert.assertEquals(actualRow[j], expectedRow[j]); + } + } + } + + /** This function is used to generated expected results while creating test cases; otherwise, not used. 
*/ + private static String resultTableToString(String tag, ResultTable table) { + System.out.println("\n" + tag + ":"); + List<Object[]> rows = table.getRows(); + + StringBuffer buffer = new StringBuffer(); + for (Object[] row : rows) { + buffer.append("result.add(new Object[]{\"" + row[0] + "\", " + row[1] + ", " + row[2] + "});\n"); + } + + return buffer.toString(); + } + + @Test + public void testSelect() { + String query1 = "EXPLAIN PLAN FOR SELECT * FROM testTable"; + List<Object[]> result1 = new ArrayList<>(); + result1.add(new Object[]{"BROKER_REDUCE(limit:10)", 0, -1}); + result1.add(new Object[]{"COMBINE_SELECT", 1, 0}); + result1.add( + new Object[]{"SELECT(selectList:invertedIndexCol1, invertedIndexCol2, invertedIndexCol3, jsonIndexCol1, " + + "noIndexCol1, noIndexCol2, noIndexCol3, rangeIndexCol1, rangeIndexCol2, rangeIndexCol3, " + + "sortedIndexCol1, textIndexCol1)", 2, 1}); + result1.add( + new Object[]{"TRANSFORM_PASSTHROUGH(transformFuncs:invertedIndexCol1, invertedIndexCol2, invertedIndexCol3, " + + "jsonIndexCol1, noIndexCol1, noIndexCol2, noIndexCol3, rangeIndexCol1, rangeIndexCol2, rangeIndexCol3, " + + "sortedIndexCol1, textIndexCol1)", 3, 2}); + result1.add( + new Object[]{"PROJECT(sortedIndexCol1, noIndexCol3, rangeIndexCol1, rangeIndexCol2, jsonIndexCol1, " + + "invertedIndexCol1, noIndexCol2, invertedIndexCol2, noIndexCol1, invertedIndexCol3, rangeIndexCol3, " + + "textIndexCol1)", 4, 3}); + result1.add(new Object[]{"FILTER_MATCH_ENTIRE_SEGMENT(docs:1)", 5, 4}); + check(query1, new ResultTable(DATA_SCHEMA, result1)); + + String query2 = "EXPLAIN PLAN FOR SELECT 'mickey' FROM testTable"; + List<Object[]> result2 = new ArrayList<>(); + result2.add(new Object[]{"BROKER_REDUCE(limit:10)", 0, -1}); + result2.add(new Object[]{"COMBINE_SELECT", 1, 0}); + result2.add(new Object[]{"SELECT(selectList:'mickey')", 2, 1}); + result2.add(new Object[]{"PROJECT(ALL)", 3, 2}); Review comment: This is confusing. 
Why use `PROJECT (ALL)` when we are only selecting / projecting a single column? In fact, this is counter-intuitive. The previous query uses `SELECT *`, so `PROJECT(ALL)` makes sense there, but we don't use `PROJECT (ALL)` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/BaseOperator.java ########## @@ -52,4 +54,25 @@ public final T nextBlock() { // Make it protected because we should always call nextBlock() protected abstract T getNextBlock(); + + // Enforcing sub-class to implement the getOperatorName(), as they can just return a static final, + // as opposed to this super class calling getClass().getSimpleName(). + public abstract String getOperatorName(); + + public abstract String getExplainPlanName(); + + @Override + public List<Operator> getChildOperators() { + return new ArrayList<>(); + } + + @Override + public String toExplainString() { + return getExplainPlanName(); + } + + @Override Review comment: Why this change? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org