This is an automated email from the ASF dual-hosted git repository. siddteotia pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 06888d8 Fix ByteArray datatype column metadata getMaxValue NPE bug and expose maxNumMultiValues (#7918) 06888d8 is described below commit 06888d8b60598b661ae90df7e7a2cc628460191a Author: Liang Mingqiang <mili...@linkedin.com> AuthorDate: Fri Jan 7 16:40:36 2022 -0800 Fix ByteArray datatype column metadata getMaxValue NPE bug and expose maxNumMultiValues (#7918) --- .../restlet/resources/TableMetadataInfo.java | 9 +- .../util/ServerSegmentMetadataReader.java | 11 +- .../tests/OfflineClusterIntegrationTest.java | 117 ++++++++++++++++----- .../pinot/server/api/resources/TablesResource.java | 22 ++-- 4 files changed, 118 insertions(+), 41 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java b/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java index c2c62d6..81d86b6 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java @@ -41,18 +41,21 @@ public class TableMetadataInfo { private final long _numRows; private final Map<String, Double> _columnLengthMap; private final Map<String, Double> _columnCardinalityMap; + private final Map<String, Double> _maxNumMultiValuesMap; @JsonCreator public TableMetadataInfo(@JsonProperty("tableName") String tableName, @JsonProperty("diskSizeInBytes") long sizeInBytes, @JsonProperty("numSegments") long numSegments, @JsonProperty("numRows") long numRows, @JsonProperty("columnLengthMap") Map<String, Double> columnLengthMap, - @JsonProperty("columnCardinalityMap") Map<String, Double> columnCardinalityMap) { + @JsonProperty("columnCardinalityMap") Map<String, Double> columnCardinalityMap, + @JsonProperty("maxNumMultiValuesMap") Map<String, Double> maxNumMultiValuesMap) { _tableName = tableName; _diskSizeInBytes = sizeInBytes; _numSegments = numSegments; _numRows = numRows; _columnLengthMap = columnLengthMap; _columnCardinalityMap = columnCardinalityMap; + _maxNumMultiValuesMap = maxNumMultiValuesMap; } public String getTableName() { @@ -78,4 +81,8 @@ public class TableMetadataInfo { public Map<String, Double> getColumnCardinalityMap() { return _columnCardinalityMap; } + + public Map<String, Double> getMaxNumMultiValuesMap() { + return _maxNumMultiValuesMap; + } } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java b/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java index 5fb8642..4ee509a 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java @@ -87,6 +87,7 @@ public class ServerSegmentMetadataReader { int failedParses = 0; final Map<String, Double> columnLengthMap = new HashMap<>(); final Map<String, Double> columnCardinalityMap = new HashMap<>(); + final Map<String, Double> maxNumMultiValuesMap = new HashMap<>(); for (Map.Entry<String, String> streamResponse : serviceResponse._httpResponses.entrySet()) { try { TableMetadataInfo tableMetadataInfo = @@ -96,6 +97,7 @@ public class ServerSegmentMetadataReader { totalNumSegments += tableMetadataInfo.getNumSegments(); tableMetadataInfo.getColumnLengthMap().forEach((k, v) -> columnLengthMap.merge(k, v, Double::sum)); tableMetadataInfo.getColumnCardinalityMap().forEach((k, v) -> columnCardinalityMap.merge(k, v, Double::sum)); + tableMetadataInfo.getMaxNumMultiValuesMap().forEach((k, v) -> maxNumMultiValuesMap.merge(k, v, Double::sum)); } catch (IOException e) { failedParses++; LOGGER.error("Unable to parse server {} response due to an error: ", streamResponse.getKey(), e); @@ -104,17 +106,18 @@ public class ServerSegmentMetadataReader { int finalTotalNumSegments = totalNumSegments; columnLengthMap.replaceAll((k, v) -> v / finalTotalNumSegments); columnCardinalityMap.replaceAll((k, v) -> v / finalTotalNumSegments); + maxNumMultiValuesMap.replaceAll((k, v) -> v / finalTotalNumSegments); // Since table segments may have multiple replicas, divide diskSizeInBytes, numRows and numSegments by numReplica - // to avoid double counting, for columnAvgLengthMap and columnAvgCardinalityMap, dividing by numReplica is not - // needed since totalNumSegments already contains replicas. + // to avoid double counting, for columnAvgLengthMap, columnAvgCardinalityMap and maxNumMultiValuesMap, dividing by + // numReplica is not needed since totalNumSegments already contains replicas. totalDiskSizeInBytes /= numReplica; totalNumSegments /= numReplica; totalNumRows /= numReplica; TableMetadataInfo aggregateTableMetadataInfo = - new TableMetadataInfo("", totalDiskSizeInBytes, totalNumSegments, totalNumRows, columnLengthMap, - columnCardinalityMap); + new TableMetadataInfo(tableNameWithType, totalDiskSizeInBytes, totalNumSegments, totalNumRows, columnLengthMap, + columnCardinalityMap, maxNumMultiValuesMap); if (failedParses != 0) { LOGGER.warn("Failed to parse {} / {} aggregated segment metadata responses from servers.", failedParses, serverUrls.size()); diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java index 519675c..5a41f91 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java @@ -123,6 +123,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet private static final String NUM_ROWS_KEY = "numRows"; private static final String COLUMN_LENGTH_MAP_KEY = "columnLengthMap"; private static final String COLUMN_CARDINALITY_MAP_KEY = "columnCardinalityMap"; + private static final String MAX_NUM_MULTI_VALUES_MAP_KEY = "maxNumMultiValuesMap"; // TODO: This might lead to flaky test, as this disk size is not deterministic // as it depends on the iteration order of a HashSet. private static final int DISK_SIZE_IN_BYTES = 20796000; @@ -1832,21 +1833,63 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet @Test public void testAggregateMetadataAPI() throws IOException { - JsonNode oneColumnResponse = JsonUtils.stringToJsonNode( - sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata?columns=DestCityMarketID")); - assertEquals(oneColumnResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES); - assertEquals(oneColumnResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS); - assertEquals(oneColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); - assertEquals(oneColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 1); - assertEquals(oneColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 1); - - JsonNode threeColumnsResponse = JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + JsonNode oneSVColumnResponse = JsonUtils + .stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata?columns=DestCityMarketID")); + assertEquals(oneSVColumnResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES); + assertEquals(oneSVColumnResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS); + assertEquals(oneSVColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); + assertEquals(oneSVColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 1); + assertEquals(oneSVColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 1); + // DestCityMarketID is a SV column + assertEquals(oneSVColumnResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 0); + + JsonNode oneMVColumnResponse = JsonUtils + .stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata?columns=DivLongestGTimes")); + assertEquals(oneMVColumnResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES); + assertEquals(oneMVColumnResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS); + assertEquals(oneMVColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); + assertEquals(oneMVColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 1); + assertEquals(oneMVColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 1); + // DivLongestGTimes is a MV column + assertEquals(oneMVColumnResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 1); + + JsonNode threeSVColumnsResponse = JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata?columns=DivActualElapsedTime&columns=CRSElapsedTime&columns=OriginStateName")); - assertEquals(threeColumnsResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES); - assertEquals(threeColumnsResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS); - assertEquals(threeColumnsResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); - assertEquals(threeColumnsResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 3); - assertEquals(threeColumnsResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 3); + assertEquals(threeSVColumnsResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES); + assertEquals(threeSVColumnsResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS); + assertEquals(threeSVColumnsResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); + assertEquals(threeSVColumnsResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 3); + assertEquals(threeSVColumnsResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 3); + assertEquals(threeSVColumnsResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 0); + + JsonNode threeSVColumnsWholeEncodedResponse = JsonUtils.stringToJsonNode(sendGetRequest( + _controllerBaseApiUrl + "/tables/mytable/metadata?columns=" + + "DivActualElapsedTime%26columns%3DCRSElapsedTime%26columns%3DOriginStateName")); + assertEquals(threeSVColumnsWholeEncodedResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES); + assertEquals(threeSVColumnsWholeEncodedResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS); + assertEquals(threeSVColumnsWholeEncodedResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); + assertEquals(threeSVColumnsWholeEncodedResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 3); + assertEquals(threeSVColumnsWholeEncodedResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 3); + assertEquals(threeSVColumnsWholeEncodedResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 0); + + JsonNode threeMVColumnsResponse = JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + + "/tables/mytable/metadata?columns=DivLongestGTimes&columns=DivWheelsOns&columns=DivAirports")); + assertEquals(threeMVColumnsResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES); + assertEquals(threeMVColumnsResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS); + assertEquals(threeMVColumnsResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); + assertEquals(threeMVColumnsResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 3); + assertEquals(threeMVColumnsResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 3); + assertEquals(threeMVColumnsResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 3); + + JsonNode threeMVColumnsWholeEncodedResponse = JsonUtils.stringToJsonNode(sendGetRequest( + _controllerBaseApiUrl + "/tables/mytable/metadata?columns=" + + "DivLongestGTimes%26columns%3DDivWheelsOns%26columns%3DDivAirports")); + assertEquals(threeMVColumnsWholeEncodedResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES); + assertEquals(threeMVColumnsWholeEncodedResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS); + assertEquals(threeMVColumnsWholeEncodedResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); + assertEquals(threeMVColumnsWholeEncodedResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 3); + assertEquals(threeMVColumnsWholeEncodedResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 3); + assertEquals(threeMVColumnsWholeEncodedResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 3); JsonNode zeroColumnResponse = JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata")); @@ -1855,21 +1898,39 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet assertEquals(zeroColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); assertEquals(zeroColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 0); assertEquals(zeroColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 0); + assertEquals(zeroColumnResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 0); - JsonNode allColumnResponse = + JsonNode starColumnResponse = JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata?columns=*")); - assertEquals(allColumnResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES); - assertEquals(allColumnResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS); - assertEquals(allColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); - assertEquals(allColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 82); - assertEquals(allColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 82); - - allColumnResponse = JsonUtils.stringToJsonNode(sendGetRequest( - _controllerBaseApiUrl + "/tables/mytable/metadata?columns=CRSElapsedTime&columns=*&columns=OriginStateName")); - assertEquals(allColumnResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES); - assertEquals(allColumnResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS); - assertEquals(allColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); - assertEquals(allColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 82); - assertEquals(allColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 82); + validateAllColumnsResponse(starColumnResponse); + + JsonNode starEncodedColumnResponse = + JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata?columns=%2A")); + validateAllColumnsResponse(starEncodedColumnResponse); + + JsonNode starWithExtraColumnResponse = JsonUtils.stringToJsonNode(sendGetRequest( + _controllerBaseApiUrl + "/tables/mytable/metadata?columns=" + + "CRSElapsedTime&columns=*&columns=OriginStateName")); + validateAllColumnsResponse(starWithExtraColumnResponse); + + JsonNode starWithExtraEncodedColumnResponse = JsonUtils.stringToJsonNode(sendGetRequest( + _controllerBaseApiUrl + "/tables/mytable/metadata?columns=" + + "CRSElapsedTime&columns=%2A&columns=OriginStateName")); + validateAllColumnsResponse(starWithExtraEncodedColumnResponse); + + JsonNode starWithExtraColumnWholeEncodedResponse = JsonUtils.stringToJsonNode(sendGetRequest( + _controllerBaseApiUrl + "/tables/mytable/metadata?columns=" + + "CRSElapsedTime%26columns%3D%2A%26columns%3DOriginStateName")); + validateAllColumnsResponse(starWithExtraColumnWholeEncodedResponse); + } + + private void validateAllColumnsResponse(JsonNode response) { + assertEquals(response.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES); + assertEquals(response.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS); + assertEquals(response.get(NUM_ROWS_KEY).asInt(), NUM_ROWS); + // mytable has 82 columns, among them 9 of the columns are MV columns. + assertEquals(response.get(COLUMN_LENGTH_MAP_KEY).size(), 82); + assertEquals(response.get(COLUMN_CARDINALITY_MAP_KEY).size(), 82); + assertEquals(response.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 9); } } diff --git a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java index 693cabc..ebf3346 100644 --- a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java +++ b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java @@ -184,6 +184,7 @@ public class TablesResource { long totalNumRows = 0; Map<String, Double> columnLengthMap = new HashMap<>(); Map<String, Double> columnCardinalityMap = new HashMap<>(); + Map<String, Double> maxNumMultiValuesMap = new HashMap<>(); try { for (SegmentDataManager segmentDataManager : segmentDataManagers) { if (segmentDataManager instanceof ImmutableSegmentDataManager) { @@ -202,7 +203,7 @@ public class TablesResource { } for (String column : columnSet) { ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataMap().get(column); - int columnLength; + int columnLength = 0; DataType storedDataType = columnMetadata.getDataType().getStoredType(); if (storedDataType.isFixedWidth()) { // For type of fixed width: INT, LONG, FLOAT, DOUBLE, BOOLEAN (stored as INT), TIMESTAMP (stored as LONG), @@ -210,21 +211,26 @@ public class TablesResource { columnLength = storedDataType.size(); } else if (columnMetadata.hasDictionary()) { // For type of variable width (String, Bytes), if it's stored using dictionary encoding, set the - // columnLength as the max - // length in dictionary. + // columnLength as the max length in dictionary. columnLength = columnMetadata.getColumnMaxLength(); } else if (storedDataType == DataType.STRING || storedDataType == DataType.BYTES) { // For type of variable width (String, Bytes), if it's stored using raw bytes, set the columnLength as - // the length - // of the max value. - columnLength = ((String) columnMetadata.getMaxValue()).getBytes(StandardCharsets.UTF_8).length; + // the length of the max value. + if (columnMetadata.getMaxValue() != null) { + String maxValueString = (String) columnMetadata.getMaxValue(); + columnLength = maxValueString.getBytes(StandardCharsets.UTF_8).length; + } } else { // For type of STRUCT, MAP, LIST, set the columnLength as DEFAULT_MAX_LENGTH (512). columnLength = FieldSpec.DEFAULT_MAX_LENGTH; } - int columnCardinality = segmentMetadata.getColumnMetadataMap().get(column).getCardinality(); + int columnCardinality = columnMetadata.getCardinality(); columnLengthMap.merge(column, (double) columnLength, Double::sum); columnCardinalityMap.merge(column, (double) columnCardinality, Double::sum); + if (!columnMetadata.isSingleValue()) { + int maxNumMultiValues = columnMetadata.getMaxNumberOfMultiValues(); + maxNumMultiValuesMap.merge(column, (double) maxNumMultiValues, Double::sum); + } } } } @@ -239,7 +245,7 @@ public class TablesResource { TableMetadataInfo tableMetadataInfo = new TableMetadataInfo(tableDataManager.getTableName(), totalSegmentSizeBytes, segmentDataManagers.size(), - totalNumRows, columnLengthMap, columnCardinalityMap); + totalNumRows, columnLengthMap, columnCardinalityMap, maxNumMultiValuesMap); return ResourceUtils.convertToJsonString(tableMetadataInfo); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org