Jackie-Jiang commented on a change in pull request #8358: URL: https://github.com/apache/pinot/pull/8358#discussion_r831390972
########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/upload/ZKOperator.java ########## @@ -101,16 +102,17 @@ public void completeSegmentOperations(String tableNameWithType, SegmentMetadata LOGGER.info("Segment {} from table {} already exists, refreshing if necessary", segmentName, tableNameWithType); processExistingSegment(segmentMetadata, finalSegmentLocationURI, currentSegmentLocation, enableParallelPushProtection, headers, zkDownloadURI, crypter, tableNameWithType, segmentName, - segmentMetadataZNRecord, moveSegmentToFinalLocation); + segmentMetadataZNRecord, moveSegmentToFinalLocation, segmentSizeInBytes); } private void processExistingSegment(SegmentMetadata segmentMetadata, URI finalSegmentLocationURI, File currentSegmentLocation, boolean enableParallelPushProtection, HttpHeaders headers, String zkDownloadURI, String crypter, String tableNameWithType, String segmentName, ZNRecord znRecord, - boolean moveSegmentToFinalLocation) + boolean moveSegmentToFinalLocation, long segmentSizeInBytes) throws Exception { SegmentZKMetadata existingSegmentZKMetadata = new SegmentZKMetadata(znRecord); + existingSegmentZKMetadata.setSizeInBytes(segmentSizeInBytes); Review comment: Pass `segmentSizeInBytes` into `_pinotHelixResourceManager.refreshSegment()` ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/upload/ZKOperator.java ########## @@ -234,11 +236,14 @@ private void checkCRC(HttpHeaders headers, String offlineTableName, String segme private void processNewSegment(SegmentMetadata segmentMetadata, URI finalSegmentLocationURI, File currentSegmentLocation, String zkDownloadURI, HttpHeaders headers, String crypter, String tableNameWithType, - String segmentName, boolean moveSegmentToFinalLocation, boolean enableParallelPushProtection) + String segmentName, boolean moveSegmentToFinalLocation, boolean enableParallelPushProtection, + long segmentSizeInBytes) throws Exception { SegmentZKMetadata newSegmentZKMetadata = _pinotHelixResourceManager .constructZkMetadataForNewSegment(tableNameWithType, segmentMetadata, zkDownloadURI, crypter); + newSegmentZKMetadata.setSizeInBytes(segmentSizeInBytes); Review comment: Pass `segmentSizeInBytes` into `_pinotHelixResourceManager.constructZkMetadataForNewSegment()` ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/util/TableSizeReader.java ########## @@ -78,8 +82,8 @@ public TableSizeDetails getTableSizeDetails(@Nonnull String tableName, @Nonnegat Preconditions.checkNotNull(tableName, "Table name should not be null"); Preconditions.checkArgument(timeoutMsec > 0, "Timeout value must be greater than 0"); - boolean hasRealtimeTable = false; - boolean hasOfflineTable = false; + boolean hasRealtimeTable; Review comment: Since we need to read table config, directly read the table config to identify whether the table exists ########## File path: pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java ########## @@ -88,7 +94,10 @@ DROPPED_MINION_INSTANCES("droppedMinionInstances", true), // Number of online minion instances - ONLINE_MINION_INSTANCES("onlineMinionInstances", true); + ONLINE_MINION_INSTANCES("onlineMinionInstances", true), + + // Total size of compressed segments per table + TOTAL_SEGMENT_SIZE_PER_TABLE("bytes", false); Review comment: ```suggestion TABLE_COMPRESSED_SIZE("bytes", false); ``` ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/util/TableSizeReader.java ########## @@ -101,16 +105,46 @@ public TableSizeDetails getTableSizeDetails(@Nonnull String tableName, @Nonnegat tableSizeDetails._realtimeSegments = getTableSubtypeSize(realtimeTableName, timeoutMsec); tableSizeDetails._reportedSizeInBytes += tableSizeDetails._realtimeSegments._reportedSizeInBytes; tableSizeDetails._estimatedSizeInBytes += tableSizeDetails._realtimeSegments._estimatedSizeInBytes; + + // Get size in bytes per replica + TableConfig realtimeTableConfig = + ZKMetadataProvider.getRealtimeTableConfig(_helixResourceManager.getPropertyStore(), realtimeTableName); + long sizeInBytesPerReplica = tableSizeDetails._estimatedSizeInBytes / getNumReplicas(realtimeTableConfig); + + // Update table size metrics + _controllerMetrics.setValueOfTableGauge(realtimeTableName, + ControllerGauge.REALTIME_TABLE_ESTIMATED_SIZE_PER_REPLICA, sizeInBytesPerReplica); } if (hasOfflineTable) { String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(tableName); tableSizeDetails._offlineSegments = getTableSubtypeSize(offlineTableName, timeoutMsec); tableSizeDetails._reportedSizeInBytes += tableSizeDetails._offlineSegments._reportedSizeInBytes; tableSizeDetails._estimatedSizeInBytes += tableSizeDetails._offlineSegments._estimatedSizeInBytes; + + // Get size in bytes per replica + TableConfig offlineTableConfig = + ZKMetadataProvider.getOfflineTableConfig(_helixResourceManager.getPropertyStore(), offlineTableName); + long sizeInBytesPerReplica = tableSizeDetails._estimatedSizeInBytes / getNumReplicas(offlineTableConfig); + + // Update table size metrics + _controllerMetrics.setValueOfTableGauge(offlineTableName, + ControllerGauge.OFFLINE_TABLE_ESTIMATED_SIZE_PER_REPLICA, sizeInBytesPerReplica); } return tableSizeDetails; } + private int getNumReplicas(TableConfig tableConfig) { Review comment: We should extract this common logic into `PinotHelixResourceManager` ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentUploadDownloadRestletResource.java ########## @@ -214,17 +214,23 @@ private SuccessResponse uploadSegment(@Nullable String tableName, TableType tabl boolean uploadedSegmentIsEncrypted = !Strings.isNullOrEmpty(crypterClassNameInHeader); FileUploadDownloadClient.FileUploadType uploadType = getUploadType(uploadTypeStr); File dstFile = uploadedSegmentIsEncrypted ? tempEncryptedFile : tempDecryptedFile; + long segmentSizeInBytes; switch (uploadType) { case URI: downloadSegmentFileFromURI(downloadUri, dstFile, tableName); + segmentSizeInBytes = dstFile.length(); break; case SEGMENT: createSegmentFileFromMultipart(multiPart, dstFile); + segmentSizeInBytes = dstFile.length(); break; case METADATA: moveSegmentToFinalLocation = false; Preconditions.checkState(downloadUri != null, "Download URI is required in segment metadata upload mode"); createSegmentFileFromMultipart(multiPart, dstFile); + URI segmentURI = new URI(downloadUri); + PinotFS pinotFS = PinotFSFactory.create(segmentURI.getScheme()); + segmentSizeInBytes = pinotFS.length(segmentURI); Review comment: Let's add a try-catch in case reading the size throws exception. We don't want to fail the upload because of this. If it throws exception, we can log a warning and set size to -1 ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/util/TableSizeReader.java ########## @@ -101,16 +105,46 @@ public TableSizeDetails getTableSizeDetails(@Nonnull String tableName, @Nonnegat tableSizeDetails._realtimeSegments = getTableSubtypeSize(realtimeTableName, timeoutMsec); tableSizeDetails._reportedSizeInBytes += tableSizeDetails._realtimeSegments._reportedSizeInBytes; tableSizeDetails._estimatedSizeInBytes += tableSizeDetails._realtimeSegments._estimatedSizeInBytes; + + // Get size in bytes per replica + TableConfig realtimeTableConfig = + ZKMetadataProvider.getRealtimeTableConfig(_helixResourceManager.getPropertyStore(), realtimeTableName); + long sizeInBytesPerReplica = tableSizeDetails._estimatedSizeInBytes / getNumReplicas(realtimeTableConfig); + + // Update table size metrics + _controllerMetrics.setValueOfTableGauge(realtimeTableName, + ControllerGauge.REALTIME_TABLE_ESTIMATED_SIZE_PER_REPLICA, sizeInBytesPerReplica); } if (hasOfflineTable) { String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(tableName); tableSizeDetails._offlineSegments = getTableSubtypeSize(offlineTableName, timeoutMsec); tableSizeDetails._reportedSizeInBytes += tableSizeDetails._offlineSegments._reportedSizeInBytes; tableSizeDetails._estimatedSizeInBytes += tableSizeDetails._offlineSegments._estimatedSizeInBytes; + + // Get size in bytes per replica + TableConfig offlineTableConfig = + ZKMetadataProvider.getOfflineTableConfig(_helixResourceManager.getPropertyStore(), offlineTableName); + long sizeInBytesPerReplica = tableSizeDetails._estimatedSizeInBytes / getNumReplicas(offlineTableConfig); + + // Update table size metrics + _controllerMetrics.setValueOfTableGauge(offlineTableName, + ControllerGauge.OFFLINE_TABLE_ESTIMATED_SIZE_PER_REPLICA, sizeInBytesPerReplica); } return tableSizeDetails; } + private int getNumReplicas(TableConfig tableConfig) { + if (tableConfig.isDimTable()) { + // If the table is a dimension table then fetch the tenant config and get the number of server belonging + // to the tenant + TenantConfig tenantConfig = tableConfig.getTenantConfig(); + Set<String> serverInstances = _helixResourceManager.getAllInstancesForServerTenant(tenantConfig.getServer()); + return serverInstances.size(); + } + + return tableConfig.getValidationConfig().getReplicationNumber(); Review comment: For `LLC` table, we need to use `_replicasPerPartition` ########## File path: pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java ########## @@ -69,6 +69,12 @@ // Estimated size of offline table OFFLINE_TABLE_ESTIMATED_SIZE("OfflineTableEstimatedSize", false), + // Estimated size of offline table per replica + OFFLINE_TABLE_ESTIMATED_SIZE_PER_REPLICA("bytes", false), + + // Estimated size of realtime table per replica + REALTIME_TABLE_ESTIMATED_SIZE_PER_REPLICA("bytes", false), Review comment: We don't need to separate metrics for offline and realtime table. Merging them as `TABLE_ESTIMATED_SIZE_PER_REPLICA` ########## File path: pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/SegmentZKMetadata.java ########## @@ -312,6 +312,14 @@ private void setNonNegativeValue(String key, long value) { } } + public void setSizeInBytes(long sizeInBytes) { Review comment: Move these 2 methods ahead as they apply to general segments (probably after `setTotalDocs()`) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org