npawar commented on a change in pull request #5737: URL: https://github.com/apache/incubator-pinot/pull/5737#discussion_r460240388
########## File path: pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java ########## @@ -115,17 +116,17 @@ public RealtimeProvisioningHelperCommand setSampleCompletedSegmentDir(String sam return this; } - public RealtimeProvisioningHelperCommand setPeriodSampleSegmentConsumed(String periodSampleSegmentConsumed) { - _periodSampleSegmentConsumed = periodSampleSegmentConsumed; + public RealtimeProvisioningHelperCommand setPeriodSampleSegmentConsumed(int ingestionRate) { Review comment: setter name should be `setIngestionRate()` ? ########## File path: pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java ########## @@ -145,6 +146,20 @@ public boolean getHelp() { return _help; } + @Override + public void printExamples() { + StringBuilder builder = new StringBuilder(); + builder.append("\n\nThis command allows you to estimate the capacity needed for provisioning realtime hosts") + .append("It assumes that there is no upper limit to the amount of memory you can mmap") + .append("\nIf you have a hybrid table, then consult the push frequency setting in your offline table specify it in the -pushFrequency argument") + .append("\nIf you have a realtime-only table, then the default behavior is to assume that your queries need all data in memory all the time") + .append("\nHowever, if most of your queries are going to be for (say) the last 96 hours, then you can specify that in -retentionHours") + .append("\nDoing so will let this program assume that you are willing to take a page hit when querying older data") + .append("\nand optimize memory and number of hosts accordingly.") + ; + System.out.println(builder.toString()); Review comment: LOGGER instead of sout? ########## File path: pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java ########## @@ -158,17 +173,36 @@ public boolean execute() throw new RuntimeException("Exception in reading table config from file " + _tableConfigFile, e); } + StringBuilder note = new StringBuilder(); + note.append("\nNote:\n"); int numReplicas = tableConfig.getValidationConfig().getReplicasPerPartitionNumber(); - if (_retentionHours == 0) { - if (tableConfig.getValidationConfig().getSegmentPushFrequency().equalsIgnoreCase("hourly")) { - _retentionHours = DEFAULT_RETENTION_FOR_HOURLY_PUSH; + int tableRetentionHours = (int) TimeUnit.valueOf(tableConfig.getValidationConfig().getRetentionTimeUnit()) + .toHours(Long.parseLong(tableConfig.getValidationConfig().getRetentionTimeValue())); + if (_retentionHours > 0) { + note.append("\n* Table retention and push frequency ignored for determining retentionHours"); + } else { + if (_pushFrequency == null) { + // This is a realtime-only table. Pick up the retention time + _retentionHours = tableRetentionHours; + note.append("\n* Retention hours taken from tableConfig"); } else { - _retentionHours = DEFAULT_RETENTION_FOR_DAILY_PUSH; + if ("hourly".equalsIgnoreCase(_pushFrequency)) { + _retentionHours = DEFAULT_RETENTION_FOR_HOURLY_PUSH; + } else if ("daily".equalsIgnoreCase(_pushFrequency)) { + _retentionHours = DEFAULT_RETENTION_FOR_DAILY_PUSH; + } else if ("weekly".equalsIgnoreCase(_pushFrequency)) { Review comment: thats over 3 weeks for a weekly push. SHould it instead be 7days + 72h for weekly and 31 days + 72h for monthly? ########## File path: pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java ########## @@ -90,8 +91,8 @@ public RealtimeProvisioningHelperCommand setNumPartitions(int numPartitions) { return this; } - public RealtimeProvisioningHelperCommand setRetentionHours(int retentionHours) { Review comment: setter for retentionHours is still needed rt? ########## File path: pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java ########## @@ -158,17 +173,36 @@ public boolean execute() throw new RuntimeException("Exception in reading table config from file " + _tableConfigFile, e); } + StringBuilder note = new StringBuilder(); + note.append("\nNote:\n"); int numReplicas = tableConfig.getValidationConfig().getReplicasPerPartitionNumber(); - if (_retentionHours == 0) { - if (tableConfig.getValidationConfig().getSegmentPushFrequency().equalsIgnoreCase("hourly")) { - _retentionHours = DEFAULT_RETENTION_FOR_HOURLY_PUSH; + int tableRetentionHours = (int) TimeUnit.valueOf(tableConfig.getValidationConfig().getRetentionTimeUnit()) + .toHours(Long.parseLong(tableConfig.getValidationConfig().getRetentionTimeValue())); + if (_retentionHours > 0) { + note.append("\n* Table retention and push frequency ignored for determining retentionHours"); Review comment: can we add "since retentionHours is provided" to this message. Otherwise it looks like it is flagging something that went wrong ########## File path: pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java ########## @@ -177,25 +211,26 @@ public boolean execute() // Completed: Use multiple (completedSize,numHours) data points to calculate completed size for our numHours File sampleCompletedSegmentFile = new File(_sampleCompletedSegmentDir); - long sampleSegmentConsumedSeconds = - TimeUnit.SECONDS.convert(TimeUtils.convertPeriodToMillis(_periodSampleSegmentConsumed), TimeUnit.MILLISECONDS); - long maxUsableHostMemBytes = DataSizeUtils.toBytes(_maxUsableHostMemory); MemoryEstimator memoryEstimator = - new MemoryEstimator(tableConfig, sampleCompletedSegmentFile, sampleSegmentConsumedSeconds, - maxUsableHostMemBytes); + new MemoryEstimator(tableConfig, sampleCompletedSegmentFile, _ingestionRate, maxUsableHostMemBytes, tableRetentionHours); File sampleStatsHistory = memoryEstimator.initializeStatsHistory(); memoryEstimator .estimateMemoryUsed(sampleStatsHistory, numHosts, numHours, totalConsumingPartitions, _retentionHours); + note.append("\n* See https://docs.pinot.apache.org/operators/operating-pinot/tuning/realtime"); // TODO: Make a recommendation of what config to choose by considering more inputs such as qps - LOGGER.info("\nMemory used per host"); - displayResults(memoryEstimator.getTotalMemoryPerHost(), numHosts, numHours); + System.out.println("\n============================================================\n" + toString()); + System.out.println(note.toString()); + LOGGER.info("\nMemory used per host (Active/Mapped)"); + displayResults(memoryEstimator.getActiveMemoryPerHost(), numHosts, numHours); LOGGER.info("\nOptimal segment size"); displayResults(memoryEstimator.getOptimalSegmentSize(), numHosts, numHours); LOGGER.info("\nConsuming memory"); displayResults(memoryEstimator.getConsumingMemoryPerHost(), numHosts, numHours); + LOGGER.info("\nNumber of segments queried per host"); Review comment: I don't follow what "Number of segments queries per host" means. Won't that depend on the specific query patterns? Some more info about this in the log might help ########## File path: pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java ########## @@ -177,25 +211,26 @@ public boolean execute() // Completed: Use multiple (completedSize,numHours) data points to calculate completed size for our numHours File sampleCompletedSegmentFile = new File(_sampleCompletedSegmentDir); - long sampleSegmentConsumedSeconds = - TimeUnit.SECONDS.convert(TimeUtils.convertPeriodToMillis(_periodSampleSegmentConsumed), TimeUnit.MILLISECONDS); - long maxUsableHostMemBytes = DataSizeUtils.toBytes(_maxUsableHostMemory); MemoryEstimator memoryEstimator = - new MemoryEstimator(tableConfig, sampleCompletedSegmentFile, sampleSegmentConsumedSeconds, - maxUsableHostMemBytes); + new MemoryEstimator(tableConfig, sampleCompletedSegmentFile, _ingestionRate, maxUsableHostMemBytes, tableRetentionHours); File sampleStatsHistory = memoryEstimator.initializeStatsHistory(); memoryEstimator .estimateMemoryUsed(sampleStatsHistory, numHosts, numHours, totalConsumingPartitions, _retentionHours); + note.append("\n* See https://docs.pinot.apache.org/operators/operating-pinot/tuning/realtime"); // TODO: Make a recommendation of what config to choose by considering more inputs such as qps - LOGGER.info("\nMemory used per host"); - displayResults(memoryEstimator.getTotalMemoryPerHost(), numHosts, numHours); + System.out.println("\n============================================================\n" + toString()); Review comment: use LOGGER in all these lines? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org