Jackie-Jiang commented on a change in pull request #6469: URL: https://github.com/apache/incubator-pinot/pull/6469#discussion_r562363147
##########
File path: pinot-minion/src/main/java/org/apache/pinot/minion/executor/SegmentGenerationAndPushTaskExecutor.java
##########

@@ -281,18 +282,27 @@ private SegmentGenerationTaskSpec generateTaskSpec(Map<String, String> taskConfi
     recordReaderSpec.setClassName(taskConfigs.get(BatchConfigProperties.RECORD_READER_CLASS));
     recordReaderSpec.setConfigClassName(taskConfigs.get(BatchConfigProperties.RECORD_READER_CONFIG_CLASS));
     taskSpec.setRecordReaderSpec(recordReaderSpec);
+
+    String rawTableName = taskConfigs.get(BatchConfigProperties.TABLE_NAME);
+    String tableNameWithType = rawTableName != null ? TableNameBuilder.OFFLINE.tableNameWithType(rawTableName) : null;
     Schema schema;
     if (taskConfigs.containsKey(BatchConfigProperties.SCHEMA)) {
       schema = JsonUtils
           .stringToObject(JsonUtils.objectToString(taskConfigs.get(BatchConfigProperties.SCHEMA)), Schema.class);
     } else if (taskConfigs.containsKey(BatchConfigProperties.SCHEMA_URI)) {

Review comment:
Is this for backward compatibility? I don't see where we set the URI in the task generator.

##########
File path: pinot-minion/src/main/java/org/apache/pinot/minion/executor/SegmentGenerationAndPushTaskExecutor.java
##########

@@ -281,18 +282,27 @@ private SegmentGenerationTaskSpec generateTaskSpec(Map<String, String> taskConfi
     recordReaderSpec.setClassName(taskConfigs.get(BatchConfigProperties.RECORD_READER_CLASS));
     recordReaderSpec.setConfigClassName(taskConfigs.get(BatchConfigProperties.RECORD_READER_CONFIG_CLASS));
     taskSpec.setRecordReaderSpec(recordReaderSpec);
+
+    String rawTableName = taskConfigs.get(BatchConfigProperties.TABLE_NAME);
+    String tableNameWithType = rawTableName != null ? TableNameBuilder.OFFLINE.tableNameWithType(rawTableName) : null;
     Schema schema;
     if (taskConfigs.containsKey(BatchConfigProperties.SCHEMA)) {
       schema = JsonUtils
           .stringToObject(JsonUtils.objectToString(taskConfigs.get(BatchConfigProperties.SCHEMA)), Schema.class);
     } else if (taskConfigs.containsKey(BatchConfigProperties.SCHEMA_URI)) {
       schema = SegmentGenerationUtils.getSchema(taskConfigs.get(BatchConfigProperties.SCHEMA_URI));
     } else {
-      throw new RuntimeException(
-          "Missing schema for segment generation job: please set `schema` or `schemaURI` in task config.");
+      schema = getSchema(tableNameWithType);
     }
     taskSpec.setSchema(schema);
-    JsonNode tableConfig = JsonUtils.stringToJsonNode(taskConfigs.get(BatchConfigProperties.TABLE_CONFIGS));
+    JsonNode tableConfig;

Review comment:
Why do we store the table config as a JsonNode instead of a TableConfig? Right now TableConfig is accessible in the spi package.

##########
File path: pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/generator/SegmentGenerationAndPushTaskGenerator.java
##########

@@ -220,15 +220,13 @@ public String getTaskType() {
     String pushMode = IngestionConfigUtils.getPushMode(batchConfigMap);
     Map<String, String> singleFileGenerationTaskConfig = new HashMap<>(batchConfigMap);
+    singleFileGenerationTaskConfig.put(BatchConfigProperties.TABLE_NAME,

Review comment:
We might want to put the offline table name (table name with type) in case we want to support generating segments for realtime tables in the future.
##########
File path: pinot-minion/src/main/java/org/apache/pinot/minion/executor/SegmentGenerationAndPushTaskExecutor.java
##########

@@ -281,18 +282,27 @@ private SegmentGenerationTaskSpec generateTaskSpec(Map<String, String> taskConfi
     recordReaderSpec.setClassName(taskConfigs.get(BatchConfigProperties.RECORD_READER_CLASS));
     recordReaderSpec.setConfigClassName(taskConfigs.get(BatchConfigProperties.RECORD_READER_CONFIG_CLASS));
     taskSpec.setRecordReaderSpec(recordReaderSpec);
+
+    String rawTableName = taskConfigs.get(BatchConfigProperties.TABLE_NAME);
+    String tableNameWithType = rawTableName != null ? TableNameBuilder.OFFLINE.tableNameWithType(rawTableName) : null;
     Schema schema;
     if (taskConfigs.containsKey(BatchConfigProperties.SCHEMA)) {
       schema = JsonUtils
           .stringToObject(JsonUtils.objectToString(taskConfigs.get(BatchConfigProperties.SCHEMA)), Schema.class);
     } else if (taskConfigs.containsKey(BatchConfigProperties.SCHEMA_URI)) {
       schema = SegmentGenerationUtils.getSchema(taskConfigs.get(BatchConfigProperties.SCHEMA_URI));
     } else {
-      throw new RuntimeException(
-          "Missing schema for segment generation job: please set `schema` or `schemaURI` in task config.");
+      schema = getSchema(tableNameWithType);
     }
     taskSpec.setSchema(schema);
-    JsonNode tableConfig = JsonUtils.stringToJsonNode(taskConfigs.get(BatchConfigProperties.TABLE_CONFIGS));
+    JsonNode tableConfig;
+    if (taskConfigs.containsKey(BatchConfigProperties.TABLE_CONFIGS)) {
+      tableConfig = JsonUtils.stringToJsonNode(taskConfigs.get(BatchConfigProperties.TABLE_CONFIGS));
+    } else if (taskConfigs.containsKey(BatchConfigProperties.TABLE_CONFIGS_URI)) {

Review comment:
Same here.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at: us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org