jtao15 commented on a change in pull request #6419:
URL: https://github.com/apache/incubator-pinot/pull/6419#discussion_r557050072
########## File path: pinot-integration-tests/src/test/java/org/apache/pinot/compat/tests/StreamOp.java ##########
@@ -85,4 +116,118 @@ boolean runOp() {
   public void setTableConfigFileNames(List<String> tableConfigFileNames) {
     _tableConfigFileNames = tableConfigFileNames;
   }
+
+  public String getRecordReaderConfigFileName() {
+    return _recordReaderConfigFileName;
+  }
+
+  public void setRecordReaderConfigFileName(String recordReaderConfigFileName) {
+    _recordReaderConfigFileName = recordReaderConfigFileName;
+  }
+
+  @Override
+  boolean runOp() {
+    try {
+      File csvFile = new File(_inputDataFileName);
+      Map<String, String> streamConfigMap = JsonUtils.fileToObject(new File(_streamConfigFileName), HashMap.class);
+      final Map<String, Object> config = new HashMap<>();
+      config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:" + ClusterDescriptor.KAFKA_PORT);
+      config.put(AdminClientConfig.CLIENT_ID_CONFIG, "Kafka2AdminClient-" + UUID.randomUUID().toString());
+      config.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, 15000);
+      AdminClient adminClient = KafkaAdminClient.create(config);
+
+      // create kafka topic
+      String topicName = streamConfigMap.get("stream.kafka.topic.name");
+      int partitions = Integer.parseInt(streamConfigMap.get("stream.kafka.numPartitions"));
+      String partitionColumn = streamConfigMap.get("stream.kafka.partitionColumn");
+      NewTopic newTopic = new NewTopic(topicName, partitions, (short) 1);
+      CreateTopicsResult createTopicsResult = adminClient.createTopics(Arrays.asList(newTopic));
+      try {
+        createTopicsResult.all().get();
+      } catch (InterruptedException | ExecutionException e) {
+        LOGGER.warn("Failed to create Kafka topic: {}, Exception: {}", newTopic.toString(), e);
+      }
+
+      List<Long> existingTotalDocs = new ArrayList<>();
+      List<String> tableNames = new ArrayList<>();
+
+      for (String tableConfigFileName : _tableConfigFileNames) {
+        // get table config
+        TableConfig tableConfig = JsonUtils.fileToObject(new File(tableConfigFileName), TableConfig.class);
+
+        // get original rows
+        String tableName = tableConfig.getTableName();
+        tableNames.add(tableName);
+        existingTotalDocs.add(fetchExistingTotalDocs(tableConfig.getTableName()));
+      }
+
+      // push avro file to kafka
+      Schema avroSchema = StreamOpUtils.getAvroSchema(new File(_avroSchemaFileName));
+      StreamOpUtils.pushCsvIntoKafka(
+          csvFile,
+          avroSchema,
+          null,
+          _numRows,
+          getCSVRecordReaderConfig(),
+          "localhost:" + KafkaStarterUtils.DEFAULT_KAFKA_PORT,
+          topicName,
+          10000,
+          null,
+          partitionColumn);
+
+      for (int i = 0; i < tableNames.size(); i++) {
+        // verify number of rows increases as expected
+        String tableName = tableNames.get(i);
+        long targetTotalDocs = existingTotalDocs.get(i) + _numRows;
+        waitForDocsLoaded(tableName, targetTotalDocs, 60_000L);
+        LOGGER.info("Verified {} new rows in table: {}", _numRows, tableName);
+      }
+    } catch (Exception e) {
+      LOGGER.error("Failed to ingest stream data", e);
+      return false;
+    }
+    return true;
+  }
+
+  private RecordReaderConfig getCSVRecordReaderConfig() throws IOException {
+    CSVRecordReaderConfig recordReaderConfig = JsonUtils.fileToObject(new File(_recordReaderConfigFileName), CSVRecordReaderConfig.class);
+    return recordReaderConfig;
+  }
+
+  private long fetchExistingTotalDocs(String tableName) throws Exception {
+    String query = "SELECT count(*) FROM " + tableName;
+    JsonNode response = ClusterTest.postQuery(query, ClusterDescriptor.BROKER_URL, false, "sql");
+    if (response == null) {
+      String errorMsg = String.format("Failed to query Table: %s", tableName);
+      LOGGER.error(errorMsg);
+      throw new RuntimeException(errorMsg);
+    }
+    if (response.has("hasPartialResults") && response.get("hasPartialResults").asBoolean()) {

Review comment:

   Actually, the `totalDocs` in `V1Constants` is the total docs in the segment metadata, and there is no `hasPartialResults` field in the broker response. I used my own constants instead.
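Since the diff is truncated at the line under review and the PR's own constants are not shown, here is a minimal, hedged sketch of how the check could be completed without the non-existent `hasPartialResults` flag: inspect the `exceptions` array and read the count from `resultTable.rows`. These field names are assumptions about the standard Pinot SQL broker response, not something this diff confirms.

```java
// Hedged sketch: fail the op when the broker reports query exceptions,
// instead of testing the non-existent "hasPartialResults" flag.
// "exceptions" and "resultTable" are assumed response fields.
private long fetchExistingTotalDocs(String tableName) throws Exception {
  String query = "SELECT count(*) FROM " + tableName;
  JsonNode response = ClusterTest.postQuery(query, ClusterDescriptor.BROKER_URL, false, "sql");
  if (response == null) {
    throw new RuntimeException(String.format("Failed to query Table: %s", tableName));
  }
  JsonNode exceptions = response.get("exceptions");
  if (exceptions != null && exceptions.size() > 0) {
    throw new RuntimeException(
        String.format("Query on Table: %s returned exceptions: %s", tableName, exceptions));
  }
  // COUNT(*) comes back as the first cell of the first result row
  return response.get("resultTable").get("rows").get(0).get(0).asLong();
}
```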
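One small resource note on the topic-creation block in the diff: the `AdminClient` is created but never closed. Since Kafka's `AdminClient` implements `AutoCloseable`, a try-with-resources form would release its threads even when topic creation fails. A sketch of that variant, assuming the same `config` map, variables, and imports as the diff (plus `java.util.Collections`):

```java
// Sketch: scope the AdminClient with try-with-resources so it is always
// closed; the createTopics() call itself is unchanged from the diff.
try (AdminClient adminClient = KafkaAdminClient.create(config)) {
  NewTopic newTopic = new NewTopic(topicName, partitions, (short) 1);
  try {
    adminClient.createTopics(Collections.singletonList(newTopic)).all().get();
  } catch (InterruptedException | ExecutionException e) {
    // only warn, e.g. because the topic may already exist from a previous run
    LOGGER.warn("Failed to create Kafka topic: {}", newTopic, e);
  }
}
```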