yashmayya commented on code in PR #14110: URL: https://github.com/apache/pinot/pull/14110#discussion_r1895411031
########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/cursors/MemoryResponseStore.java: ########## @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.integration.tests.cursors; + +import com.google.auto.service.AutoService; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import javax.validation.constraints.NotNull; +import org.apache.pinot.common.cursors.AbstractResponseStore; +import org.apache.pinot.common.metrics.BrokerMetrics; +import org.apache.pinot.common.response.CursorResponse; +import org.apache.pinot.common.response.broker.CursorResponseNative; +import org.apache.pinot.common.response.broker.ResultTable; +import org.apache.pinot.spi.cursors.ResponseStore; +import org.apache.pinot.spi.env.PinotConfiguration; + + +@AutoService(ResponseStore.class) +public class MemoryResponseStore extends AbstractResponseStore { + private final Map<String, CursorResponse> _cursorResponseMap = new HashMap<>(); + private final Map<String, ResultTable> _resultTableMap = new HashMap<>(); + + private static final String TYPE = "memory"; + + private BrokerMetrics _brokerMetrics; + private String _brokerHost; + private 
int _brokerPort; + private long _expirationIntervalInMs; Review Comment: Are all of these fields (`_brokerMetrics`, `_brokerHost`, `_brokerPort`, `_expirationIntervalInMs`) unused? ########## pinot-broker/src/main/java/org/apache/pinot/broker/cursors/FsResponseStore.java: ########## @@ -0,0 +1,248 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.broker.cursors; + +import com.google.auto.service.AutoService; +import java.io.File; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import org.apache.pinot.common.cursors.AbstractResponseStore; +import org.apache.pinot.common.metrics.BrokerMetrics; +import org.apache.pinot.common.response.BrokerResponse; +import org.apache.pinot.common.response.CursorResponse; +import org.apache.pinot.common.response.broker.CursorResponseNative; +import org.apache.pinot.common.response.broker.ResultTable; +import org.apache.pinot.spi.cursors.ResponseStore; +import org.apache.pinot.spi.env.PinotConfiguration; +import org.apache.pinot.spi.filesystem.FileMetadata; +import org.apache.pinot.spi.filesystem.PinotFS; +import org.apache.pinot.spi.filesystem.PinotFSFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Stores responses in a file system. All storage schemes supported by PinotFS can be used. + * Responses are stored in "data.dir" directory with the following structure: + * - A directory is created for every request id. 
+ * - Response metadata is stored with filename "response" + * - Results are stored with filename "resultTable" + * The extension of the file is determined by the config "extension" + * + */ +@AutoService(ResponseStore.class) +public class FsResponseStore extends AbstractResponseStore { + private static final Logger LOGGER = LoggerFactory.getLogger(FsResponseStore.class); + private static final String TYPE = "file"; + private static final String RESULT_TABLE_FILE_NAME_FORMAT = "resultTable.%s"; + private static final String RESPONSE_FILE_NAME_FORMAT = "response.%s"; + private static final String URI_SEPARATOR = "/"; + + public static final String TEMP_DIR = "temp.dir"; + public static final String DATA_DIR = "data.dir"; + public static final String FILE_NAME_EXTENSION = "extension"; + public static final Path DEFAULT_ROOT_DIR = Path.of(System.getProperty("java.io.tmpdir"), "broker", "responseStore"); + public static final Path DEFAULT_TEMP_DIR = DEFAULT_ROOT_DIR.resolve("temp"); + public static final URI DEFAULT_DATA_DIR = DEFAULT_ROOT_DIR.resolve("data").toUri(); + public static final String DEFAULT_FILE_NAME_EXTENSION = "json"; + + private Path _localTempDir; + private URI _dataDir; + private JsonResponseSerde _responseSerde; + private String _fileExtension; + + private static URI combinePath(URI baseUri, String path) + throws URISyntaxException { + String newPath = + baseUri.getPath().endsWith(URI_SEPARATOR) ? 
baseUri.getPath() + path : baseUri.getPath() + URI_SEPARATOR + path; + return new URI(baseUri.getScheme(), baseUri.getHost(), newPath, null); + } + + @Override + public String getType() { + return TYPE; + } + + @Override + public void init(PinotConfiguration config, String brokerHost, int brokerPort, String brokerId, + BrokerMetrics brokerMetrics, String expirationTime) + throws Exception { + init(brokerHost, brokerPort, brokerId, brokerMetrics, expirationTime); + + _responseSerde = new JsonResponseSerde(); + _fileExtension = config.getProperty(FILE_NAME_EXTENSION, DEFAULT_FILE_NAME_EXTENSION); + _localTempDir = config.containsKey(TEMP_DIR) ? Path.of(config.getProperty(TEMP_DIR)) : DEFAULT_TEMP_DIR; + Files.createDirectories(_localTempDir); + + _dataDir = config.containsKey(DATA_DIR) ? new URI(config.getProperty(DATA_DIR)) : DEFAULT_DATA_DIR; + PinotFS pinotFS = PinotFSFactory.create(_dataDir.getScheme()); + pinotFS.mkdir(_dataDir); + } + + private Path getTempPath(String... nameParts) { + StringBuilder filename = new StringBuilder(); + for (String part : nameParts) { + filename.append(part).append("_"); + } + filename.append(Thread.currentThread().getId()); + return _localTempDir.resolve(filename.toString()); + } + + @Override + public boolean exists(String requestId) + throws Exception { + PinotFS pinotFS = PinotFSFactory.create(_dataDir.getScheme()); + URI queryDir = combinePath(_dataDir, requestId); + return pinotFS.exists(queryDir); + } + + @Override + public Collection<String> getAllStoredRequestIds() + throws Exception { + PinotFS pinotFS = PinotFSFactory.create(_dataDir.getScheme()); + List<FileMetadata> queryPaths = pinotFS.listFilesWithMetadata(_dataDir, true); + List<String> requestIdList = new ArrayList<>(queryPaths.size()); + + LOGGER.debug("Found {} paths.", queryPaths.size()); + + for (FileMetadata metadata : queryPaths) { + LOGGER.debug("Processing query path: {}", metadata.toString()); + if (metadata.isDirectory()) { + try { + URI queryDir = new 
URI(metadata.getFilePath()); + URI metadataFile = combinePath(queryDir, String.format(RESPONSE_FILE_NAME_FORMAT, _fileExtension)); + boolean metadataFileExists = pinotFS.exists(metadataFile); + LOGGER.debug("Checking for query dir {} & metadata file: {}. Metadata file exists: {}", queryDir, + metadataFile, metadataFileExists); + if (metadataFileExists) { + BrokerResponse response = + _responseSerde.deserialize(pinotFS.open(metadataFile), CursorResponseNative.class); + if (response.getBrokerId().equals(_brokerId)) { + requestIdList.add(response.getRequestId()); + } + LOGGER.debug("Added response store {}", queryDir); Review Comment: Shouldn't this log line be inside the above if condition (i.e. log only if the broker ID matches)? ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/cursors/MemoryResponseStore.java: ########## @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.integration.tests.cursors; + +import com.google.auto.service.AutoService; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import javax.validation.constraints.NotNull; +import org.apache.pinot.common.cursors.AbstractResponseStore; +import org.apache.pinot.common.metrics.BrokerMetrics; +import org.apache.pinot.common.response.CursorResponse; +import org.apache.pinot.common.response.broker.CursorResponseNative; +import org.apache.pinot.common.response.broker.ResultTable; +import org.apache.pinot.spi.cursors.ResponseStore; +import org.apache.pinot.spi.env.PinotConfiguration; + + +@AutoService(ResponseStore.class) +public class MemoryResponseStore extends AbstractResponseStore { + private final Map<String, CursorResponse> _cursorResponseMap = new HashMap<>(); + private final Map<String, ResultTable> _resultTableMap = new HashMap<>(); + + private static final String TYPE = "memory"; + + private BrokerMetrics _brokerMetrics; + private String _brokerHost; + private int _brokerPort; + private long _expirationIntervalInMs; + + + @Override + public String getType() { + return TYPE; + } + + @Override + protected void writeResponse(String requestId, CursorResponse response) + throws Exception { Review Comment: nit: can be removed (same with the other methods here). ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/CursorIntegrationTest.java: ########## @@ -0,0 +1,432 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.integration.tests; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import org.apache.pinot.common.exception.HttpErrorStatusException; +import org.apache.pinot.common.response.CursorResponse; +import org.apache.pinot.common.response.broker.CursorResponseNative; +import org.apache.pinot.controller.cursors.ResponseStoreCleaner; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.env.PinotConfiguration; +import org.apache.pinot.spi.utils.CommonConstants; +import org.apache.pinot.spi.utils.JsonUtils; +import org.apache.pinot.util.TestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + + +public class CursorIntegrationTest extends BaseClusterIntegrationTestSet { + private static final Logger LOGGER = LoggerFactory.getLogger(CursorIntegrationTest.class); + private static final int NUM_OFFLINE_SEGMENTS = 8; + private static final int COUNT_STAR_RESULT = 79003; + private static final String TEST_QUERY_ONE = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = " + + "'DL'"; + private static 
final String TEST_QUERY_TWO = + "SELECT CAST(CAST(ArrTime AS varchar) AS LONG) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = 'DL' " + + "ORDER BY ArrTime DESC"; + private static final String TEST_QUERY_THREE = + "SELECT ArrDelay, CarrierDelay, (ArrDelay - CarrierDelay) AS diff FROM mytable WHERE ArrDelay > CarrierDelay " + + "ORDER BY diff, ArrDelay, CarrierDelay LIMIT 100000"; + private static final String EMPTY_RESULT_QUERY = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND 1 != 1"; + + private static int _resultSize; + + @Override + protected void overrideControllerConf(Map<String, Object> properties) { + properties.put(CommonConstants.CursorConfigs.RESPONSE_STORE_CLEANER_FREQUENCY_PERIOD, "5m"); + } + + @Override + protected void overrideBrokerConf(PinotConfiguration configuration) { + configuration.setProperty(CommonConstants.CursorConfigs.PREFIX_OF_CONFIG_OF_RESPONSE_STORE + ".type", "memory"); + } + + protected long getCountStarResult() { + return COUNT_STAR_RESULT; + } + + @BeforeClass + public void setUp() + throws Exception { + TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir); + + // Start Zk, Kafka and Pinot + startZk(); + startController(); + startBroker(); + startServer(); + + List<File> avroFiles = getAllAvroFiles(); + List<File> offlineAvroFiles = getOfflineAvroFiles(avroFiles, NUM_OFFLINE_SEGMENTS); + + // Create and upload the schema and table config + Schema schema = createSchema(); + getControllerRequestClient().addSchema(schema); + TableConfig offlineTableConfig = createOfflineTableConfig(); + addTableConfig(offlineTableConfig); + + // Create and upload segments + ClusterIntegrationTestUtils.buildSegmentsFromAvro(offlineAvroFiles, offlineTableConfig, schema, 0, _segmentDir, + _tarDir); + uploadSegments(getTableName(), _tarDir); + + // Initialize the query generator + setUpQueryGenerator(avroFiles); + + // Wait for all documents loaded + 
waitForAllDocsLoaded(100_000L); + } + + protected String getBrokerGetAllQueryStoresApiUrl(String brokerBaseApiUrl) { Review Comment: query store -> response store (here and all the usages below)? ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/CursorIntegrationTest.java: ########## @@ -0,0 +1,432 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.integration.tests; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import org.apache.pinot.common.exception.HttpErrorStatusException; +import org.apache.pinot.common.response.CursorResponse; +import org.apache.pinot.common.response.broker.CursorResponseNative; +import org.apache.pinot.controller.cursors.ResponseStoreCleaner; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.env.PinotConfiguration; +import org.apache.pinot.spi.utils.CommonConstants; +import org.apache.pinot.spi.utils.JsonUtils; +import org.apache.pinot.util.TestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + + +public class CursorIntegrationTest extends BaseClusterIntegrationTestSet { + private static final Logger LOGGER = LoggerFactory.getLogger(CursorIntegrationTest.class); + private static final int NUM_OFFLINE_SEGMENTS = 8; + private static final int COUNT_STAR_RESULT = 79003; + private static final String TEST_QUERY_ONE = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = " + + "'DL'"; + private static final String TEST_QUERY_TWO = + "SELECT CAST(CAST(ArrTime AS varchar) AS LONG) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = 'DL' " + + "ORDER BY ArrTime DESC"; + private static final String TEST_QUERY_THREE = + "SELECT ArrDelay, CarrierDelay, (ArrDelay - CarrierDelay) AS diff FROM mytable WHERE ArrDelay > CarrierDelay " + + "ORDER BY diff, ArrDelay, CarrierDelay LIMIT 100000"; + private static final String 
EMPTY_RESULT_QUERY = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND 1 != 1"; + + private static int _resultSize; + + @Override + protected void overrideControllerConf(Map<String, Object> properties) { + properties.put(CommonConstants.CursorConfigs.RESPONSE_STORE_CLEANER_FREQUENCY_PERIOD, "5m"); + } + + @Override + protected void overrideBrokerConf(PinotConfiguration configuration) { + configuration.setProperty(CommonConstants.CursorConfigs.PREFIX_OF_CONFIG_OF_RESPONSE_STORE + ".type", "memory"); + } + + protected long getCountStarResult() { + return COUNT_STAR_RESULT; + } + + @BeforeClass + public void setUp() + throws Exception { + TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir); + + // Start Zk, Kafka and Pinot + startZk(); + startController(); + startBroker(); + startServer(); + + List<File> avroFiles = getAllAvroFiles(); + List<File> offlineAvroFiles = getOfflineAvroFiles(avroFiles, NUM_OFFLINE_SEGMENTS); + + // Create and upload the schema and table config + Schema schema = createSchema(); + getControllerRequestClient().addSchema(schema); + TableConfig offlineTableConfig = createOfflineTableConfig(); + addTableConfig(offlineTableConfig); + + // Create and upload segments + ClusterIntegrationTestUtils.buildSegmentsFromAvro(offlineAvroFiles, offlineTableConfig, schema, 0, _segmentDir, + _tarDir); + uploadSegments(getTableName(), _tarDir); + + // Initialize the query generator + setUpQueryGenerator(avroFiles); + + // Wait for all documents loaded + waitForAllDocsLoaded(100_000L); + } + + protected String getBrokerGetAllQueryStoresApiUrl(String brokerBaseApiUrl) { + return brokerBaseApiUrl + "/responseStore"; + } + + protected String getBrokerResponseApiUrl(String brokerBaseApiUrl, String requestId) { + return getBrokerGetAllQueryStoresApiUrl(brokerBaseApiUrl) + "/" + requestId + "/results"; + } + + protected String getBrokerDeleteQueryStoresApiUrl(String brokerBaseApiUrl, String 
requestId) { + return getBrokerGetAllQueryStoresApiUrl(brokerBaseApiUrl) + "/" + requestId; + } + + protected String getBrokerQueryApiUrl(String brokerBaseApiUrl) { + return useMultiStageQueryEngine() ? brokerBaseApiUrl + "/query" : brokerBaseApiUrl + "/query/sql"; + } + + protected String getCursorQueryProperties(int numRows) { + return String.format("?getCursor=true&numRows=%d", numRows); + } + + protected String getCursorOffset(int offset) { + return String.format("?offset=%d", offset); + } + + protected String getCursorOffset(int offset, int numRows) { + return String.format("?offset=%d&numRows=%d", offset, numRows); + } + + protected Map<String, String> getHeaders() { + return Collections.emptyMap(); + } + + /* + * This test does not use H2 to compare results. Instead, it compares results got from iterating through a + * cursor AND the complete result set. + * Right now, it only compares the number of rows and all columns and rows. + */ + @Override + protected void testQuery(String pinotQuery, String h2Query) + throws Exception { + String queryResourceUrl = getBrokerBaseApiUrl(); + Map<String, String> headers = getHeaders(); + Map<String, String> extraJsonProperties = getExtraQueryProperties(); + + // Get Pinot BrokerResponse without cursors + JsonNode pinotResponse; + pinotResponse = + ClusterTest.postQuery(pinotQuery, getBrokerQueryApiUrl(queryResourceUrl), headers, extraJsonProperties); + if (!pinotResponse.get("exceptions").isEmpty()) { + throw new RuntimeException("Got Exceptions from Query Response: " + pinotResponse); + } + int brokerResponseSize = pinotResponse.get("numRowsResultSet").asInt(); + + // Get a list of responses using cursors. 
+ CursorResponse pinotPagingResponse; + pinotPagingResponse = JsonUtils.jsonNodeToObject(ClusterTest.postQuery(pinotQuery, + getBrokerQueryApiUrl(queryResourceUrl) + getCursorQueryProperties(_resultSize), headers, + getExtraQueryProperties()), CursorResponseNative.class); + if (!pinotPagingResponse.getExceptions().isEmpty()) { + throw new RuntimeException("Got Exceptions from Query Response: " + pinotPagingResponse.getExceptions().get(0)); + } + List<CursorResponse> resultPages = getAllResultPages(queryResourceUrl, headers, pinotPagingResponse, _resultSize); + + int brokerPagingResponseSize = 0; + for (CursorResponse response : resultPages) { + brokerPagingResponseSize += response.getNumRows(); + } + + // Compare the number of rows. + if (brokerResponseSize != brokerPagingResponseSize) { + throw new RuntimeException( + "Pinot # of rows from paging API " + brokerPagingResponseSize + " doesn't match # of rows from default API " + + brokerResponseSize); + } + } + + private List<CursorResponse> getAllResultPages(String queryResourceUrl, Map<String, String> headers, + CursorResponse firstResponse, int numRows) + throws Exception { + numRows = numRows == 0 ? 
CommonConstants.CursorConfigs.DEFAULT_CURSOR_FETCH_ROWS : numRows; + + List<CursorResponse> resultPages = new ArrayList<>(); + resultPages.add(firstResponse); + int totalRows = firstResponse.getNumRowsResultSet(); + + int offset = firstResponse.getNumRows(); + while (offset < totalRows) { + CursorResponse response = JsonUtils.stringToObject(ClusterTest.sendGetRequest( + getBrokerResponseApiUrl(queryResourceUrl, firstResponse.getRequestId()) + getCursorOffset(offset, numRows), + headers), CursorResponseNative.class); + resultPages.add(response); + offset += response.getNumRows(); + } + return resultPages; + } + + protected Object[][] getPageSizesAndQueryEngine() { + return new Object[][]{ + {false, 2}, {false, 3}, {false, 10}, {false, 0}, //0 trigger default behaviour + {true, 2}, {true, 3}, {true, 10}, {true, 0} //0 trigger default behaviour + }; + } + + @DataProvider(name = "pageSizeAndQueryEngineProvider") + public Object[][] pageSizeAndQueryEngineProvider() { + return getPageSizesAndQueryEngine(); + } + + // Test hard coded queries with SSE/MSE AND different cursor response sizes. + @Test(dataProvider = "pageSizeAndQueryEngineProvider") + public void testHardcodedQueries(boolean useMultiStageEngine, int pageSize) + throws Exception { + _resultSize = pageSize; + setUseMultiStageQueryEngine(useMultiStageEngine); + super.testHardcodedQueries(); + } + + @DataProvider(name = "chooseQueryEngine") + public Object[][] chooseQueryEngine() { + return new Object[][] { + {false}, {true} + }; + } Review Comment: This already exists (`useBothQueryEngines`). ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/CursorIntegrationTest.java: ########## @@ -0,0 +1,432 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.integration.tests; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import org.apache.pinot.common.exception.HttpErrorStatusException; +import org.apache.pinot.common.response.CursorResponse; +import org.apache.pinot.common.response.broker.CursorResponseNative; +import org.apache.pinot.controller.cursors.ResponseStoreCleaner; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.env.PinotConfiguration; +import org.apache.pinot.spi.utils.CommonConstants; +import org.apache.pinot.spi.utils.JsonUtils; +import org.apache.pinot.util.TestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + + +public class CursorIntegrationTest extends BaseClusterIntegrationTestSet { + private static final Logger LOGGER = LoggerFactory.getLogger(CursorIntegrationTest.class); + private static final int NUM_OFFLINE_SEGMENTS = 8; + private static final int COUNT_STAR_RESULT = 79003; + private static final 
String TEST_QUERY_ONE = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = " + + "'DL'"; + private static final String TEST_QUERY_TWO = + "SELECT CAST(CAST(ArrTime AS varchar) AS LONG) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = 'DL' " + + "ORDER BY ArrTime DESC"; + private static final String TEST_QUERY_THREE = + "SELECT ArrDelay, CarrierDelay, (ArrDelay - CarrierDelay) AS diff FROM mytable WHERE ArrDelay > CarrierDelay " + + "ORDER BY diff, ArrDelay, CarrierDelay LIMIT 100000"; + private static final String EMPTY_RESULT_QUERY = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND 1 != 1"; + + private static int _resultSize; + + @Override + protected void overrideControllerConf(Map<String, Object> properties) { + properties.put(CommonConstants.CursorConfigs.RESPONSE_STORE_CLEANER_FREQUENCY_PERIOD, "5m"); + } + + @Override + protected void overrideBrokerConf(PinotConfiguration configuration) { + configuration.setProperty(CommonConstants.CursorConfigs.PREFIX_OF_CONFIG_OF_RESPONSE_STORE + ".type", "memory"); + } + + protected long getCountStarResult() { + return COUNT_STAR_RESULT; + } + + @BeforeClass + public void setUp() + throws Exception { + TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir); + + // Start Zk, Kafka and Pinot + startZk(); + startController(); + startBroker(); + startServer(); + + List<File> avroFiles = getAllAvroFiles(); + List<File> offlineAvroFiles = getOfflineAvroFiles(avroFiles, NUM_OFFLINE_SEGMENTS); + + // Create and upload the schema and table config + Schema schema = createSchema(); + getControllerRequestClient().addSchema(schema); + TableConfig offlineTableConfig = createOfflineTableConfig(); + addTableConfig(offlineTableConfig); + + // Create and upload segments + ClusterIntegrationTestUtils.buildSegmentsFromAvro(offlineAvroFiles, offlineTableConfig, schema, 0, _segmentDir, + _tarDir); + 
uploadSegments(getTableName(), _tarDir); + + // Initialize the query generator + setUpQueryGenerator(avroFiles); + + // Wait for all documents loaded + waitForAllDocsLoaded(100_000L); + } + + protected String getBrokerGetAllQueryStoresApiUrl(String brokerBaseApiUrl) { + return brokerBaseApiUrl + "/responseStore"; + } + + protected String getBrokerResponseApiUrl(String brokerBaseApiUrl, String requestId) { + return getBrokerGetAllQueryStoresApiUrl(brokerBaseApiUrl) + "/" + requestId + "/results"; + } + + protected String getBrokerDeleteQueryStoresApiUrl(String brokerBaseApiUrl, String requestId) { + return getBrokerGetAllQueryStoresApiUrl(brokerBaseApiUrl) + "/" + requestId; + } + + protected String getBrokerQueryApiUrl(String brokerBaseApiUrl) { Review Comment: This already exists - `ClusterIntegrationTestUtils::getBrokerQueryApiUrl` ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/CursorIntegrationTest.java: ########## @@ -0,0 +1,432 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.integration.tests; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import org.apache.pinot.common.exception.HttpErrorStatusException; +import org.apache.pinot.common.response.CursorResponse; +import org.apache.pinot.common.response.broker.CursorResponseNative; +import org.apache.pinot.controller.cursors.ResponseStoreCleaner; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.env.PinotConfiguration; +import org.apache.pinot.spi.utils.CommonConstants; +import org.apache.pinot.spi.utils.JsonUtils; +import org.apache.pinot.util.TestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + + +public class CursorIntegrationTest extends BaseClusterIntegrationTestSet { + private static final Logger LOGGER = LoggerFactory.getLogger(CursorIntegrationTest.class); + private static final int NUM_OFFLINE_SEGMENTS = 8; + private static final int COUNT_STAR_RESULT = 79003; + private static final String TEST_QUERY_ONE = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = " + + "'DL'"; + private static final String TEST_QUERY_TWO = + "SELECT CAST(CAST(ArrTime AS varchar) AS LONG) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = 'DL' " + + "ORDER BY ArrTime DESC"; + private static final String TEST_QUERY_THREE = + "SELECT ArrDelay, CarrierDelay, (ArrDelay - CarrierDelay) AS diff FROM mytable WHERE ArrDelay > CarrierDelay " + + "ORDER BY diff, ArrDelay, CarrierDelay LIMIT 100000"; + private static final String 
EMPTY_RESULT_QUERY = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND 1 != 1"; + + private static int _resultSize; + + @Override + protected void overrideControllerConf(Map<String, Object> properties) { + properties.put(CommonConstants.CursorConfigs.RESPONSE_STORE_CLEANER_FREQUENCY_PERIOD, "5m"); + } + + @Override + protected void overrideBrokerConf(PinotConfiguration configuration) { + configuration.setProperty(CommonConstants.CursorConfigs.PREFIX_OF_CONFIG_OF_RESPONSE_STORE + ".type", "memory"); + } + + protected long getCountStarResult() { + return COUNT_STAR_RESULT; + } + + @BeforeClass + public void setUp() + throws Exception { + TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir); + + // Start Zk, Kafka and Pinot + startZk(); + startController(); + startBroker(); + startServer(); + + List<File> avroFiles = getAllAvroFiles(); + List<File> offlineAvroFiles = getOfflineAvroFiles(avroFiles, NUM_OFFLINE_SEGMENTS); + + // Create and upload the schema and table config + Schema schema = createSchema(); + getControllerRequestClient().addSchema(schema); + TableConfig offlineTableConfig = createOfflineTableConfig(); + addTableConfig(offlineTableConfig); + + // Create and upload segments + ClusterIntegrationTestUtils.buildSegmentsFromAvro(offlineAvroFiles, offlineTableConfig, schema, 0, _segmentDir, + _tarDir); + uploadSegments(getTableName(), _tarDir); + + // Initialize the query generator + setUpQueryGenerator(avroFiles); + + // Wait for all documents loaded + waitForAllDocsLoaded(100_000L); + } + + protected String getBrokerGetAllQueryStoresApiUrl(String brokerBaseApiUrl) { + return brokerBaseApiUrl + "/responseStore"; + } + + protected String getBrokerResponseApiUrl(String brokerBaseApiUrl, String requestId) { + return getBrokerGetAllQueryStoresApiUrl(brokerBaseApiUrl) + "/" + requestId + "/results"; + } + + protected String getBrokerDeleteQueryStoresApiUrl(String brokerBaseApiUrl, String 
requestId) { + return getBrokerGetAllQueryStoresApiUrl(brokerBaseApiUrl) + "/" + requestId; + } + + protected String getBrokerQueryApiUrl(String brokerBaseApiUrl) { + return useMultiStageQueryEngine() ? brokerBaseApiUrl + "/query" : brokerBaseApiUrl + "/query/sql"; + } + + protected String getCursorQueryProperties(int numRows) { + return String.format("?getCursor=true&numRows=%d", numRows); + } + + protected String getCursorOffset(int offset) { + return String.format("?offset=%d", offset); + } + + protected String getCursorOffset(int offset, int numRows) { + return String.format("?offset=%d&numRows=%d", offset, numRows); + } + + protected Map<String, String> getHeaders() { + return Collections.emptyMap(); + } + + /* + * This test does not use H2 to compare results. Instead, it compares results got from iterating through a + * cursor AND the complete result set. + * Right now, it only compares the number of rows and all columns and rows. + */ + @Override + protected void testQuery(String pinotQuery, String h2Query) + throws Exception { + String queryResourceUrl = getBrokerBaseApiUrl(); + Map<String, String> headers = getHeaders(); + Map<String, String> extraJsonProperties = getExtraQueryProperties(); + + // Get Pinot BrokerResponse without cursors + JsonNode pinotResponse; + pinotResponse = + ClusterTest.postQuery(pinotQuery, getBrokerQueryApiUrl(queryResourceUrl), headers, extraJsonProperties); + if (!pinotResponse.get("exceptions").isEmpty()) { + throw new RuntimeException("Got Exceptions from Query Response: " + pinotResponse); + } + int brokerResponseSize = pinotResponse.get("numRowsResultSet").asInt(); + + // Get a list of responses using cursors. 
+ CursorResponse pinotPagingResponse; + pinotPagingResponse = JsonUtils.jsonNodeToObject(ClusterTest.postQuery(pinotQuery, + getBrokerQueryApiUrl(queryResourceUrl) + getCursorQueryProperties(_resultSize), headers, + getExtraQueryProperties()), CursorResponseNative.class); + if (!pinotPagingResponse.getExceptions().isEmpty()) { + throw new RuntimeException("Got Exceptions from Query Response: " + pinotPagingResponse.getExceptions().get(0)); + } + List<CursorResponse> resultPages = getAllResultPages(queryResourceUrl, headers, pinotPagingResponse, _resultSize); + + int brokerPagingResponseSize = 0; + for (CursorResponse response : resultPages) { + brokerPagingResponseSize += response.getNumRows(); + } + + // Compare the number of rows. + if (brokerResponseSize != brokerPagingResponseSize) { + throw new RuntimeException( + "Pinot # of rows from paging API " + brokerPagingResponseSize + " doesn't match # of rows from default API " + + brokerResponseSize); + } + } + + private List<CursorResponse> getAllResultPages(String queryResourceUrl, Map<String, String> headers, + CursorResponse firstResponse, int numRows) + throws Exception { + numRows = numRows == 0 ? 
CommonConstants.CursorConfigs.DEFAULT_CURSOR_FETCH_ROWS : numRows; + + List<CursorResponse> resultPages = new ArrayList<>(); + resultPages.add(firstResponse); + int totalRows = firstResponse.getNumRowsResultSet(); + + int offset = firstResponse.getNumRows(); + while (offset < totalRows) { + CursorResponse response = JsonUtils.stringToObject(ClusterTest.sendGetRequest( + getBrokerResponseApiUrl(queryResourceUrl, firstResponse.getRequestId()) + getCursorOffset(offset, numRows), + headers), CursorResponseNative.class); + resultPages.add(response); + offset += response.getNumRows(); + } + return resultPages; + } + + protected Object[][] getPageSizesAndQueryEngine() { + return new Object[][]{ + {false, 2}, {false, 3}, {false, 10}, {false, 0}, //0 trigger default behaviour + {true, 2}, {true, 3}, {true, 10}, {true, 0} //0 trigger default behaviour + }; + } + + @DataProvider(name = "pageSizeAndQueryEngineProvider") + public Object[][] pageSizeAndQueryEngineProvider() { + return getPageSizesAndQueryEngine(); + } + + // Test hard coded queries with SSE/MSE AND different cursor response sizes. + @Test(dataProvider = "pageSizeAndQueryEngineProvider") + public void testHardcodedQueries(boolean useMultiStageEngine, int pageSize) + throws Exception { + _resultSize = pageSize; + setUseMultiStageQueryEngine(useMultiStageEngine); + super.testHardcodedQueries(); + } + + @DataProvider(name = "chooseQueryEngine") + public Object[][] chooseQueryEngine() { + return new Object[][] { + {false}, {true} + }; + } + + // Test a simple cursor workflow. 
+ @Test(dataProvider = "chooseQueryEngine") + public void testCursorWorkflow(boolean useMultiStageQueryEngine) + throws Exception { + _resultSize = 10000; + setUseMultiStageQueryEngine(useMultiStageQueryEngine); + // Submit query + CursorResponse pinotPagingResponse; + JsonNode jsonNode = ClusterTest.postQuery(TEST_QUERY_THREE, + getBrokerQueryApiUrl(getBrokerBaseApiUrl()) + getCursorQueryProperties(_resultSize), getHeaders(), + getExtraQueryProperties()); + + pinotPagingResponse = JsonUtils.jsonNodeToObject(jsonNode, CursorResponseNative.class); + if (!pinotPagingResponse.getExceptions().isEmpty()) { + throw new RuntimeException("Got Exceptions from Query Response: " + pinotPagingResponse.getExceptions().get(0)); + } + String requestId = pinotPagingResponse.getRequestId(); + + Assert.assertFalse(pinotPagingResponse.getBrokerHost().isEmpty()); + Assert.assertTrue(pinotPagingResponse.getBrokerPort() > 0); + Assert.assertTrue(pinotPagingResponse.getCursorFetchTimeMs() >= 0); + Assert.assertTrue(pinotPagingResponse.getCursorResultWriteTimeMs() >= 0); + + int totalRows = pinotPagingResponse.getNumRowsResultSet(); + int offset = pinotPagingResponse.getNumRows(); + while (offset < totalRows) { + pinotPagingResponse = JsonUtils.stringToObject(ClusterTest.sendGetRequest( + getBrokerResponseApiUrl(getBrokerBaseApiUrl(), requestId) + getCursorOffset(offset, _resultSize), + getHeaders()), CursorResponseNative.class); + + Assert.assertFalse(pinotPagingResponse.getBrokerHost().isEmpty()); + Assert.assertTrue(pinotPagingResponse.getBrokerPort() > 0); + Assert.assertTrue(pinotPagingResponse.getCursorFetchTimeMs() >= 0); + offset += _resultSize; + } + ClusterTest.sendDeleteRequest(getBrokerDeleteQueryStoresApiUrl(getBrokerBaseApiUrl(), requestId), getHeaders()); + } + + @Test + public void testGetAndDelete() + throws Exception { + _resultSize = 100000; + testQuery(TEST_QUERY_ONE); + testQuery(TEST_QUERY_TWO); + + List<CursorResponseNative> requestIds = JsonUtils.stringToObject( + 
ClusterTest.sendGetRequest(getBrokerGetAllQueryStoresApiUrl(getBrokerBaseApiUrl()), getHeaders()), + new TypeReference<>() { + }); + + Assert.assertEquals(requestIds.size(), 2); + + // Delete the first one + String deleteRequestId = requestIds.get(0).getRequestId(); + ClusterTest.sendDeleteRequest(getBrokerDeleteQueryStoresApiUrl(getBrokerBaseApiUrl(), deleteRequestId), + getHeaders()); + + requestIds = JsonUtils.stringToObject( + ClusterTest.sendGetRequest(getBrokerGetAllQueryStoresApiUrl(getBrokerBaseApiUrl()), getHeaders()), + new TypeReference<>() { + }); + + Assert.assertEquals(requestIds.size(), 1); + Assert.assertNotEquals(requestIds.get(0).getRequestId(), deleteRequestId); + } + + @Test + public void testBadGet() { + try { + ClusterTest.sendGetRequest(getBrokerResponseApiUrl(getBrokerBaseApiUrl(), "dummy") + getCursorOffset(0), + getHeaders()); + } catch (IOException e) { + HttpErrorStatusException h = (HttpErrorStatusException) e.getCause(); + Assert.assertEquals(h.getStatusCode(), 404); + Assert.assertTrue(h.getMessage().contains("Query results for dummy not found")); + } + } + + @Test + public void testBadDelete() { + try { + ClusterTest.sendDeleteRequest(getBrokerDeleteQueryStoresApiUrl(getBrokerBaseApiUrl(), "dummy"), getHeaders()); + } catch (IOException e) { + HttpErrorStatusException h = (HttpErrorStatusException) e.getCause(); + Assert.assertEquals(h.getStatusCode(), 404); + Assert.assertTrue(h.getMessage().contains("Query results for dummy not found")); + } + } + + @Test + public void testQueryWithEmptyResult() + throws Exception { + JsonNode pinotResponse = ClusterTest.postQuery(EMPTY_RESULT_QUERY, + getBrokerQueryApiUrl(getBrokerBaseApiUrl()) + getCursorQueryProperties(1000), getHeaders(), + getExtraQueryProperties()); + + // There should be no resultTable. + Assert.assertNull(pinotResponse.get("resultTable")); + // Total Rows in result set should be 0. 
+ Assert.assertEquals(pinotResponse.get("numRowsResultSet").asInt(), 0); + // Rows in the current response should be 0 + Assert.assertEquals(pinotResponse.get("numRows").asInt(), 0); + Assert.assertTrue(pinotResponse.get("exceptions").isEmpty()); + } + + @DataProvider(name = "InvalidOffsetQueryProvider") + public Object[][] invalidOffsetQueryProvider() { + return new Object[][]{{TEST_QUERY_ONE}, {EMPTY_RESULT_QUERY}}; + } + + @Test(dataProvider = "InvalidOffsetQueryProvider", expectedExceptions = IOException.class, + expectedExceptionsMessageRegExp = ".*Offset \\d+ should be lesser than totalRecords \\d+.*") + public void testGetInvalidOffset(String query) + throws Exception { + CursorResponse pinotPagingResponse; + pinotPagingResponse = JsonUtils.jsonNodeToObject(ClusterTest.postQuery(query, + getBrokerQueryApiUrl(getBrokerBaseApiUrl()) + getCursorQueryProperties(_resultSize), getHeaders(), + getExtraQueryProperties()), CursorResponseNative.class); + Assert.assertTrue(pinotPagingResponse.getExceptions().isEmpty()); + ClusterTest.sendGetRequest( + getBrokerResponseApiUrl(getBrokerBaseApiUrl(), pinotPagingResponse.getRequestId()) + getCursorOffset( + pinotPagingResponse.getNumRowsResultSet() + 1), getHeaders()); + } + + @Test + public void testQueryWithRuntimeError() + throws Exception { + String queryWithFromMissing = "SELECT * mytable limit 100"; + JsonNode pinotResponse; + pinotResponse = ClusterTest.postQuery(queryWithFromMissing, + getBrokerQueryApiUrl(getBrokerBaseApiUrl()) + getCursorQueryProperties(_resultSize), getHeaders(), + getExtraQueryProperties()); + Assert.assertFalse(pinotResponse.get("exceptions").isEmpty()); + JsonNode exception = pinotResponse.get("exceptions").get(0); + Assert.assertTrue(exception.get("message").asText().startsWith("QueryValidationError:")); + Assert.assertEquals(exception.get("errorCode").asInt(), 700); + Assert.assertTrue(pinotResponse.get("brokerId").asText().startsWith("Broker_")); Review Comment: Maybe we can also verify 
that the get all response stores API returns 0 results in this case? ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/CursorIntegrationTest.java: ########## @@ -0,0 +1,432 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.integration.tests; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import org.apache.pinot.common.exception.HttpErrorStatusException; +import org.apache.pinot.common.response.CursorResponse; +import org.apache.pinot.common.response.broker.CursorResponseNative; +import org.apache.pinot.controller.cursors.ResponseStoreCleaner; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.env.PinotConfiguration; +import org.apache.pinot.spi.utils.CommonConstants; +import org.apache.pinot.spi.utils.JsonUtils; +import org.apache.pinot.util.TestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + + +public class CursorIntegrationTest extends BaseClusterIntegrationTestSet { + private static final Logger LOGGER = LoggerFactory.getLogger(CursorIntegrationTest.class); + private static final int NUM_OFFLINE_SEGMENTS = 8; + private static final int COUNT_STAR_RESULT = 79003; + private static final String TEST_QUERY_ONE = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = " + + "'DL'"; + private static final String TEST_QUERY_TWO = + "SELECT CAST(CAST(ArrTime AS varchar) AS LONG) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = 'DL' " + + "ORDER BY ArrTime DESC"; + private static final String TEST_QUERY_THREE = + "SELECT ArrDelay, CarrierDelay, (ArrDelay - CarrierDelay) AS diff FROM mytable WHERE ArrDelay > CarrierDelay " + + "ORDER BY diff, ArrDelay, CarrierDelay LIMIT 100000"; + private static final String 
EMPTY_RESULT_QUERY = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND 1 != 1"; + + private static int _resultSize; + + @Override + protected void overrideControllerConf(Map<String, Object> properties) { + properties.put(CommonConstants.CursorConfigs.RESPONSE_STORE_CLEANER_FREQUENCY_PERIOD, "5m"); + } + + @Override + protected void overrideBrokerConf(PinotConfiguration configuration) { + configuration.setProperty(CommonConstants.CursorConfigs.PREFIX_OF_CONFIG_OF_RESPONSE_STORE + ".type", "memory"); + } + + protected long getCountStarResult() { + return COUNT_STAR_RESULT; + } + + @BeforeClass + public void setUp() + throws Exception { + TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir); + + // Start Zk, Kafka and Pinot + startZk(); + startController(); + startBroker(); + startServer(); + + List<File> avroFiles = getAllAvroFiles(); + List<File> offlineAvroFiles = getOfflineAvroFiles(avroFiles, NUM_OFFLINE_SEGMENTS); + + // Create and upload the schema and table config + Schema schema = createSchema(); + getControllerRequestClient().addSchema(schema); + TableConfig offlineTableConfig = createOfflineTableConfig(); + addTableConfig(offlineTableConfig); + + // Create and upload segments + ClusterIntegrationTestUtils.buildSegmentsFromAvro(offlineAvroFiles, offlineTableConfig, schema, 0, _segmentDir, + _tarDir); + uploadSegments(getTableName(), _tarDir); + + // Initialize the query generator + setUpQueryGenerator(avroFiles); + + // Wait for all documents loaded + waitForAllDocsLoaded(100_000L); + } + + protected String getBrokerGetAllQueryStoresApiUrl(String brokerBaseApiUrl) { + return brokerBaseApiUrl + "/responseStore"; + } + + protected String getBrokerResponseApiUrl(String brokerBaseApiUrl, String requestId) { + return getBrokerGetAllQueryStoresApiUrl(brokerBaseApiUrl) + "/" + requestId + "/results"; + } + + protected String getBrokerDeleteQueryStoresApiUrl(String brokerBaseApiUrl, String 
requestId) { + return getBrokerGetAllQueryStoresApiUrl(brokerBaseApiUrl) + "/" + requestId; + } + + protected String getBrokerQueryApiUrl(String brokerBaseApiUrl) { + return useMultiStageQueryEngine() ? brokerBaseApiUrl + "/query" : brokerBaseApiUrl + "/query/sql"; + } + + protected String getCursorQueryProperties(int numRows) { + return String.format("?getCursor=true&numRows=%d", numRows); + } + + protected String getCursorOffset(int offset) { + return String.format("?offset=%d", offset); + } + + protected String getCursorOffset(int offset, int numRows) { + return String.format("?offset=%d&numRows=%d", offset, numRows); + } + + protected Map<String, String> getHeaders() { + return Collections.emptyMap(); + } + + /* + * This test does not use H2 to compare results. Instead, it compares results got from iterating through a + * cursor AND the complete result set. + * Right now, it only compares the number of rows and all columns and rows. + */ + @Override + protected void testQuery(String pinotQuery, String h2Query) + throws Exception { + String queryResourceUrl = getBrokerBaseApiUrl(); + Map<String, String> headers = getHeaders(); + Map<String, String> extraJsonProperties = getExtraQueryProperties(); + + // Get Pinot BrokerResponse without cursors + JsonNode pinotResponse; + pinotResponse = + ClusterTest.postQuery(pinotQuery, getBrokerQueryApiUrl(queryResourceUrl), headers, extraJsonProperties); + if (!pinotResponse.get("exceptions").isEmpty()) { + throw new RuntimeException("Got Exceptions from Query Response: " + pinotResponse); + } Review Comment: Can use the `assertNoError` API here. ########## pinot-controller/src/main/java/org/apache/pinot/controller/cursors/ResponseStoreCleaner.java: ########## @@ -0,0 +1,222 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.controller.cursors; + +import com.fasterxml.jackson.core.type.TypeReference; +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.CompletionService; +import java.util.concurrent.Executor; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hc.client5.http.classic.methods.HttpDelete; +import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.classic.methods.HttpUriRequestBase; +import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.apache.helix.model.InstanceConfig; +import org.apache.pinot.common.auth.AuthProviderUtils; +import org.apache.pinot.common.http.MultiHttpRequest; +import org.apache.pinot.common.http.MultiHttpRequestResponse; +import org.apache.pinot.common.metrics.ControllerMetrics; +import org.apache.pinot.common.response.CursorResponse; +import org.apache.pinot.common.response.broker.CursorResponseNative; +import org.apache.pinot.controller.ControllerConf; +import 
org.apache.pinot.controller.LeadControllerManager; +import org.apache.pinot.controller.api.resources.InstanceInfo; +import org.apache.pinot.controller.helix.core.PinotHelixResourceManager; +import org.apache.pinot.controller.helix.core.periodictask.ControllerPeriodicTask; +import org.apache.pinot.spi.auth.AuthProvider; +import org.apache.pinot.spi.utils.CommonConstants; +import org.apache.pinot.spi.utils.JsonUtils; +import org.apache.pinot.spi.utils.TimeUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * ResponseStoreCleaner periodically gets all responses stored in a response store and deletes the ones that have + * expired. From each broker, it gets the list of responses. Each response has an expiration unix timestamp. + * If the current timestamp is greater, it calls a DELETE API for every response that has expired. + */ +public class ResponseStoreCleaner extends ControllerPeriodicTask<Void> { + private static final Logger LOGGER = LoggerFactory.getLogger(ResponseStoreCleaner.class); + private static final int TIMEOUT_MS = 3000; + private static final String QUERY_RESULT_STORE = "%s://%s:%d/responseStore"; + private static final String DELETE_QUERY_RESULT = "%s://%s:%d/responseStore/%s"; + // Used in tests to trigger the delete instead of waiting for the wall clock to move to an appropriate time. + public static final String CLEAN_AT_TIME = "response.store.cleaner.clean.at.ms"; Review Comment: Assuming you were talking about `CursorIntegrationTest::testResponseStoreCleaner` here - doesn't that test only verify that at least one of the two response stores is deleted? In that case, why do we need this `response.store.cleaner.clean.at.ms` internal configuration? Wouldn't a short response store expiration value in the broker combined with a short frequency for the controller periodic job do the trick considering we're using a 100 second timeout for the wait condition anyway?
########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/CursorIntegrationTest.java: ########## @@ -0,0 +1,432 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.integration.tests; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import org.apache.pinot.common.exception.HttpErrorStatusException; +import org.apache.pinot.common.response.CursorResponse; +import org.apache.pinot.common.response.broker.CursorResponseNative; +import org.apache.pinot.controller.cursors.ResponseStoreCleaner; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.env.PinotConfiguration; +import org.apache.pinot.spi.utils.CommonConstants; +import org.apache.pinot.spi.utils.JsonUtils; +import org.apache.pinot.util.TestUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import 
org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + + +public class CursorIntegrationTest extends BaseClusterIntegrationTestSet { + private static final Logger LOGGER = LoggerFactory.getLogger(CursorIntegrationTest.class); + private static final int NUM_OFFLINE_SEGMENTS = 8; + private static final int COUNT_STAR_RESULT = 79003; + private static final String TEST_QUERY_ONE = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = " + + "'DL'"; + private static final String TEST_QUERY_TWO = + "SELECT CAST(CAST(ArrTime AS varchar) AS LONG) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = 'DL' " + + "ORDER BY ArrTime DESC"; + private static final String TEST_QUERY_THREE = + "SELECT ArrDelay, CarrierDelay, (ArrDelay - CarrierDelay) AS diff FROM mytable WHERE ArrDelay > CarrierDelay " + + "ORDER BY diff, ArrDelay, CarrierDelay LIMIT 100000"; + private static final String EMPTY_RESULT_QUERY = + "SELECT SUM(CAST(CAST(ArrTime AS varchar) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND 1 != 1"; + + private static int _resultSize; + + @Override + protected void overrideControllerConf(Map<String, Object> properties) { + properties.put(CommonConstants.CursorConfigs.RESPONSE_STORE_CLEANER_FREQUENCY_PERIOD, "5m"); + } + + @Override + protected void overrideBrokerConf(PinotConfiguration configuration) { + configuration.setProperty(CommonConstants.CursorConfigs.PREFIX_OF_CONFIG_OF_RESPONSE_STORE + ".type", "memory"); + } + + protected long getCountStarResult() { + return COUNT_STAR_RESULT; + } + + @BeforeClass + public void setUp() + throws Exception { + TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir); + + // Start Zk, Kafka and Pinot + startZk(); + startController(); + startBroker(); + startServer(); + + List<File> avroFiles = getAllAvroFiles(); + List<File> offlineAvroFiles = getOfflineAvroFiles(avroFiles, NUM_OFFLINE_SEGMENTS); + + // Create and upload the 
schema and table config + Schema schema = createSchema(); + getControllerRequestClient().addSchema(schema); + TableConfig offlineTableConfig = createOfflineTableConfig(); + addTableConfig(offlineTableConfig); + + // Create and upload segments + ClusterIntegrationTestUtils.buildSegmentsFromAvro(offlineAvroFiles, offlineTableConfig, schema, 0, _segmentDir, + _tarDir); + uploadSegments(getTableName(), _tarDir); + + // Initialize the query generator + setUpQueryGenerator(avroFiles); + + // Wait for all documents loaded + waitForAllDocsLoaded(100_000L); + } + + protected String getBrokerGetAllQueryStoresApiUrl(String brokerBaseApiUrl) { + return brokerBaseApiUrl + "/responseStore"; + } + + protected String getBrokerResponseApiUrl(String brokerBaseApiUrl, String requestId) { + return getBrokerGetAllQueryStoresApiUrl(brokerBaseApiUrl) + "/" + requestId + "/results"; + } + + protected String getBrokerDeleteQueryStoresApiUrl(String brokerBaseApiUrl, String requestId) { + return getBrokerGetAllQueryStoresApiUrl(brokerBaseApiUrl) + "/" + requestId; + } + + protected String getBrokerQueryApiUrl(String brokerBaseApiUrl) { + return useMultiStageQueryEngine() ? brokerBaseApiUrl + "/query" : brokerBaseApiUrl + "/query/sql"; + } + + protected String getCursorQueryProperties(int numRows) { + return String.format("?getCursor=true&numRows=%d", numRows); + } + + protected String getCursorOffset(int offset) { + return String.format("?offset=%d", offset); + } + + protected String getCursorOffset(int offset, int numRows) { + return String.format("?offset=%d&numRows=%d", offset, numRows); + } + + protected Map<String, String> getHeaders() { + return Collections.emptyMap(); + } + + /* + * This test does not use H2 to compare results. Instead, it compares results got from iterating through a + * cursor AND the complete result set. + * Right now, it only compares the number of rows and all columns and rows. 
+ */ + @Override + protected void testQuery(String pinotQuery, String h2Query) + throws Exception { + String queryResourceUrl = getBrokerBaseApiUrl(); + Map<String, String> headers = getHeaders(); + Map<String, String> extraJsonProperties = getExtraQueryProperties(); + + // Get Pinot BrokerResponse without cursors + JsonNode pinotResponse; + pinotResponse = + ClusterTest.postQuery(pinotQuery, getBrokerQueryApiUrl(queryResourceUrl), headers, extraJsonProperties); + if (!pinotResponse.get("exceptions").isEmpty()) { + throw new RuntimeException("Got Exceptions from Query Response: " + pinotResponse); + } + int brokerResponseSize = pinotResponse.get("numRowsResultSet").asInt(); + + // Get a list of responses using cursors. + CursorResponse pinotPagingResponse; + pinotPagingResponse = JsonUtils.jsonNodeToObject(ClusterTest.postQuery(pinotQuery, + getBrokerQueryApiUrl(queryResourceUrl) + getCursorQueryProperties(_resultSize), headers, + getExtraQueryProperties()), CursorResponseNative.class); + if (!pinotPagingResponse.getExceptions().isEmpty()) { + throw new RuntimeException("Got Exceptions from Query Response: " + pinotPagingResponse.getExceptions().get(0)); + } + List<CursorResponse> resultPages = getAllResultPages(queryResourceUrl, headers, pinotPagingResponse, _resultSize); + + int brokerPagingResponseSize = 0; + for (CursorResponse response : resultPages) { + brokerPagingResponseSize += response.getNumRows(); + } + + // Compare the number of rows. + if (brokerResponseSize != brokerPagingResponseSize) { + throw new RuntimeException( + "Pinot # of rows from paging API " + brokerPagingResponseSize + " doesn't match # of rows from default API " + + brokerResponseSize); + } + } + + private List<CursorResponse> getAllResultPages(String queryResourceUrl, Map<String, String> headers, + CursorResponse firstResponse, int numRows) + throws Exception { + numRows = numRows == 0 ? 
CommonConstants.CursorConfigs.DEFAULT_CURSOR_FETCH_ROWS : numRows; + + List<CursorResponse> resultPages = new ArrayList<>(); + resultPages.add(firstResponse); + int totalRows = firstResponse.getNumRowsResultSet(); + + int offset = firstResponse.getNumRows(); + while (offset < totalRows) { + CursorResponse response = JsonUtils.stringToObject(ClusterTest.sendGetRequest( + getBrokerResponseApiUrl(queryResourceUrl, firstResponse.getRequestId()) + getCursorOffset(offset, numRows), + headers), CursorResponseNative.class); + resultPages.add(response); + offset += response.getNumRows(); + } + return resultPages; + } + + protected Object[][] getPageSizesAndQueryEngine() { + return new Object[][]{ + {false, 2}, {false, 3}, {false, 10}, {false, 0}, //0 trigger default behaviour + {true, 2}, {true, 3}, {true, 10}, {true, 0} //0 trigger default behaviour + }; + } + + @DataProvider(name = "pageSizeAndQueryEngineProvider") + public Object[][] pageSizeAndQueryEngineProvider() { + return getPageSizesAndQueryEngine(); + } Review Comment: nit: these two can be a single method. Also, the method name and the actual returned values have the order flipped (query engine / page size). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org