gortiz commented on code in PR #14493:
URL: https://github.com/apache/pinot/pull/14493#discussion_r1853418182
##########
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TableIndexingTest.java:
##########
@@ -0,0 +1,776 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests;
+
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.text.SimpleDateFormat;
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.stream.Collectors;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.common.metrics.ServerMetrics;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.IndexingConfig;
+import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.config.table.TimestampConfig;
+import org.apache.pinot.spi.config.table.TimestampIndexGranularity;
+import org.apache.pinot.spi.data.ComplexFieldSpec;
+import org.apache.pinot.spi.data.DimensionFieldSpec;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.data.MetricFieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.metrics.PinotMetricName;
+import org.apache.pinot.spi.metrics.PinotMetricUtils;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.apache.pinot.util.TestUtils;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Ignore;
+import org.testng.annotations.Test;
+
+
+// Try to create various index types for all data type/cardinality/encoding combinations and report outcome.
+// NOTES: There is no multi-value type for BigDecimal, JSON or MAP.
+// see PinotDataType.getPinotDataTypeForIngestion()
+@Test(enabled = false)
+public class TableIndexingTest extends BaseClusterIntegrationTestSet {
+
+  private final ArrayList<String> _tableNames = new ArrayList<>();
+  private final int _allDocs = 3000;
+  private final SimpleDateFormat _format = new SimpleDateFormat("HH:mm:ss.SSS");
+  private final List<TestCase> _allResults = new ArrayList<>();
+
+  static class TestCase {
+    String _tableName;
+    String _indexType;
+    Throwable _error;
+
+    public TestCase(String tableName, String indexType) {
+      _tableName = tableName;
+      _indexType = indexType;
+    }
+
+    @Override
+    public String toString() {
+      return _tableName + "," + _indexType;
+    }
+  }
+
+  @BeforeClass
+  public void setUp()
+      throws Exception {
+    TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir);
+
+    // Start the Pinot cluster
+    startZk();
+    startController();
+    startBrokers(1);
+    startServers(1);
+
+    // Create and upload the schema and table config
+    List<Schema> schemas = createSchemas();
+    addSchemas(schemas);
+    List<TableConfig> tableConfigs = createOfflineTableConfigs(schemas);
+    addTableConfigs(tableConfigs);
+
+    List<List<File>> avroFiles = createAvroFile(schemas);
+
+    for (int i = 0; i < schemas.size(); i++) {
+      // we have to use separate directories because segment tar files must exist for the duration of the test
+      File schemaSegmentDir = new File(_segmentDir, "schema_" + i);
+      File schemaTarDir = new File(_tarDir, "schema_" + i);
+      TestUtils.ensureDirectoriesExistAndEmpty(schemaSegmentDir, schemaTarDir);
+      ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles.get(i), tableConfigs.get(i), schemas.get(i), 0,
+          schemaSegmentDir, schemaTarDir);
+      uploadSegments(schemas.get(i).getSchemaName(), schemaTarDir);
+    }
+
+    waitForAllDocsLoaded(schemas);
+  }
+
+  private void addTableConfigs(List<TableConfig> tableConfigs)
+      throws IOException {
+    for (TableConfig config : tableConfigs) {
+      super.addTableConfig(config);
+    }
+  }
+
+  private List<TableConfig> createOfflineTableConfigs(List<Schema> schemas) {
+    return
+        schemas.stream().map(s -> new TableConfigBuilder(TableType.OFFLINE)
+                .setTableName(s.getSchemaName())
+                .build())
+            .collect(Collectors.toList());
+  }
+
+  private void waitForAllDocsLoaded(final List<Schema> schemas) {
+    HashSet<String> incompleteTables = new HashSet<>();
+    for (Schema schema : schemas) {
+      incompleteTables.add(schema.getSchemaName());
+    }
+    List<String> toRemove = new ArrayList<>();
+
+    TestUtils.waitForCondition(() -> {
+      toRemove.clear();
+      for (String table : incompleteTables) {
+        if (getCurrentCountStarResult(table) == _allDocs) {
+          toRemove.add(table);
+        }
+      }
+      incompleteTables.removeAll(toRemove);
+      return incompleteTables.isEmpty();
+    }, 100L, 60_000L,
+        "Failed to load " + _allDocs + " documents", true, Duration.ofMillis(60_000L / 10));
+  }
+
+  @AfterClass
+  public void tearDown()
+      throws Exception {
+    stopServer();
+    stopBroker();
+    stopController();
+    stopZk();
+    FileUtils.deleteDirectory(_tempDir);
+  }
+
+  @Ignore
+  @Test(dataProvider = "fieldsAndIndexTypes")
+  public void testAddIndex(TestCase testCase)
+      throws Throwable {
+    try {
+      String schemaName = testCase._tableName;
+      String indexType = testCase._indexType;
+
+      System.out.println(
+          _format.format(new Date()) + " Starting check for column: " + schemaName + " index type: " + indexType);
+      Schema schema = getSchema(schemaName);
+      FieldSpec field = schema.getFieldSpecFor("col");
+
+      // These exceptions are thrown during segment reload (and not table config update) and appear in logs only.
+      // We're throwing them here to make the test faster and improve the output.
+      if ("geo".equals(indexType) && field.getDataType() != DataType.BYTES) {
+        throw new RuntimeException("Geo/H3 index can only be applied to column of BYTES data type!");
+      }
+
+      if ("json".equals(indexType) && ((field.getDataType() != DataType.STRING && field.getDataType() != DataType.JSON)
+          || !field.isSingleValueField())) {
+        throw new RuntimeException(
+            "JSON index can only be applied to single value column of STRING or JSON data type!");
+      }
+
+      if ("vector".equals(indexType) && (field.getDataType() != DataType.FLOAT || field.isSingleValueField())) {
+        throw new RuntimeException("VECTOR index can only be applied to Float Array columns");
+      }
+
+      if (("text".equals(indexType) || "native_text".equals(indexType)) && field.getDataType() != DataType.STRING) {
+        throw new RuntimeException("Text index is currently only supported on STRING columns");
+      }

Review Comment:
   I don't understand this. What does this failure mean? If we want to skip these cases, we can either:
   1. Preferred option: filter them out in `fieldsAndIndexTypes`.
   2. Alternatively: throw `SkipException`, which makes TestNG skip the test.
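   For illustration, a rough sketch of both options; `_allCases` and `isSupportedCombination` are hypothetical stand-ins for however the test generates and validates its combinations:

   ```java
   import org.testng.SkipException;
   import org.testng.annotations.DataProvider;
   import org.testng.annotations.Test;

   // Option 1 (preferred): filter unsupported combinations out of the data provider,
   // so TestNG never instantiates those test cases at all.
   @DataProvider(name = "fieldsAndIndexTypes")
   public Object[][] fieldsAndIndexTypes() {
     return _allCases.stream()  // _allCases: hypothetical list of all generated TestCases
         .filter(tc -> isSupportedCombination(tc._tableName, tc._indexType))
         .map(tc -> new Object[]{tc})
         .toArray(Object[][]::new);
   }

   // Option 2: throw SkipException, which TestNG reports as SKIPPED rather than FAILED.
   @Test(dataProvider = "fieldsAndIndexTypes")
   public void testAddIndex(TestCase testCase) {
     if (!isSupportedCombination(testCase._tableName, testCase._indexType)) {
       throw new SkipException("Unsupported combination: " + testCase);
     }
     // ... actual index creation and segment reload check ...
   }
   ```

   Option 1 keeps unsupported combinations out of the report entirely, while option 2 makes them visible as skipped tests.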