gortiz commented on code in PR #14493:
URL: https://github.com/apache/pinot/pull/14493#discussion_r1853418182
##########
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TableIndexingTest.java:
##########
@@ -0,0 +1,776 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests;
+
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.text.SimpleDateFormat;
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.stream.Collectors;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.common.metrics.ServerMetrics;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.IndexingConfig;
+import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.config.table.TimestampConfig;
+import org.apache.pinot.spi.config.table.TimestampIndexGranularity;
+import org.apache.pinot.spi.data.ComplexFieldSpec;
+import org.apache.pinot.spi.data.DimensionFieldSpec;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.data.MetricFieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.metrics.PinotMetricName;
+import org.apache.pinot.spi.metrics.PinotMetricUtils;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.apache.pinot.util.TestUtils;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Ignore;
+import org.testng.annotations.Test;
+
+
+// Try to create various index types for all data type/cardinality/encoding combinations and report outcome.
+// NOTES: There is no multi-value type for BigDecimal, JSON or MAP.
+// see PinotDataType.getPinotDataTypeForIngestion()
+@Test(enabled = false)
+public class TableIndexingTest extends BaseClusterIntegrationTestSet {
+
+  private final ArrayList<String> _tableNames = new ArrayList<>();
+  private final int _allDocs = 3000;
+  private final SimpleDateFormat _format = new SimpleDateFormat("HH:mm:ss.SSS");
+  private final List<TestCase> _allResults = new ArrayList<>();
+
+  static class TestCase {
+    String _tableName;
+    String _indexType;
+    Throwable _error;
+
+    public TestCase(String tableName, String indexType) {
+      _tableName = tableName;
+      _indexType = indexType;
+    }
+
+    @Override
+    public String toString() {
+      return _tableName + "," + _indexType;
+    }
+  }
+
+  @BeforeClass
+  public void setUp()
+      throws Exception {
+    TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir);
+
+    // Start the Pinot cluster
+    startZk();
+    startController();
+    startBrokers(1);
+    startServers(1);
+
+    // Create and upload the schema and table config
+    List<Schema> schemas = createSchemas();
+    addSchemas(schemas);
+    List<TableConfig> tableConfigs = createOfflineTableConfigs(schemas);
+    addTableConfigs(tableConfigs);
+
+    List<List<File>> avroFiles = createAvroFile(schemas);
+
+    for (int i = 0; i < schemas.size(); i++) {
+      // we have to use separate directories because segment tar files must exist for the duration of the test
+      File schemaSegmentDir = new File(_segmentDir, "schema_" + i);
+      File schemaTarDir = new File(_tarDir, "schema_" + i);
+      TestUtils.ensureDirectoriesExistAndEmpty(schemaSegmentDir, schemaTarDir);
+      ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles.get(i), tableConfigs.get(i), schemas.get(i), 0,
+          schemaSegmentDir, schemaTarDir);
+      uploadSegments(schemas.get(i).getSchemaName(), schemaTarDir);
+    }
+
+    waitForAllDocsLoaded(schemas);
+  }
+
+  private void addTableConfigs(List<TableConfig> tableConfigs)
+      throws IOException {
+    for (TableConfig config : tableConfigs) {
+      super.addTableConfig(config);
+    }
+  }
+
+  private List<TableConfig> createOfflineTableConfigs(List<Schema> schemas) {
+    return
+        schemas.stream().map(s -> new TableConfigBuilder(TableType.OFFLINE)
+                .setTableName(s.getSchemaName())
+                .build())
+            .collect(Collectors.toList());
+  }
+
+  private void waitForAllDocsLoaded(final List<Schema> schemas) {
+    HashSet<String> incompleteTables = new HashSet<>();
+    for (Schema schema : schemas) {
+      incompleteTables.add(schema.getSchemaName());
+    }
+    List<String> toRemove = new ArrayList<>();
+
+    TestUtils.waitForCondition(() -> {
+      toRemove.clear();
+      for (String table : incompleteTables) {
+        if (getCurrentCountStarResult(table) == _allDocs) {
+          toRemove.add(table);
+        }
+      }
+      incompleteTables.removeAll(toRemove);
+      return incompleteTables.isEmpty();
+    }, 100L, 60_000L,
+        "Failed to load " + _allDocs + " documents", true, Duration.ofMillis(60_000L / 10));
+  }
+
+  @AfterClass
+  public void tearDown()
+      throws Exception {
+    stopServer();
+    stopBroker();
+    stopController();
+    stopZk();
+    FileUtils.deleteDirectory(_tempDir);
+  }
+
+  @Ignore
+  @Test(dataProvider = "fieldsAndIndexTypes")
+  public void testAddIndex(TestCase testCase)
+      throws Throwable {
+    try {
+      String schemaName = testCase._tableName;
+      String indexType = testCase._indexType;
+
+      System.out.println(
+          _format.format(new Date()) + " Starting check for column: " + schemaName + " index type: " + indexType);
+      Schema schema = getSchema(schemaName);
+      FieldSpec field = schema.getFieldSpecFor("col");
+
+      // These exceptions are thrown during segment reload (and not table config update) and appear in logs only.
+      // We're throwing them here to make the test faster and improve the output.
+      if ("geo".equals(indexType) && field.getDataType() != DataType.BYTES) {
+        throw new RuntimeException("Geo/H3 index can only be applied to column of BYTES data type!");
+      }
+
+      if ("json".equals(indexType) && ((field.getDataType() != DataType.STRING && field.getDataType() != DataType.JSON)
+          || !field.isSingleValueField())) {
+        throw new RuntimeException(
+            "JSON index can only be applied to single value column of STRING or JSON data type!");
+      }
+
+      if ("vector".equals(indexType) && (field.getDataType() != DataType.FLOAT || field.isSingleValueField())) {
+        throw new RuntimeException("VECTOR index can only be applied to Float Array columns");
+      }
+
+      if (("text".equals(indexType) || "native_text".equals(indexType)) && field.getDataType() != DataType.STRING) {
+        throw new RuntimeException("Text index is currently only supported on STRING columns");
+      }

Review Comment:
   I don't understand this. What does this failure mean? If we want to skip these cases, we can either:
   1. Preferred option: filter them out in `fieldsAndIndexTypes`.
   2. Alternatively: throw `SkipException`, which makes TestNG skip the test.
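   For illustration, a rough sketch of both options; `_allCases` and `isSupportedCombination` are hypothetical stand-ins for however the test generates and validates its combinations:

   ```java
   import org.testng.SkipException;
   import org.testng.annotations.DataProvider;
   import org.testng.annotations.Test;

   // Option 1 (preferred): filter unsupported combinations out of the data provider,
   // so TestNG never instantiates those test cases at all.
   @DataProvider(name = "fieldsAndIndexTypes")
   public Object[][] fieldsAndIndexTypes() {
     return _allCases.stream()  // _allCases: hypothetical list of all generated TestCases
         .filter(tc -> isSupportedCombination(tc._tableName, tc._indexType))
         .map(tc -> new Object[]{tc})
         .toArray(Object[][]::new);
   }

   // Option 2: throw SkipException, which TestNG reports as SKIPPED rather than FAILED.
   @Test(dataProvider = "fieldsAndIndexTypes")
   public void testAddIndex(TestCase testCase) {
     if (!isSupportedCombination(testCase._tableName, testCase._indexType)) {
       throw new SkipException("Unsupported combination: " + testCase);
     }
     // ... actual index creation and segment reload check ...
   }
   ```

   Option 1 keeps unsupported combinations out of the report entirely, while option 2 makes them visible as skipped tests.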