lgo opened a new issue #5900: URL: https://github.com/apache/incubator-pinot/issues/5900
(Also non-critical; this is mostly about usability, i.e. being able to flexibly configure Pinot.) While experimenting with some schemas, I tried setting up a star-tree index on a column created by an ingestion transform. Similarly, setting up an inverted index on the same column also failed, so I reckon the schemas are just not adjusted by ingestion transforms. Here is an example setup that reproduces this. schema ```json { "schemaName": "testdata", "dimensionFieldSpecs": [ { "name": "fields", "dataType": "STRING", "singleValueField": false }, { "name": "id", "dataType": "STRING" } ], "metricFieldSpecs": [ { "name": "amount", "dataType": "DOUBLE" } ], "dateTimeFieldSpecs": [ { "name": "created_at", "dataType": "LONG", "format": "1:SECONDS:EPOCH", "granularity": "15:MINUTES" } ] } ``` table ```json { "tableName": "testdata", "tableType": "OFFLINE", "routing": { "segmentPrunerType": "partition" }, "segmentsConfig": { "timeColumnName": "created_at", "timeType": "SECONDS", "replication": "1", "schemaName": "testdata", "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy", "segmentPushFrequency": "HOURLY", "segmentPushType": "APPEND" }, "ingestionConfig": { "transformConfigs": [{ "columnName": "id_transform", "transformFunction": "Groovy({id}, id)" }] }, "tableIndexConfig": { "loadMode": "MMAP", "createInvertedIndexDuringSegmentGeneration": true, "invertedIndexColumns": ["id_transform"], "starTreeIndexConfigs": [{ "dimensionsSplitOrder": [ "id_transform" ], "functionColumnPairs": [ "COUNT" ], "maxLeafRecords": 1 }] }, "tenants": {}, "metadata": {} } ``` exception with only the star-tree index ``` 2020/08/19 19:02:55.391 ERROR [LaunchDataIngestionJobCommand] [main] Got exception to kick off standalone data ingestion job - java.lang.RuntimeException: Caught exception during running - org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:137) 
~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.runIngestionJob(IngestionJobLauncher.java:114) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.execute(LaunchDataIngestionJobCommand.java:123) [pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.main(LaunchDataIngestionJobCommand.java:65) [pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] Caused by: java.lang.NullPointerException at org.apache.pinot.core.segment.index.datasource.ImmutableDataSource$ImmutableDataSourceMetadata.<init>(ImmutableDataSource.java:54) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.segment.index.datasource.ImmutableDataSource.<init>(ImmutableDataSource.java:36) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.indexsegment.immutable.ImmutableSegmentImpl.getDataSource(ImmutableSegmentImpl.java:94) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.data.readers.PinotSegmentColumnReader.<init>(PinotSegmentColumnReader.java:38) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.startree.v2.builder.BaseSingleTreeBuilder.<init>(BaseSingleTreeBuilder.java:129) 
~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.startree.v2.builder.OffHeapSingleTreeBuilder.<init>(OffHeapSingleTreeBuilder.java:69) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.startree.v2.builder.MultipleTreesBuilder.getSingleTreeBuilder(MultipleTreesBuilder.java:157) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.startree.v2.builder.MultipleTreesBuilder.build(MultipleTreesBuilder.java:130) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl.buildStarTreeV2IfNecessary(SegmentIndexCreationDriverImpl.java:298) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl.handlePostCreation(SegmentIndexCreationDriverImpl.java:263) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl.build(SegmentIndexCreationDriverImpl.java:223) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner.run(SegmentGenerationTaskRunner.java:102) ~[pinot-batch-ingestion-hadoop-0.5.0-2020-08-13-SNAPSHOT-shaded.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at 
org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.run(SegmentGenerationJobRunner.java:190) ~[pinot-batch-ingestion-standalone-0.5.0-2020-08-13-SNAPSHOT-shaded.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:135) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] ... 3 more 2020/08/19 19:02:55.394 ERROR [LaunchDataIngestionJobCommand] [main] Exception caught: java.lang.RuntimeException: Caught exception during running - org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:137) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.runIngestionJob(IngestionJobLauncher.java:114) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.execute(LaunchDataIngestionJobCommand.java:123) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.main(LaunchDataIngestionJobCommand.java:65) [pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] Caused by: java.lang.NullPointerException at org.apache.pinot.core.segment.index.datasource.ImmutableDataSource$ImmutableDataSourceMetadata.<init>(ImmutableDataSource.java:54) 
~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.segment.index.datasource.ImmutableDataSource.<init>(ImmutableDataSource.java:36) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.indexsegment.immutable.ImmutableSegmentImpl.getDataSource(ImmutableSegmentImpl.java:94) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.data.readers.PinotSegmentColumnReader.<init>(PinotSegmentColumnReader.java:38) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.startree.v2.builder.BaseSingleTreeBuilder.<init>(BaseSingleTreeBuilder.java:129) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.startree.v2.builder.OffHeapSingleTreeBuilder.<init>(OffHeapSingleTreeBuilder.java:69) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.startree.v2.builder.MultipleTreesBuilder.getSingleTreeBuilder(MultipleTreesBuilder.java:157) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.startree.v2.builder.MultipleTreesBuilder.build(MultipleTreesBuilder.java:130) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl.buildStarTreeV2IfNecessary(SegmentIndexCreationDriverImpl.java:298) 
~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl.handlePostCreation(SegmentIndexCreationDriverImpl.java:263) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl.build(SegmentIndexCreationDriverImpl.java:223) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner.run(SegmentGenerationTaskRunner.java:102) ~[pinot-batch-ingestion-hadoop-0.5.0-2020-08-13-SNAPSHOT-shaded.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.run(SegmentGenerationJobRunner.java:190) ~[pinot-batch-ingestion-standalone-0.5.0-2020-08-13-SNAPSHOT-shaded.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:135) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] ... 
3 more ``` exception with only the inverted index ``` 2020/08/19 19:02:26.643 ERROR [LaunchDataIngestionJobCommand] [main] Got exception to kick off standalone data ingestion job - java.lang.RuntimeException: Caught exception during running - org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:137) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.runIngestionJob(IngestionJobLauncher.java:114) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.execute(LaunchDataIngestionJobCommand.java:123) [pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.main(LaunchDataIngestionJobCommand.java:65) [pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] Caused by: java.lang.IllegalStateException: Cannot create inverted index for column: id_transform because it is not in schema at shaded.com.google.common.base.Preconditions.checkState(Preconditions.java:518) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.segment.creator.impl.SegmentColumnarIndexCreator.init(SegmentColumnarIndexCreator.java:117) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at 
org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl.build(SegmentIndexCreationDriverImpl.java:178) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner.run(SegmentGenerationTaskRunner.java:102) ~[pinot-batch-ingestion-hadoop-0.5.0-2020-08-13-SNAPSHOT-shaded.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.run(SegmentGenerationJobRunner.java:190) ~[pinot-batch-ingestion-standalone-0.5.0-2020-08-13-SNAPSHOT-shaded.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:135) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] ... 
3 more 2020/08/19 19:02:26.646 ERROR [LaunchDataIngestionJobCommand] [main] Exception caught: java.lang.RuntimeException: Caught exception during running - org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:137) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.runIngestionJob(IngestionJobLauncher.java:114) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.execute(LaunchDataIngestionJobCommand.java:123) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.main(LaunchDataIngestionJobCommand.java:65) [pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] Caused by: java.lang.IllegalStateException: Cannot create inverted index for column: id_transform because it is not in schema at shaded.com.google.common.base.Preconditions.checkState(Preconditions.java:518) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.segment.creator.impl.SegmentColumnarIndexCreator.init(SegmentColumnarIndexCreator.java:117) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl.build(SegmentIndexCreationDriverImpl.java:178) 
~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner.run(SegmentGenerationTaskRunner.java:102) ~[pinot-batch-ingestion-hadoop-0.5.0-2020-08-13-SNAPSHOT-shaded.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.run(SegmentGenerationJobRunner.java:190) ~[pinot-batch-ingestion-standalone-0.5.0-2020-08-13-SNAPSHOT-shaded.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:135) ~[pinot-all-0.5.0-2020-08-13-SNAPSHOT-jar-with-dependencies.jar:0.5.0-2020-08-13-SNAPSHOT-5ac67eb1a1210cb6c0c76d81d138cac29d0d3d4e] ... 3 more ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org