Jackie-Jiang commented on code in PR #13037: URL: https://github.com/apache/pinot/pull/13037#discussion_r1593104346
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java: ########## @@ -757,57 +753,157 @@ private void createDerivedColumnV1Indices(String column, FunctionEvaluator funct } else { useVarLengthDictionary = _indexLoadingConfig.getVarLengthDictionaryColumns().contains(column); } - indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, true, useVarLengthDictionary, true, - new ByteArray((byte[]) fieldSpec.getDefaultNullValue())); + indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, createDictionary, useVarLengthDictionary, + true, new ByteArray((byte[]) fieldSpec.getDefaultNullValue())); break; } default: throw new IllegalStateException(); } - // Create dictionary - try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(fieldSpec, _indexDir, - indexCreationInfo.isUseVarLengthDictionary())) { - dictionaryCreator.build(indexCreationInfo.getSortedUniqueElementsArray()); + if (createDictionary) { + createDerivedColumnForwardIndexWithDictionary(column, fieldSpec, outputValues, indexCreationInfo); + } else { + createDerivedColumnForwardIndexWithoutDictionary(column, fieldSpec, outputValues, indexCreationInfo); + } + } finally { + for (ValueReader valueReader : valueReaders) { + valueReader.close(); + } + } + } + + /** + * Helper method to create the dictionary and forward indices for a column with derived values. + */ + private void createDerivedColumnForwardIndexWithDictionary(String column, FieldSpec fieldSpec, Object[] outputValues, + ColumnIndexCreationInfo indexCreationInfo) throws Exception { + + // Create dictionary + try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(fieldSpec, _indexDir, + indexCreationInfo.isUseVarLengthDictionary())) { + dictionaryCreator.build(indexCreationInfo.getSortedUniqueElementsArray()); + + int numDocs = outputValues.length; - // Create forward index - int cardinality = indexCreationInfo.getDistinctValueCount(); + // Create forward index + boolean isSingleValue = fieldSpec.isSingleValueField(); + + try (ForwardIndexCreator forwardIndexCreator + = getForwardIndexCreator(fieldSpec, indexCreationInfo, numDocs, column, true)) { if (isSingleValue) { - try (ForwardIndexCreator forwardIndexCreator = indexCreationInfo.isSorted() - ? new SingleValueSortedForwardIndexCreator(_indexDir, column, cardinality) - : new SingleValueUnsortedForwardIndexCreator(_indexDir, column, cardinality, numDocs)) { - for (int i = 0; i < numDocs; i++) { - forwardIndexCreator.putDictId(dictionaryCreator.indexOfSV(outputValues[i])); - } + for (Object outputValue : outputValues) { + forwardIndexCreator.putDictId(dictionaryCreator.indexOfSV(outputValue)); } } else { - DictIdCompressionType dictIdCompressionType = null; - FieldIndexConfigs fieldIndexConfig = _indexLoadingConfig.getFieldIndexConfig(column); - if (fieldIndexConfig != null) { - ForwardIndexConfig forwardIndexConfig = fieldIndexConfig.getConfig(new ForwardIndexPlugin().getIndexType()); - if (forwardIndexConfig != null) { - dictIdCompressionType = forwardIndexConfig.getDictIdCompressionType(); - } - } - try (ForwardIndexCreator forwardIndexCreator = dictIdCompressionType == DictIdCompressionType.MV_ENTRY_DICT - ? new MultiValueEntryDictForwardIndexCreator(_indexDir, column, cardinality, numDocs) - : new MultiValueUnsortedForwardIndexCreator(_indexDir, column, cardinality, numDocs, - indexCreationInfo.getTotalNumberOfEntries())) { - for (int i = 0; i < numDocs; i++) { - forwardIndexCreator.putDictIdMV(dictionaryCreator.indexOfMV(outputValues[i])); - } + for (Object outputValue : outputValues) { + forwardIndexCreator.putDictIdMV(dictionaryCreator.indexOfMV(outputValue)); } } - // Add the column metadata SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, column, indexCreationInfo, numDocs, fieldSpec, true, dictionaryCreator.getNumBytesPerEntry()); } - } finally { - for (ValueReader valueReader : valueReaders) { - valueReader.close(); + } + } + + /** + * Helper method to create a forward index for a raw encoded column with derived values. + */ + private void createDerivedColumnForwardIndexWithoutDictionary(String column, FieldSpec fieldSpec, + Object[] outputValues, ColumnIndexCreationInfo indexCreationInfo) + throws Exception { + + // Create forward index + int numDocs = outputValues.length; + boolean isSingleValue = fieldSpec.isSingleValueField(); + + try (ForwardIndexCreator forwardIndexCreator + = getForwardIndexCreator(fieldSpec, indexCreationInfo, numDocs, column, false)) { + if (isSingleValue) { + for (Object outputValue : outputValues) { + switch (fieldSpec.getDataType().getStoredType()) { + // Casts are safe here because we've already done the conversion in createDerivedColumnV1Indices + case INT: + forwardIndexCreator.putInt((int) outputValue); + break; + case LONG: + forwardIndexCreator.putLong((long) outputValue); + break; + case FLOAT: + forwardIndexCreator.putFloat((float) outputValue); + break; + case DOUBLE: + forwardIndexCreator.putDouble((double) outputValue); + break; + case BIG_DECIMAL: + forwardIndexCreator.putBigDecimal((BigDecimal) outputValue); + break; + case STRING: + forwardIndexCreator.putString((String) outputValue); + break; + case BYTES: + forwardIndexCreator.putBytes((byte[]) outputValue); + break; + default: + throw new IllegalStateException(); + } + } + } else { + for (Object outputValue : outputValues) { + switch (fieldSpec.getDataType().getStoredType()) { + // Casts are safe here because we've already done the conversion in createDerivedColumnV1Indices + case INT: + forwardIndexCreator.putIntMV(ArrayUtils.toPrimitive((Integer[]) outputValue)); Review Comment: Ideally we can directly cast to array of primitive type in the previous step. Let me know if you want to address that in the same PR, or we can add a TODO to address it later -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org