richardstartin commented on a change in pull request #8264: URL: https://github.com/apache/pinot/pull/8264#discussion_r816680897
########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/IsNotNullTransformFunction.java ########## @@ -0,0 +1,69 @@ +package org.apache.pinot.core.operator.transform.function; + +import com.google.common.base.Preconditions; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.pinot.core.operator.blocks.ProjectionBlock; +import org.apache.pinot.core.operator.transform.TransformResultMetadata; +import org.apache.pinot.segment.spi.datasource.DataSource; +import org.apache.pinot.segment.spi.index.reader.NullValueVectorReader; + + +public class IsNotNullTransformFunction extends BaseTransformFunction { + public static final String FUNCTION_NAME = "IS_NOT_NULL"; + + private TransformFunction _leftTransformFunction; + private int[] _results; + private Map<String, DataSource> _dataSourceMap = new HashMap<>(); + + @Override + public String getName() { + return FUNCTION_NAME; + } + + @Override + public void init(List<TransformFunction> arguments, Map<String, DataSource> dataSourceMap) { + Preconditions.checkArgument(arguments.size() == 1, + "Exact 1 argument is required for IS_NOT_NULL operator function"); + _leftTransformFunction = arguments.get(0); + if (!(_leftTransformFunction instanceof IdentifierTransformFunction)) { + throw new IllegalArgumentException( + "Only column names are supported in IS_NOT_NULL. 
Support for functions is planned for future release"); + } + _dataSourceMap = dataSourceMap; + } + + @Override + public TransformResultMetadata getResultMetadata() { + return BOOLEAN_SV_NO_DICTIONARY_METADATA; + } + + @Override + public int[] transformToIntValuesSV(ProjectionBlock projectionBlock) { + int length = projectionBlock.getNumDocs(); + if (_results == null || _results.length < length) { + _results = new int[length]; + } + + int[] docIds = projectionBlock.getDocIds(); + String columnName = ((IdentifierTransformFunction) _leftTransformFunction).getColumnName(); + NullValueVectorReader nullValueVector = _dataSourceMap.get(columnName).getNullValueVector(); + + if (nullValueVector != null) { + for (int idx = 0; idx < length; idx++) { + int docId = docIds[idx]; + if (nullValueVector.isNull(docId)) { + _results[idx] = 0; + } else { + _results[idx] = 1; + } + } Review comment: Try something like this: ```java if (nullValueVector != null) { PeekableIntIterator it = nullValueVector.getNullBitmap().getIntIterator(); int pos = 0; while (it.hasNext() & pos < length) { it.advanceIfNeeded(docIds[pos]); pos = Arrays.binarySearch(docIds, pos, length, it.next()); if (pos >= 0) { _results[pos] = 0; pos++; } else { pos = -pos - 1; } } } ``` Since blocks are iterated in ascending docId order, it would be better to store the iterator in the TransformFunction, which would avoid mapping and advancing the bitmap once per block (currently this code maps the bitmap once per row). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org