iverase commented on code in PR #13563: URL: https://github.com/apache/lucene/pull/13563#discussion_r1681080747
########## lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java: ########## @@ -207,65 +210,133 @@ void accumulate(long value) { maxValue = Math.max(maxValue, value); } + void accumulate(SkipAccumulator other) { + minDocID = Math.min(minDocID, other.minDocID); + maxDocID = Math.max(maxDocID, other.maxDocID); + minValue = Math.min(minValue, other.minValue); + maxValue = Math.max(maxValue, other.maxValue); + docCount += other.docCount; + } + void nextDoc(int docID) { maxDocID = docID; ++docCount; } - void writeTo(DataOutput output) throws IOException { - output.writeInt(maxDocID); - output.writeInt(minDocID); - output.writeLong(maxValue); - output.writeLong(minValue); - output.writeInt(docCount); + public static SkipAccumulator merge(List<SkipAccumulator> list, int index, int length) { + SkipAccumulator acc = new SkipAccumulator(list.get(index).minDocID); + for (int i = 0; i < length; i++) { + acc.accumulate(list.get(index + i)); + } + return acc; } } private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { assert field.hasDocValuesSkipIndex(); - // TODO: This disk compression once we introduce levels - long start = data.getFilePointer(); - SortedNumericDocValues values = valuesProducer.getSortedNumeric(field); + final long start = data.getFilePointer(); + final SortedNumericDocValues values = valuesProducer.getSortedNumeric(field); long globalMaxValue = Long.MIN_VALUE; long globalMinValue = Long.MAX_VALUE; int globalDocCount = 0; int maxDocId = -1; + final List<List<SkipAccumulator>> accumulators = new ArrayList<>(SKIP_INDEX_MAX_LEVEL); + for (int i = 0; i < SKIP_INDEX_MAX_LEVEL; i++) { + accumulators.add(new ArrayList<>()); + } SkipAccumulator accumulator = null; - int counter = 0; + final int maxAccumulators = 1 << (SKIP_INDEX_LEVEL_SHIFT * (SKIP_INDEX_MAX_LEVEL - 1)); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - if (counter == 0) { + if (accumulator == null) { accumulator = new SkipAccumulator(doc); + accumulators.get(0).add(accumulator); } accumulator.nextDoc(doc); for (int i = 0, end = values.docValueCount(); i < end; ++i) { accumulator.accumulate(values.nextValue()); } - if (++counter == skipIndexIntervalSize) { + if (accumulator.docCount == skipIndexIntervalSize) { globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue); globalMinValue = Math.min(globalMinValue, accumulator.minValue); globalDocCount += accumulator.docCount; maxDocId = accumulator.maxDocID; - accumulator.writeTo(data); - counter = 0; + accumulator = null; + if (accumulators.size() == maxAccumulators) { + writeLevels(accumulators); + for (List<SkipAccumulator> accumulatorList : accumulators) { + accumulatorList.clear(); + } + } } } - if (counter > 0) { - globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue); - globalMinValue = Math.min(globalMinValue, accumulator.minValue); - globalDocCount += accumulator.docCount; - maxDocId = accumulator.maxDocID; - accumulator.writeTo(data); + if (accumulators.isEmpty() == false) { + if (accumulator != null) { + globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue); + globalMinValue = Math.min(globalMinValue, accumulator.minValue); + globalDocCount += accumulator.docCount; + maxDocId = accumulator.maxDocID; + } + writeLevels(accumulators); } meta.writeLong(start); // record the start in meta meta.writeLong(data.getFilePointer() - start); // record the length + assert globalDocCount == 0 || globalMaxValue >= globalMinValue; meta.writeLong(globalMaxValue); meta.writeLong(globalMinValue); + assert globalDocCount <= maxDocId + 1; meta.writeInt(globalDocCount); meta.writeInt(maxDocId); } + private void writeLevels(List<List<SkipAccumulator>> accumulators) throws IOException { + for (int i = 1; i < accumulators.size(); i++) { + buildLevel(accumulators.get(i), accumulators.get(i - 1)); + } + int totalAccumulators = accumulators.get(0).size(); + for (int index = 0; index < totalAccumulators; index++) { + // compute how many levels we need to write for the current accumulator + final int levels = getLevels(index, totalAccumulators); + // build the levels + final SkipAccumulator[] accLevels = new SkipAccumulator[levels]; + for (int level = 0; level < levels; level++) { + accLevels[level] = + accumulators.get(level).get(index / (1 << (SKIP_INDEX_LEVEL_SHIFT * level))); Review Comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org