Re: [PR] Inline skip data into postings lists [lucene]

via GitHub Tue, 23 Jul 2024 14:45:15 -0700


mikemccand commented on code in PR #13585:
URL: https://github.com/apache/lucene/pull/13585#discussion_r1688761798



##########
lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsWriter.java:
##########
@@ -0,0 +1,597 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene912;
+
+import static org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat.*;
+import static 
org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat.BLOCK_SIZE;
+import static 
org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat.DOC_CODEC;
+import static 
org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat.PAY_CODEC;
+import static 
org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat.TERMS_CODEC;
+import static 
org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat.VERSION_CURRENT;
+
+import java.io.IOException;
+import java.util.Collection;
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
+import org.apache.lucene.codecs.PushPostingsWriterBase;
+import 
org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat.IntBlockTermState;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Impact;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.ByteBuffersDataOutput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+public class Lucene912PostingsWriter extends PushPostingsWriterBase {
+
+  static final IntBlockTermState EMPTY_STATE = new IntBlockTermState();
+
+  IndexOutput docOut;
+  IndexOutput posOut;
+  IndexOutput payOut;
+
+  IntBlockTermState lastState;
+
+  // Holds starting file pointers for current term:
+  private long docStartFP;
+  private long posStartFP;
+  private long payStartFP;
+
+  final long[] docDeltaBuffer;
+  final long[] freqBuffer;
+  private int docBufferUpto;
+
+  final long[] posDeltaBuffer;
+  final long[] payloadLengthBuffer;
+  final long[] offsetStartDeltaBuffer;
+  final long[] offsetLengthBuffer;
+  private int posBufferUpto;
+
+  private byte[] payloadBytes;
+  private int payloadByteUpto;
+
+  private int lastBlockDocID;
+  private long lastBlockPosFP;
+  private long lastBlockPayFP;
+
+  private int lastSkipDocID;
+  private long lastSkipPosFP;
+  private long lastSkipPayFP;
+
+  private int docID;
+  private int lastDocID;
+  private int lastPosition;
+  private int lastStartOffset;
+  private int docCount;
+
+  private final PForUtil pforUtil;
+  private final ForDeltaUtil forDeltaUtil;
+
+  private boolean fieldHasNorms;
+  private NumericDocValues norms;
+  private final CompetitiveImpactAccumulator competitiveFreqNormAccumulator =
+      new CompetitiveImpactAccumulator();
+  private final CompetitiveImpactAccumulator 
skipCompetitiveFreqNormAccumulator =
+      new CompetitiveImpactAccumulator();
+
+  /** Spare output that we use to be able to prepend the encoded length, e.g. 
impacts. */
+  private final ByteBuffersDataOutput spareOutput = 
ByteBuffersDataOutput.newResettableInstance();
+
+  /**
+   * Output for a single block. This is useful to be able to prepend skip data 
before each block,
+   * which can only be computed once the block is encoded. The content is then 
typically copied to
+   * {@link #skipOutput}.
+   */
+  private final ByteBuffersDataOutput blockOutput = 
ByteBuffersDataOutput.newResettableInstance();
+
+  /**
+   * Output for groups of 32 blocks. This is useful to prepend skip data for 
these 32 blocks, which
+   * can only be done once we have encoded these 32 blocks. The content is 
then typically copied to
+   * {@link #docCount}.

Review Comment:
   `docCount` -> `docOut`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Re: [PR] Inline skip data into postings lists [lucene]

Reply via email to