jpountz commented on code in PR #13779: URL: https://github.com/apache/lucene/pull/13779#discussion_r1758641965
########## lucene/core/src/java/org/apache/lucene/index/KnnVectorValues.java: ########## @@ -0,0 +1,281 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.index; + +import java.io.IOException; +import org.apache.lucene.codecs.lucene90.IndexedDISI; +import org.apache.lucene.document.KnnByteVectorField; +import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.Bits; + +/** + * This class abstracts addressing of document vector values indexed as {@link KnnFloatVectorField} + * or {@link KnnByteVectorField}. + * + * @lucene.experimental + */ +public abstract class KnnVectorValues { + + /** The iterator associated with these values. */ + protected DocIndexIterator iterator; + + /** Return the dimension of the vectors */ + public abstract int dimension(); + + /** + * Return the number of vectors for this field. + * + * @return the number of vectors returned by this iterator + */ + public abstract int size(); + + /** + * Return the docid of the document indexed with the given vector ordinal. 
This default + * implementation returns the argument and is appropriate for dense values implementations where + * every doc has a single value. + */ + public int ordToDoc(int ord) { + return ord; + } + + /** + * Creates a new copy of this {@link KnnVectorValues}. This is helpful when you need to access + * different values at once, to avoid overwriting the underlying vector returned. + */ + public abstract KnnVectorValues copy() throws IOException; + + /** Returns the vector byte length, defaults to dimension multiplied by float byte size */ + public int getVectorByteLength() { + return dimension() * getEncoding().byteSize; + } + + /** The vector encoding of these values. */ + public abstract VectorEncoding getEncoding(); + + /** Returns a Bits accepting docs accepted by the argument and having a vector value */ + public Bits getAcceptOrds(Bits acceptDocs) { + // FIXME: change default to return acceptDocs and provide this impl + // somewhere more specialized (in every non-dense impl). + if (acceptDocs == null) { + return null; + } + return new Bits() { + @Override + public boolean get(int index) { + return acceptDocs.get(ordToDoc(index)); + } + + @Override + public int length() { + return size(); + } + }; + } + + /** + * Return the iterator for this instance. If you need multiple iterators, call <code> + * this.copy().iterator()</code>. + */ + public DocIndexIterator iterator() { + if (iterator == null) { + iterator = createIterator(); + } + return iterator; + } + + /** + * Create an iterator for this instance; typically called once by <code>iterator()</code>. Wrapper + * value classes delegate to their inner instance's iterator and shouldn't implement this. + */ + protected DocIndexIterator createIterator() { + throw new UnsupportedOperationException(); + } + + /** + * A DocIdSetIterator that also provides an index() method tracking a distinct ordinal for a + * vector associated with each doc. 
+ */ + public abstract static class DocIndexIterator extends DocIdSetIterator { + + /** return the value index (aka "ordinal" or "ord") corresponding to the current doc */ + public abstract int index(); + + @Override + public int advance(int target) throws IOException { + return slowAdvance(target); + } + + @Override + public long cost() { + throw new UnsupportedOperationException(); Review Comment: If we default cost() to returning size(), that would work for me. But I'm not comfortable with having implementations of DocIdSetIterator#cost that may throw, which means e.g. that they cannot be put in a Conjunction DISI (which will want to sort its clauses by cost). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org