jpountz commented on code in PR #12436:
URL: https://github.com/apache/lucene/pull/12436#discussion_r1275169909


##########
lucene/core/src/java/org/apache/lucene/index/IndexingChain.java:
##########
@@ -621,6 +621,12 @@ private void initializeFieldInfo(PerField pf) throws 
IOException {
       final Sort indexSort = indexWriterConfig.getIndexSort();
       validateIndexSortDVType(indexSort, pf.fieldName, s.docValuesType);
     }
+    if (s.vectorDimension != 0) {
+      validateMaxVectorDimension(
+          pf.fieldName,
+          s.vectorDimension,
+          indexWriterConfig.getCodec().knnVectorsFormat().getMaxDimensions());
+    }

Review Comment:
   I worry that this adds a hashtable lookup on a hot code path. Maybe it's not 
that bad for vectors, which are slow to index anyway, but I'd rather avoid it. 
What about making the codec responsible for checking the limit? Something like 
below:
   
   ```patch
   diff --git 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsFormat.java
 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsFormat.java
   index cb3e5ef8b10..6c365e53528 100644
   --- 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsFormat.java
   +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsFormat.java
   @@ -108,6 +108,9 @@ public final class Lucene95HnswVectorsFormat extends 
KnnVectorsFormat {
      public static final int VERSION_START = 0;
      public static final int VERSION_CURRENT = VERSION_START;
    
   +  /** A maximum number of vector dimensions supported by this codeс */
   +  public static final int MAX_DIMENSIONS = 1024;
   +
      /**
       * A maximum configurable maximum max conn.
       *
   @@ -177,7 +180,7 @@ public final class Lucene95HnswVectorsFormat extends 
KnnVectorsFormat {
    
      @Override
      public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws 
IOException {
   -    return new Lucene95HnswVectorsWriter(state, maxConn, beamWidth);
   +    return new Lucene95HnswVectorsWriter(state, maxConn, beamWidth, 
MAX_DIMENSIONS);
      }
    
      @Override
   diff --git 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
   index 5358d66f16e..196f12a21ad 100644
   --- 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
   +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
   @@ -60,13 +60,15 @@ public final class Lucene95HnswVectorsWriter extends 
KnnVectorsWriter {
      private final IndexOutput meta, vectorData, vectorIndex;
      private final int M;
      private final int beamWidth;
   +  private final int maxDimension;
    
      private final List<FieldWriter<?>> fields = new ArrayList<>();
      private boolean finished;
    
   -  Lucene95HnswVectorsWriter(SegmentWriteState state, int M, int beamWidth) 
throws IOException {
   +  Lucene95HnswVectorsWriter(SegmentWriteState state, int M, int beamWidth, 
int maxDimension) throws IOException {
        this.M = M;
        this.beamWidth = beamWidth;
   +    this.maxDimension = maxDimension;
        segmentWriteState = state;
        String metaFileName =
            IndexFileNames.segmentFileName(
   @@ -117,6 +119,9 @@ public final class Lucene95HnswVectorsWriter extends 
KnnVectorsWriter {
    
      @Override
      public KnnFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws 
IOException {
   +    if (fieldInfo.getVectorDimension() > maxDimension) {
   +      throw new IllegalArgumentException("Number of dimensions " + 
fieldInfo.getVectorDimension() + " for field " + fieldInfo.name + " exceeds the 
limit of " + maxDimension);
   +    }
        FieldWriter<?> newField =
            FieldWriter.create(fieldInfo, M, beamWidth, 
segmentWriteState.infoStream);
        fields.add(newField);
   ```



##########
lucene/core/src/java/org/apache/lucene/index/IndexingChain.java:
##########
@@ -621,6 +621,12 @@ private void initializeFieldInfo(PerField pf) throws 
IOException {
       final Sort indexSort = indexWriterConfig.getIndexSort();
       validateIndexSortDVType(indexSort, pf.fieldName, s.docValuesType);
     }
+    if (s.vectorDimension != 0) {
+      validateMaxVectorDimension(
+          pf.fieldName,
+          s.vectorDimension,
+          indexWriterConfig.getCodec().knnVectorsFormat().getMaxDimensions());
+    }

Review Comment:
   I worry that this adds a hashtable lookup on a hot code path. Maybe it's not 
that bad for vectors, which are slow to index anyway, but I'd rather avoid it. 
What about making the codec responsible for checking the limit? Something like 
below:
   
   ```patch
   diff --git 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsFormat.java
 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsFormat.java
   index cb3e5ef8b10..6c365e53528 100644
   --- 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsFormat.java
   +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsFormat.java
   @@ -108,6 +108,9 @@ public final class Lucene95HnswVectorsFormat extends 
KnnVectorsFormat {
      public static final int VERSION_START = 0;
      public static final int VERSION_CURRENT = VERSION_START;
    
   +  /** A maximum number of vector dimensions supported by this codeс */
   +  public static final int MAX_DIMENSIONS = 1024;
   +
      /**
       * A maximum configurable maximum max conn.
       *
   @@ -177,7 +180,7 @@ public final class Lucene95HnswVectorsFormat extends 
KnnVectorsFormat {
    
      @Override
      public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws 
IOException {
   -    return new Lucene95HnswVectorsWriter(state, maxConn, beamWidth);
   +    return new Lucene95HnswVectorsWriter(state, maxConn, beamWidth, 
MAX_DIMENSIONS);
      }
    
      @Override
   diff --git 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
   index 5358d66f16e..196f12a21ad 100644
   --- 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
   +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
   @@ -60,13 +60,15 @@ public final class Lucene95HnswVectorsWriter extends 
KnnVectorsWriter {
      private final IndexOutput meta, vectorData, vectorIndex;
      private final int M;
      private final int beamWidth;
   +  private final int maxDimension;
    
      private final List<FieldWriter<?>> fields = new ArrayList<>();
      private boolean finished;
    
   -  Lucene95HnswVectorsWriter(SegmentWriteState state, int M, int beamWidth) 
throws IOException {
   +  Lucene95HnswVectorsWriter(SegmentWriteState state, int M, int beamWidth, 
int maxDimension) throws IOException {
        this.M = M;
        this.beamWidth = beamWidth;
   +    this.maxDimension = maxDimension;
        segmentWriteState = state;
        String metaFileName =
            IndexFileNames.segmentFileName(
   @@ -117,6 +119,9 @@ public final class Lucene95HnswVectorsWriter extends 
KnnVectorsWriter {
    
      @Override
      public KnnFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws 
IOException {
   +    if (fieldInfo.getVectorDimension() > maxDimension) {
   +      throw new IllegalArgumentException("Number of dimensions " + 
fieldInfo.getVectorDimension() + " for field " + fieldInfo.name + " exceeds the 
limit of " + maxDimension);
   +    }
        FieldWriter<?> newField =
            FieldWriter.create(fieldInfo, M, beamWidth, 
segmentWriteState.infoStream);
        fields.add(newField);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to