s1monw commented on code in PR #12829: URL: https://github.com/apache/lucene/pull/12829#discussion_r1403178782
########## lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java: ########## @@ -3173,4 +3173,184 @@ public void testSortDocsAndFreqsAndPositionsAndOffsets() throws IOException { reader.close(); dir.close(); } + + public void testBlockIsMissingParentField() throws IOException { + try (Directory dir = newDirectory()) { + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + String parentField = "parent"; + Sort indexSort = new Sort(parentField, new SortField("foo", SortField.Type.INT)); + iwc.setIndexSort(indexSort); + try (IndexWriter writer = new IndexWriter(dir, iwc)) { + List<Runnable> runnabels = + Arrays.asList( + () -> { + IllegalArgumentException ex = + expectThrows( + IllegalArgumentException.class, + () -> { + writer.addDocuments(Arrays.asList(new Document(), new Document())); + }); + assertEquals( + "the last document in the block must contain a numeric doc values field named: parent", + ex.getMessage()); + }, + () -> { + IllegalArgumentException ex = + expectThrows( + IllegalArgumentException.class, + () -> { + Document doc = new Document(); + doc.add(new NumericDocValuesField("parent", 0)); + writer.addDocuments(Arrays.asList(doc, new Document())); + }); + assertEquals( + "only the last document in the block must contain a numeric doc values field named: parent", + ex.getMessage()); + }, + () -> { + IllegalArgumentException ex = + expectThrows( + IllegalArgumentException.class, + () -> { + writer.addDocuments(Arrays.asList(new Document())); + }); + assertEquals( + "the last document in the block must contain a numeric doc values field named: parent", + ex.getMessage()); + }); + Collections.shuffle(runnabels, random()); + for (Runnable runnable : runnabels) { + runnable.run(); + } + } + } + } + + public void testIndexWithSortIsCongruent() throws IOException { + try (Directory dir = newDirectory()) { + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + String parentField = "parent"; + Sort 
indexSort = new Sort(parentField, new SortField("foo", SortField.Type.INT)); + iwc.setIndexSort(indexSort); + try (IndexWriter writer = new IndexWriter(dir, iwc)) { + Document child1 = new Document(); + child1.add(new StringField("id", Integer.toString(1), Store.YES)); + Document child2 = new Document(); + child2.add(new StringField("id", Integer.toString(1), Store.YES)); + Document parent = new Document(); + parent.add(new StringField("id", Integer.toString(1), Store.YES)); + parent.add(new NumericDocValuesField(parentField, 0)); Review Comment: No, it has no meaning. We could add this field automatically at index time if a parent field is set on the IWC or the sort, and record the number of children. If you want to model multiple levels of nesting, you can do that on top of the API. All we guarantee is that this block of docs is unchanged across merges; I don't think we should do more. You are free to use whatever marker you want to identify sub-roots. However, you can't use the parent field; I think that is reserved for the top level. One upside of having a single value for all parents is that it's very storage efficient. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org