jpountz commented on a change in pull request #103: URL: https://github.com/apache/lucene/pull/103#discussion_r622844633
########## File path: lucene/core/src/test/org/apache/lucene/index/TestTermVectors.java ########## @@ -166,4 +118,97 @@ public void testFullMergeAddIndexesReader() throws Exception { verifyIndex(target); IOUtils.close(target, input[0], input[1]); } + + /** + * Assert that a merged segment has payloads set up in fieldInfo, if at least 1 segment has + * payloads for this field. + */ + public void testMergeWithPayloads() throws Exception { + + final FieldType ft1 = new FieldType(TextField.TYPE_NOT_STORED); + ft1.setStoreTermVectors(true); + ft1.setStoreTermVectorOffsets(true); + ft1.setStoreTermVectorPositions(true); + ft1.setStoreTermVectorPayloads(true); + ft1.freeze(); + + Directory dir = newDirectory(); + final int numDocsInSegment = 10; + IndexWriterConfig indexWriterConfig = + new IndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(numDocsInSegment); + IndexWriter writer = new IndexWriter(dir, indexWriterConfig); + + boolean hasPayloads1 = random().nextBoolean(); + boolean hasPayloads2 = !hasPayloads1; Review comment: Can you test both cases everytime instead of relying on randomization? ########## File path: lucene/core/src/test/org/apache/lucene/index/TestTermVectors.java ########## @@ -16,70 +16,22 @@ */ package org.apache.lucene.index; +import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween; + import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.English; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; -import org.junit.AfterClass; -import org.junit.BeforeClass; public class TestTermVectors extends LuceneTestCase { - private static IndexReader reader; - private static Directory directory; - - @BeforeClass - public static void beforeClass() throws Exception { - directory = newDirectory(); - RandomIndexWriter writer = - new RandomIndexWriter( - random(), - directory, - newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)) - .setMergePolicy(newLogMergePolicy())); - // writer.setNoCFSRatio(1.0); - // writer.infoStream = System.out; - for (int i = 0; i < 1000; i++) { - Document doc = new Document(); - FieldType ft = new FieldType(TextField.TYPE_STORED); - int mod3 = i % 3; - int mod2 = i % 2; - if (mod2 == 0 && mod3 == 0) { - ft.setStoreTermVectors(true); - ft.setStoreTermVectorOffsets(true); - ft.setStoreTermVectorPositions(true); - } else if (mod2 == 0) { - ft.setStoreTermVectors(true); - ft.setStoreTermVectorPositions(true); - } else if (mod3 == 0) { - ft.setStoreTermVectors(true); - ft.setStoreTermVectorOffsets(true); - } else { - ft.setStoreTermVectors(true); - } - doc.add(new Field("field", English.intToEnglish(i), ft)); - // test no term vectors too - doc.add(new TextField("noTV", English.intToEnglish(i), Field.Store.YES)); - writer.addDocument(doc); - } - reader = writer.getReader(); - writer.close(); - } - - @AfterClass - public static void afterClass() throws Exception { - reader.close(); - directory.close(); - reader = null; - directory = null; - } Review comment: oh, this was unused? ########## File path: lucene/core/src/test/org/apache/lucene/index/TestTermVectors.java ########## @@ -166,4 +118,97 @@ public void testFullMergeAddIndexesReader() throws Exception { verifyIndex(target); IOUtils.close(target, input[0], input[1]); } + + /** + * Assert that a merged segment has payloads set up in fieldInfo, if at least 1 segment has + * payloads for this field. + */ + public void testMergeWithPayloads() throws Exception { + + final FieldType ft1 = new FieldType(TextField.TYPE_NOT_STORED); + ft1.setStoreTermVectors(true); + ft1.setStoreTermVectorOffsets(true); + ft1.setStoreTermVectorPositions(true); + ft1.setStoreTermVectorPayloads(true); + ft1.freeze(); + + Directory dir = newDirectory(); + final int numDocsInSegment = 10; + IndexWriterConfig indexWriterConfig = + new IndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(numDocsInSegment); + IndexWriter writer = new IndexWriter(dir, indexWriterConfig); + + boolean hasPayloads1 = random().nextBoolean(); + boolean hasPayloads2 = !hasPayloads1; + TokenStreamGenerator tkg1 = new TokenStreamGenerator(hasPayloads1); + TokenStreamGenerator tkg2 = new TokenStreamGenerator(hasPayloads2); + // create one segment with payloads, and another without payloads + for (int i = 0; i < numDocsInSegment; i++) { + Document doc = new Document(); + doc.add(new Field("c", tkg1.newTokenStream(), ft1)); + writer.addDocument(doc); + } + for (int i = 0; i < numDocsInSegment; i++) { + Document doc = new Document(); + doc.add(new Field("c", tkg2.newTokenStream(), ft1)); + writer.addDocument(doc); + } + + IndexReader reader1 = writer.getReader(); + assertEquals( Review comment: Can you assert on the number of leaves? ```suggestion assertEquals(2, reader1.leaves().size()); assertEquals( ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org