mikemccand commented on a change in pull request #128: URL: https://github.com/apache/lucene/pull/128#discussion_r694932614
########## File path: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java ########## @@ -450,6 +480,14 @@ public void setChecksumsOnly(boolean v) { private boolean checksumsOnly; + /** Set threadCount used for parallelizing index integrity checking. */ + public void setThreadCount(int tc) { + threadCount = tc; Review comment: Maybe validate the argument? It must be >= 1 at least? ########## File path: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java ########## @@ -3787,8 +4032,12 @@ public int doCheck(Options opts) throws IOException, InterruptedException { setDoSlowChecks(opts.doSlowChecks); setChecksumsOnly(opts.doChecksumsOnly); setInfoStream(opts.out, opts.verbose); + // when threadCount was not provided via command line, override it with 0 to turn of concurrent Review comment: s/`of`/`off` ########## File path: lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java ########## @@ -321,6 +326,11 @@ public static void syncConcurrentMerges(MergeScheduler ms) { checker.setDoSlowChecks(doSlowChecks); checker.setFailFast(failFast); checker.setInfoStream(new PrintStream(output, false, IOUtils.UTF_8), false); + if (concurrent) { + checker.setThreadCount(RandomizedTest.randomIntBetween(1, 5)); + } else { + checker.setThreadCount(0); Review comment: Hmm, shouldn't we use `1` instead of `0` to mean "check sequentially"? And maybe `0` should not be allowed? ########## File path: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java ########## @@ -3622,6 +3860,7 @@ public static void main(String[] args) throws IOException, InterruptedException boolean doSlowChecks = false; Review comment: [Pre-existing] We could remove all these `= false` and `= null`. Hmm maybe there is a static `ecj` checker for this. ########## File path: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java ########## @@ -181,6 +193,9 @@ /** True if we were able to open a CodecReader on this segment. */ public boolean openReaderPassed; + /** doc count in this segment */ Review comment: Yeah let's just keep it the (inconsistent) way it was before for now. ########## File path: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java ########## @@ -605,209 +681,115 @@ public Status checkIndex(List<String> onlySegments) throws IOException { result.newSegments.clear(); result.maxSegmentName = -1; - for (int i = 0; i < numSegments; i++) { - final SegmentCommitInfo info = sis.info(i); - long segmentName = Long.parseLong(info.info.name.substring(1), Character.MAX_RADIX); - if (segmentName > result.maxSegmentName) { - result.maxSegmentName = segmentName; - } - if (onlySegments != null && !onlySegments.contains(info.info.name)) { - continue; - } - Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); - result.segmentInfos.add(segInfoStat); - msg( - infoStream, - " " - + (1 + i) - + " of " - + numSegments - + ": name=" - + info.info.name - + " maxDoc=" - + info.info.maxDoc()); - segInfoStat.name = info.info.name; - segInfoStat.maxDoc = info.info.maxDoc(); - - final Version version = info.info.getVersion(); - if (info.info.maxDoc() <= 0) { - throw new RuntimeException("illegal number of documents: maxDoc=" + info.info.maxDoc()); - } - - int toLoseDocCount = info.info.maxDoc(); - - SegmentReader reader = null; - - try { - msg(infoStream, " version=" + (version == null ? "3.0" : version)); - msg(infoStream, " id=" + StringHelper.idToString(info.info.getId())); - final Codec codec = info.info.getCodec(); - msg(infoStream, " codec=" + codec); - segInfoStat.codec = codec; - msg(infoStream, " compound=" + info.info.getUseCompoundFile()); - segInfoStat.compound = info.info.getUseCompoundFile(); - msg(infoStream, " numFiles=" + info.files().size()); - Sort indexSort = info.info.getIndexSort(); - if (indexSort != null) { - msg(infoStream, " sort=" + indexSort); - } - segInfoStat.numFiles = info.files().size(); - segInfoStat.sizeMB = info.sizeInBytes() / (1024. * 1024.); - msg(infoStream, " size (MB)=" + nf.format(segInfoStat.sizeMB)); - Map<String, String> diagnostics = info.info.getDiagnostics(); - segInfoStat.diagnostics = diagnostics; - if (diagnostics.size() > 0) { - msg(infoStream, " diagnostics = " + diagnostics); - } - - if (!info.hasDeletions()) { - msg(infoStream, " no deletions"); - segInfoStat.hasDeletions = false; - } else { - msg(infoStream, " has deletions [delGen=" + info.getDelGen() + "]"); - segInfoStat.hasDeletions = true; - segInfoStat.deletionsGen = info.getDelGen(); + // checks segments sequentially + if (executorService == null) { + for (int i = 0; i < numSegments; i++) { + final SegmentCommitInfo info = sis.info(i); + updateMaxSegmentName(result, info); + if (onlySegments != null && !onlySegments.contains(info.info.name)) { + continue; } - long startOpenReaderNS = System.nanoTime(); - if (infoStream != null) infoStream.print(" test: open reader........."); - reader = new SegmentReader(info, sis.getIndexCreatedVersionMajor(), IOContext.DEFAULT); msg( infoStream, - String.format( - Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime() - startOpenReaderNS))); + (1 + i) + + " of " + + numSegments + + ": name=" + + info.info.name + + " maxDoc=" + + info.info.maxDoc()); + Status.SegmentInfoStatus segmentInfoStatus = testSegment(sis, info, infoStream); + + processSegmentInfoStatusResult(result, info, segmentInfoStatus); + } + } else { + ByteArrayOutputStream[] outputs = new ByteArrayOutputStream[numSegments]; + @SuppressWarnings({"unchecked", "rawtypes"}) + CompletableFuture<Status.SegmentInfoStatus>[] futures = new CompletableFuture[numSegments]; + + // checks segments concurrently + List<SegmentCommitInfo> segmentCommitInfos = new ArrayList<>(); + for (SegmentCommitInfo sci : sis) { + segmentCommitInfos.add(sci); + } - segInfoStat.openReaderPassed = true; + // sort segmentCommitInfos by segment size, as smaller segment tends to finish faster, and Review comment: Ahhh OK I see -- this is a tricky tradeoff of seeing output sooner, versus finishing the overall `CheckIndex` sooner :) Let's leave it as it is here (seeing output sooner)? ########## File path: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java ########## @@ -605,209 +680,103 @@ public Status checkIndex(List<String> onlySegments) throws IOException { result.newSegments.clear(); result.maxSegmentName = -1; - for (int i = 0; i < numSegments; i++) { - final SegmentCommitInfo info = sis.info(i); - long segmentName = Long.parseLong(info.info.name.substring(1), Character.MAX_RADIX); - if (segmentName > result.maxSegmentName) { - result.maxSegmentName = segmentName; - } - if (onlySegments != null && !onlySegments.contains(info.info.name)) { - continue; - } - Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); - result.segmentInfos.add(segInfoStat); - msg( - infoStream, - " " - + (1 + i) - + " of " - + numSegments - + ": name=" - + info.info.name - + " maxDoc=" - + info.info.maxDoc()); - segInfoStat.name = info.info.name; - segInfoStat.maxDoc = info.info.maxDoc(); - - final Version version = info.info.getVersion(); - if (info.info.maxDoc() <= 0) { - throw new RuntimeException("illegal number of documents: maxDoc=" + info.info.maxDoc()); - } - - int toLoseDocCount = info.info.maxDoc(); - - SegmentReader reader = null; - - try { - msg(infoStream, " version=" + (version == null ? "3.0" : version)); - msg(infoStream, " id=" + StringHelper.idToString(info.info.getId())); - final Codec codec = info.info.getCodec(); - msg(infoStream, " codec=" + codec); - segInfoStat.codec = codec; - msg(infoStream, " compound=" + info.info.getUseCompoundFile()); - segInfoStat.compound = info.info.getUseCompoundFile(); - msg(infoStream, " numFiles=" + info.files().size()); - Sort indexSort = info.info.getIndexSort(); - if (indexSort != null) { - msg(infoStream, " sort=" + indexSort); - } - segInfoStat.numFiles = info.files().size(); - segInfoStat.sizeMB = info.sizeInBytes() / (1024. * 1024.); - msg(infoStream, " size (MB)=" + nf.format(segInfoStat.sizeMB)); - Map<String, String> diagnostics = info.info.getDiagnostics(); - segInfoStat.diagnostics = diagnostics; - if (diagnostics.size() > 0) { - msg(infoStream, " diagnostics = " + diagnostics); + // checks segments sequentially + if (executorService == null) { Review comment: OK that is really sneaky deadlock! Maybe `MDW#close` should not hold its monitor lock when it calls `CheckIndex`, but let's not try to solve that here. We can refactor in the future. This change is already a good step forwards -- progress not perfection! ########## File path: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java ########## @@ -488,8 +503,35 @@ public Status checkIndex() throws IOException { * quite a long time to run. */ public Status checkIndex(List<String> onlySegments) throws IOException { Review comment: > Oh I didn't realize it would spawn multiple jvm processes, This is just how Lucene's test infrastructure runs tests -- it spawns multiple JVMs, each of which is running one Lucene test at a time, but that test may use (often uses?) its own threads, including here in `CheckIndex` if we make it concurrent by default. ########## File path: lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java ########## @@ -895,7 +895,11 @@ public synchronized void close() throws IOException { System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex"); } - TestUtil.checkIndex(this, getCrossCheckTermVectorsOnClose(), true, null); + // Methods in MockDirectoryWrapper hold locks on this, which will cause deadlock when + // TestUtil#checkIndex checks segment concurrently using another thread, but making + // call back to synchronized methods such as MockDirectoryWrapper#fileLength. + // Hence passing concurrent = false to this method to turn off concurrent checks. + TestUtil.checkIndex(this, getCrossCheckTermVectorsOnClose(), true, false, null); Review comment: Maybe open a follow-on issue to fix this sync situation so that we could, randomly, sometimes use concurrency in `CheckIndex` from tests? Maybe we could start by making some of the `TestUtil.checkIndex` use concurrency, just not the one that MDW invokes? ########## File path: lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java ########## @@ -54,4 +65,137 @@ public void testChecksumsOnlyVerbose() throws IOException { public void testObtainsLock() throws IOException { testObtainsLock(directory); } + + @Test + public void testCheckIndexAllValid() throws Exception { Review comment: Thank you for this nice unit test! Confirming the textual output syntax from `CheckIndex`. ########## File path: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java ########## @@ -843,6 +825,258 @@ public Status checkIndex(List<String> onlySegments) throws IOException { return result; } + private void updateMaxSegmentName(Status result, SegmentCommitInfo info) { + long segmentName = Long.parseLong(info.info.name.substring(1), Character.MAX_RADIX); + if (segmentName > result.maxSegmentName) { + result.maxSegmentName = segmentName; + } + } + + private void processSegmentInfoStatusResult( + Status result, SegmentCommitInfo info, Status.SegmentInfoStatus segmentInfoStatus) { + result.segmentInfos.add(segmentInfoStatus); + if (segmentInfoStatus.error != null) { + result.totLoseDocCount += segmentInfoStatus.toLoseDocCount; + result.numBadSegments++; + } else { + // Keeper + result.newSegments.add(info.clone()); + } + } + + private <R> CompletableFuture<R> runAsyncSegmentCheck( + Callable<R> asyncCallable, ExecutorService executorService) { + return CompletableFuture.supplyAsync(callableToSupplier(asyncCallable), executorService); + } + + private <T> Supplier<T> callableToSupplier(Callable<T> callable) { + return () -> { + try { + return callable.call(); + } catch (RuntimeException | Error e) { + throw e; + } catch (Throwable e) { + throw new CompletionException(e); + } + }; + } + + private Status.SegmentInfoStatus testSegment( + SegmentInfos sis, SegmentCommitInfo info, PrintStream infoStream) throws IOException { + Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); + segInfoStat.name = info.info.name; + segInfoStat.maxDoc = info.info.maxDoc(); + + final Version version = info.info.getVersion(); + if (info.info.maxDoc() <= 0) { + throw new CheckIndexException(" illegal number of documents: maxDoc=" + info.info.maxDoc()); + } + + int toLoseDocCount = info.info.maxDoc(); + + SegmentReader reader = null; + + try { + msg(infoStream, " version=" + (version == null ? "3.0" : version)); + msg(infoStream, " id=" + StringHelper.idToString(info.info.getId())); + final Codec codec = info.info.getCodec(); + msg(infoStream, " codec=" + codec); + segInfoStat.codec = codec; + msg(infoStream, " compound=" + info.info.getUseCompoundFile()); + segInfoStat.compound = info.info.getUseCompoundFile(); + msg(infoStream, " numFiles=" + info.files().size()); + Sort indexSort = info.info.getIndexSort(); + if (indexSort != null) { + msg(infoStream, " sort=" + indexSort); + } + segInfoStat.numFiles = info.files().size(); + segInfoStat.sizeMB = info.sizeInBytes() / (1024. * 1024.); + // nf#format is not thread-safe, and would generate random non valid results in concurrent + // setting + synchronized (nf) { + msg(infoStream, " size (MB)=" + nf.format(segInfoStat.sizeMB)); + } + Map<String, String> diagnostics = info.info.getDiagnostics(); + segInfoStat.diagnostics = diagnostics; + if (diagnostics.size() > 0) { + msg(infoStream, " diagnostics = " + diagnostics); + } + + if (!info.hasDeletions()) { + msg(infoStream, " no deletions"); + segInfoStat.hasDeletions = false; + } else { + msg(infoStream, " has deletions [delGen=" + info.getDelGen() + "]"); + segInfoStat.hasDeletions = true; + segInfoStat.deletionsGen = info.getDelGen(); + } + + long startOpenReaderNS = System.nanoTime(); + if (infoStream != null) infoStream.print(" test: open reader........."); + reader = new SegmentReader(info, sis.getIndexCreatedVersionMajor(), IOContext.DEFAULT); + msg( + infoStream, + String.format( + Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime() - startOpenReaderNS))); + + segInfoStat.openReaderPassed = true; + + long startIntegrityNS = System.nanoTime(); + if (infoStream != null) infoStream.print(" test: check integrity....."); + reader.checkIntegrity(); + msg( + infoStream, + String.format( + Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime() - startIntegrityNS))); + + if (reader.maxDoc() != info.info.maxDoc()) { + throw new CheckIndexException( + "SegmentReader.maxDoc() " + + reader.maxDoc() + + " != SegmentInfo.maxDoc " + + info.info.maxDoc()); + } + + final int numDocs = reader.numDocs(); + toLoseDocCount = numDocs; + + if (reader.hasDeletions()) { + if (reader.numDocs() != info.info.maxDoc() - info.getDelCount()) { + throw new CheckIndexException( + "delete count mismatch: info=" + + (info.info.maxDoc() - info.getDelCount()) + + " vs reader=" + + reader.numDocs()); + } + if ((info.info.maxDoc() - reader.numDocs()) > reader.maxDoc()) { + throw new CheckIndexException( + "too many deleted docs: maxDoc()=" + + reader.maxDoc() + + " vs del count=" + + (info.info.maxDoc() - reader.numDocs())); + } + if (info.info.maxDoc() - reader.numDocs() != info.getDelCount()) { + throw new CheckIndexException( + "delete count mismatch: info=" + + info.getDelCount() + + " vs reader=" + + (info.info.maxDoc() - reader.numDocs())); + } + } else { + if (info.getDelCount() != 0) { + throw new CheckIndexException( + "delete count mismatch: info=" + + info.getDelCount() + + " vs reader=" + + (info.info.maxDoc() - reader.numDocs())); + } + } + + if (checksumsOnly == false) { + // This redundant assignment is done to make compiler happy + SegmentReader finalReader = reader; Review comment: Egads, why does compiler insist on that? I don't see any anonymous classes / lambda bodies trying to reference `reader`/`finalReader`? Is this a leftover? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org