zacharymorn commented on a change in pull request #128: URL: https://github.com/apache/lucene/pull/128#discussion_r642646093
########## File path: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java ########## @@ -605,209 +680,103 @@ public Status checkIndex(List<String> onlySegments) throws IOException { result.newSegments.clear(); result.maxSegmentName = -1; - for (int i = 0; i < numSegments; i++) { - final SegmentCommitInfo info = sis.info(i); - long segmentName = Long.parseLong(info.info.name.substring(1), Character.MAX_RADIX); - if (segmentName > result.maxSegmentName) { - result.maxSegmentName = segmentName; - } - if (onlySegments != null && !onlySegments.contains(info.info.name)) { - continue; - } - Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); - result.segmentInfos.add(segInfoStat); - msg( - infoStream, - " " - + (1 + i) - + " of " - + numSegments - + ": name=" - + info.info.name - + " maxDoc=" - + info.info.maxDoc()); - segInfoStat.name = info.info.name; - segInfoStat.maxDoc = info.info.maxDoc(); - - final Version version = info.info.getVersion(); - if (info.info.maxDoc() <= 0) { - throw new RuntimeException("illegal number of documents: maxDoc=" + info.info.maxDoc()); - } - - int toLoseDocCount = info.info.maxDoc(); - - SegmentReader reader = null; - - try { - msg(infoStream, " version=" + (version == null ? "3.0" : version)); - msg(infoStream, " id=" + StringHelper.idToString(info.info.getId())); - final Codec codec = info.info.getCodec(); - msg(infoStream, " codec=" + codec); - segInfoStat.codec = codec; - msg(infoStream, " compound=" + info.info.getUseCompoundFile()); - segInfoStat.compound = info.info.getUseCompoundFile(); - msg(infoStream, " numFiles=" + info.files().size()); - Sort indexSort = info.info.getIndexSort(); - if (indexSort != null) { - msg(infoStream, " sort=" + indexSort); - } - segInfoStat.numFiles = info.files().size(); - segInfoStat.sizeMB = info.sizeInBytes() / (1024. 
* 1024.); - msg(infoStream, " size (MB)=" + nf.format(segInfoStat.sizeMB)); - Map<String, String> diagnostics = info.info.getDiagnostics(); - segInfoStat.diagnostics = diagnostics; - if (diagnostics.size() > 0) { - msg(infoStream, " diagnostics = " + diagnostics); + // checks segments sequentially + if (executorService == null) { + for (int i = 0; i < numSegments; i++) { + final SegmentCommitInfo info = sis.info(i); + updateMaxSegmentName(result, info); + if (onlySegments != null && !onlySegments.contains(info.info.name)) { + continue; } - if (!info.hasDeletions()) { - msg(infoStream, " no deletions"); - segInfoStat.hasDeletions = false; - } else { - msg(infoStream, " has deletions [delGen=" + info.getDelGen() + "]"); - segInfoStat.hasDeletions = true; - segInfoStat.deletionsGen = info.getDelGen(); - } - - long startOpenReaderNS = System.nanoTime(); - if (infoStream != null) infoStream.print(" test: open reader........."); - reader = new SegmentReader(info, sis.getIndexCreatedVersionMajor(), IOContext.DEFAULT); msg( infoStream, - String.format( - Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime() - startOpenReaderNS))); + (1 + i) + + " of " + + numSegments + + ": name=" + + info.info.name + + " maxDoc=" + + info.info.maxDoc()); + Status.SegmentInfoStatus segmentInfoStatus = testSegment(sis, info, infoStream); + + processSegmentInfoStatusResult(result, info, segmentInfoStatus); + } + } else { + ByteArrayOutputStream[] outputs = new ByteArrayOutputStream[numSegments]; + @SuppressWarnings({"unchecked", "rawtypes"}) + CompletableFuture<Status.SegmentInfoStatus>[] futures = new CompletableFuture[numSegments]; + + // checks segments concurrently + for (int i = 0; i < numSegments; i++) { + final SegmentCommitInfo info = sis.info(i); + updateMaxSegmentName(result, info); + if (onlySegments != null && !onlySegments.contains(info.info.name)) { + continue; + } - segInfoStat.openReaderPassed = true; + SegmentInfos finalSis = sis; - long startIntegrityNS = 
System.nanoTime(); - if (infoStream != null) infoStream.print(" test: check integrity....."); - reader.checkIntegrity(); + ByteArrayOutputStream output = new ByteArrayOutputStream(); + PrintStream stream; + if (i > 0) { + // buffer the messages for segment starting from the 2nd one so that they can later be + // printed in order + stream = new PrintStream(output, true, IOUtils.UTF_8); + } else { + // optimize for first segment to print real-time Review comment: > Hmm, could we instead just output the full segment's output as each finishes? So we would presumably see small segments finish first, then all checks for that small segment are printed? > Also, note that the "first segment" might not necessarily always be the largest! The way our default merge policy works, that tends to be the case, but users can customize the merge policy, and we recently added a feature to let IndexWriter customize how it sorts the segments. We should maybe sort the segments by descending docCount and work on them (concurrently) in that order? Hmm ok, and good to know about the custom segment sorting feature! The current implementation actually ensures that the output follows the same segment order as if it were executed sequentially, hence the little trick there to print the first segment's output early. But yeah, I guess this order requirement is not really needed, especially under a concurrent setting. Let me update that to give better output (and maybe even fail faster if a smaller segment encounters an error early!). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org