[ https://issues.apache.org/jira/browse/GEODE-8536?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17229581#comment-17229581 ]
ASF subversion and git services commented on GEODE-8536: -------------------------------------------------------- Commit e3ea0ad8b33f3954e813d5e97bcaee018ad6a2c5 in geode's branch refs/heads/support/1.12 from Donal Evans [ https://gitbox.apache.org/repos/asf?p=geode.git;h=e3ea0ad ] GEODE-8536: Allow limited retries when creating Lucene IndexWriter (#5659) Authored-by: Donal Evans <doev...@vmware.com> (cherry picked from commit 872718ec9d119e332c328caf4493bdf8e8a83dcf) > StackOverflow can occur when Lucene IndexWriter is unable to be created > ----------------------------------------------------------------------- > > Key: GEODE-8536 > URL: https://issues.apache.org/jira/browse/GEODE-8536 > Project: Geode > Issue Type: Bug > Components: functions, lucene > Affects Versions: 1.12.0, 1.13.0, 1.14.0 > Reporter: Donal Evans > Assignee: Donal Evans > Priority: Major > Labels: pull-request-available > Fix For: 1.12.1, 1.14.0, 1.13.1 > > > If, during a call to IndexRepositoryFactory.computeIndexRepository(), an > IOException is encountered when attempting to construct an IndexWriter, the > function retry logic will reattempt the execution. This allows transient > exceptions caused by concurrent modification of the fileAndChunk region to be > ignored and subsequent executions to succeed (see GEODE-7703). 
However, if > the IOException is thrown consistently, the function retries indefinitely, which can > cause a StackOverflowError: > {noformat} > java.lang.StackOverflowError > at > org.apache.geode.SystemFailure.startWatchDog(SystemFailure.java:320) > at > org.apache.geode.SystemFailure.notifyWatchDog(SystemFailure.java:758) > at org.apache.geode.SystemFailure.setFailure(SystemFailure.java:813) > at > org.apache.geode.SystemFailure.initiateFailure(SystemFailure.java:790) > at > org.apache.geode.internal.InternalDataSerializer.invokeToData(InternalDataSerializer.java:2251) > at > org.apache.geode.internal.InternalDataSerializer.basicWriteObject(InternalDataSerializer.java:2031) > at > org.apache.geode.DataSerializer.writeObject(DataSerializer.java:2839) > at > org.apache.geode.internal.cache.partitioned.PartitionedRegionFunctionStreamingMessage.toData(PartitionedRegionFunctionStreamingMessage.java:192) > at > org.apache.geode.internal.serialization.internal.DSFIDSerializerImpl.invokeToData(DSFIDSerializerImpl.java:213) > at > org.apache.geode.internal.serialization.internal.DSFIDSerializerImpl.write(DSFIDSerializerImpl.java:137) > at > org.apache.geode.internal.InternalDataSerializer.writeDSFID(InternalDataSerializer.java:1484) > at > org.apache.geode.internal.tcp.MsgStreamer.writeMessage(MsgStreamer.java:247) > at > org.apache.geode.distributed.internal.direct.DirectChannel.sendToMany(DirectChannel.java:306) > at > org.apache.geode.distributed.internal.direct.DirectChannel.sendToOne(DirectChannel.java:182) > at > org.apache.geode.distributed.internal.direct.DirectChannel.send(DirectChannel.java:511) > at > org.apache.geode.distributed.internal.DistributionImpl.directChannelSend(DistributionImpl.java:346) > at > org.apache.geode.distributed.internal.DistributionImpl.send(DistributionImpl.java:291) > at > org.apache.geode.distributed.internal.ClusterDistributionManager.sendViaMembershipManager(ClusterDistributionManager.java:2058) > at > 
org.apache.geode.distributed.internal.ClusterDistributionManager.sendOutgoing(ClusterDistributionManager.java:1986) > at > org.apache.geode.distributed.internal.ClusterDistributionManager.sendMessage(ClusterDistributionManager.java:2023) > at > org.apache.geode.distributed.internal.ClusterDistributionManager.putOutgoing(ClusterDistributionManager.java:1083) > at > org.apache.geode.internal.cache.execute.PartitionedRegionFunctionResultWaiter.getPartitionedDataFrom(PartitionedRegionFunctionResultWaiter.java:89) > at > org.apache.geode.internal.cache.PartitionedRegion.executeOnAllBuckets(PartitionedRegion.java:4079) > at > org.apache.geode.internal.cache.PartitionedRegion.executeFunction(PartitionedRegion.java:3583) > at > org.apache.geode.internal.cache.execute.PartitionedRegionFunctionExecutor.executeFunction(PartitionedRegionFunctionExecutor.java:220) > at > org.apache.geode.internal.cache.execute.AbstractExecution.execute(AbstractExecution.java:376) > at > org.apache.geode.internal.cache.execute.AbstractExecution.execute(AbstractExecution.java:359) > at > org.apache.geode.internal.cache.execute.LocalResultCollectorImpl.getResultInternal(LocalResultCollectorImpl.java:139) > at > org.apache.geode.internal.cache.execute.ResultCollectorHolder.getResult(ResultCollectorHolder.java:53) > at > org.apache.geode.internal.cache.execute.LocalResultCollectorImpl.getResult(LocalResultCollectorImpl.java:112) > at > org.apache.geode.internal.cache.partitioned.PRFunctionStreamingResultCollector.getResultInternal(PRFunctionStreamingResultCollector.java:219) > at > org.apache.geode.internal.cache.execute.ResultCollectorHolder.getResult(ResultCollectorHolder.java:53) > at > org.apache.geode.internal.cache.partitioned.PRFunctionStreamingResultCollector.getResult(PRFunctionStreamingResultCollector.java:88) > at > org.apache.geode.internal.cache.execute.LocalResultCollectorImpl.getResultInternal(LocalResultCollectorImpl.java:141) > at > 
org.apache.geode.internal.cache.execute.ResultCollectorHolder.getResult(ResultCollectorHolder.java:53) > at > org.apache.geode.internal.cache.execute.LocalResultCollectorImpl.getResult(LocalResultCollectorImpl.java:112) > at > org.apache.geode.internal.cache.partitioned.PRFunctionStreamingResultCollector.getResultInternal(PRFunctionStreamingResultCollector.java:219) > at > org.apache.geode.internal.cache.execute.ResultCollectorHolder.getResult(ResultCollectorHolder.java:53) > {noformat} > The underlying exception in this case is a FileNotFoundException thrown when > attempting to retrieve a Lucene file from the fileAndChunk region. > {noformat} > [warn 2020/07/28 23:49:55.375 PDT <Pooled Waiting Message Processor 2> > tid=0xab] Exception thrown while constructing Lucene Index for bucket:16 for > file region:/_PR/_Bindex#_partitionedRegion.files_16 > org.apache.lucene.index.CorruptIndexException: Unexpected file read error > while reading index. (resource=BufferedChecksumIndexInput(segments_4s)) > at org.apache.lucene.index.SegmentInfos.readCommit(SegmentInfos.java:290) > at org.apache.lucene.index.IndexFileDeleter.<init>(IndexFileDeleter.java:165) > at org.apache.lucene.index.IndexWriter.<init>(IndexWriter.java:974) > at > org.apache.geode.cache.lucene.internal.IndexRepositoryFactory.buildIndexWriter(IndexRepositoryFactory.java:152) > at > org.apache.geode.cache.lucene.internal.IndexRepositoryFactory.finishComputingRepository(IndexRepositoryFactory.java:116) > at > org.apache.geode.cache.lucene.internal.IndexRepositoryFactory.computeIndexRepository(IndexRepositoryFactory.java:65) > at > org.apache.geode.cache.lucene.internal.PartitionedRepositoryManager.computeRepository(PartitionedRepositoryManager.java:151) > at > org.apache.geode.cache.lucene.internal.PartitionedRepositoryManager.lambda$computeRepository$1(PartitionedRepositoryManager.java:170) > at java.util.concurrent.ConcurrentHashMap.compute(ConcurrentHashMap.java:1892) > at > 
org.apache.geode.cache.lucene.internal.PartitionedRepositoryManager.computeRepository(PartitionedRepositoryManager.java:162) > at > org.apache.geode.cache.lucene.internal.LuceneBucketListener.lambda$afterPrimary$0(LuceneBucketListener.java:40) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at > org.apache.geode.distributed.internal.ClusterOperationExecutors.runUntilShutdown(ClusterOperationExecutors.java:442) > at > org.apache.geode.distributed.internal.ClusterOperationExecutors.doWaitingThread(ClusterOperationExecutors.java:411) > at > org.apache.geode.logging.internal.executors.LoggingThreadFactory.lambda$newThread$0(LoggingThreadFactory.java:119) > at java.lang.Thread.run(Thread.java:748) > Caused by: java.io.FileNotFoundException: _2p.si > at > org.apache.geode.cache.lucene.internal.filesystem.FileSystem.getFile(FileSystem.java:101) > at > org.apache.geode.cache.lucene.internal.directory.RegionDirectory.openInput(RegionDirectory.java:115) > at org.apache.lucene.store.Directory.openChecksumInput(Directory.java:137) > at > org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat.read(Lucene62SegmentInfoFormat.java:89) > at org.apache.lucene.index.SegmentInfos.readCommit(SegmentInfos.java:357) > at org.apache.lucene.index.SegmentInfos.readCommit(SegmentInfos.java:288) > ... 16 more > {noformat} -- This message was sent by Atlassian Jira (v8.3.4#803005)