This is an automated email from the ASF dual-hosted git repository. dlmarion pushed a commit to branch 3.1 in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit d0fb7f5fd215c9f60dabb123169ef2df71fc6506 Merge: 040a32c640 44b97f36c4 Author: Dave Marion <dlmar...@apache.org> AuthorDate: Thu Jan 2 21:21:55 2025 +0000 Merge branch '2.1' into 3.1 .../core/fate/zookeeper/DistributedReadWriteLock.java | 3 ++- .../main/java/org/apache/accumulo/server/util/Admin.java | 14 ++++++++++++-- .../apache/accumulo/tserver/log/TabletServerLogger.java | 12 +++++++++--- 3 files changed, 23 insertions(+), 6 deletions(-) diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java index 2aa1e87aa8,46970b4c21..4409602e5e --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java @@@ -293,66 -325,37 +293,72 @@@ public class TabletServerLogger log.error("Failed to close WAL after it failed to open", e); } - // it's possible the log was advertised in ZK even though we got an - // exception. If there's a chance the WAL marker may have been created, - // this will ensure it's closed. Either the close will be written and - // the GC will clean it up, or the tserver is about to die due to sesson - // expiration and the GC will also clean it up. try { - tserver.walogClosed(alog); + Path path = alog.getPath(); + if (fs.exists(path)) { + fs.delete(path); + } } catch (Exception e) { - log.error("Failed to close WAL that failed to open: " + fileName, e); + log.warn("Failed to delete a WAL that failed to open", e); } + } - try { - nextLog.offer(t, 12, TimeUnit.HOURS); - } catch (InterruptedException ex) { - // Throw an Error, not an Exception, so the AccumuloUncaughtExceptionHandler - // will log this then halt the VM. - throw new Error("Next log maker thread interrupted", ex); - } + try { + nextLog.offer(t, 12, TimeUnit.HOURS); + } catch (InterruptedException ex) { - // ignore ++ // Throw an Error, not an Exception, so the AccumuloUncaughtExceptionHandler ++ // will log this then halt the VM. ++ throw new Error("Next log maker thread interrupted", ex); + } + + continue; + } - continue; + log.debug("Created next WAL {}", alog.getLogEntry()); + + try { + tserver.addNewLogMarker(alog); + } catch (Exception t) { + log.error("Failed to add new WAL marker for " + alog.getLogEntry(), t); + + try { + // Intentionally not deleting walog because it may have been advertised in ZK. See + // #949 + alog.close(); + } catch (Exception e) { + log.error("Failed to close WAL after it failed to open", e); } + // it's possible the log was advertised in ZK even though we got an + // exception. If there's a chance the WAL marker may have been created, + // this will ensure it's closed. Either the close will be written and + // the GC will clean it up, or the tserver is about to die due to sesson + // expiration and the GC will also clean it up. try { - while (!nextLog.offer(alog, 12, TimeUnit.HOURS)) { - log.info("Our WAL was not used for 12 hours: {}", fileName); - } - } catch (InterruptedException e) { + tserver.walogClosed(alog); + } catch (Exception e) { + log.error("Failed to close WAL that failed to open: " + alog.getLogEntry(), e); + } + + try { + nextLog.offer(t, 12, TimeUnit.HOURS); + } catch (InterruptedException ex) { - // ignore + // Throw an Error, not an Exception, so the AccumuloUncaughtExceptionHandler + // will log this then halt the VM. - throw new Error("Next log maker thread interrupted", e); ++ throw new Error("Next log maker thread interrupted", ex); } + + continue; + } + + try { + while (!nextLog.offer(alog, 12, TimeUnit.HOURS)) { + log.info("Our WAL was not used for 12 hours: {}", alog.getLogEntry()); + } + } catch (InterruptedException e) { - // ignore - server is shutting down ++ // Throw an Error, not an Exception, so the AccumuloUncaughtExceptionHandler ++ // will log this then halt the VM. ++ throw new Error("Next log maker thread interrupted", e); } } });