This is an automated email from the ASF dual-hosted git repository. dlmarion pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit d185870c31d59c24f0f91f0a8650d89733615b22 Merge: be70c2a985 0553feb4c7 Author: Dave Marion <dlmar...@apache.org> AuthorDate: Mon Jan 6 20:00:53 2025 +0000 Merge branch '3.1' .../accumulo/core/lock/ServiceLockSupport.java | 152 +++++++++++++++++++++ .../org/apache/accumulo/compactor/Compactor.java | 19 +-- .../apache/accumulo/gc/SimpleGarbageCollector.java | 41 +++--- .../java/org/apache/accumulo/manager/Manager.java | 69 +--------- .../java/org/apache/accumulo/monitor/Monitor.java | 61 +-------- .../org/apache/accumulo/tserver/ScanServer.java | 23 +--- .../org/apache/accumulo/tserver/TabletServer.java | 23 +--- 7 files changed, 189 insertions(+), 199 deletions(-) diff --cc server/compactor/src/main/java/org/apache/accumulo/compactor/Compactor.java index ca19dec098,3d60d89fb1..214c3af561 --- a/server/compactor/src/main/java/org/apache/accumulo/compactor/Compactor.java +++ b/server/compactor/src/main/java/org/apache/accumulo/compactor/Compactor.java @@@ -80,8 -80,7 +79,9 @@@ import org.apache.accumulo.core.lock.Se import org.apache.accumulo.core.lock.ServiceLockData.ServiceDescriptor; import org.apache.accumulo.core.lock.ServiceLockData.ServiceDescriptors; import org.apache.accumulo.core.lock.ServiceLockData.ThriftService; +import org.apache.accumulo.core.lock.ServiceLockPaths.ServiceLockPath; + import org.apache.accumulo.core.lock.ServiceLockSupport.ServiceLockWatcher; +import org.apache.accumulo.core.manager.state.tables.TableState; import org.apache.accumulo.core.metadata.ReferencedTabletFile; import org.apache.accumulo.core.metadata.StoredTabletFile; import org.apache.accumulo.core.metadata.schema.DataFileValue; @@@ -293,22 -270,10 +292,10 @@@ public class Compactor extends Abstract throw e; } - compactorLock = new ServiceLock(getContext().getZooReaderWriter().getZooKeeper(), - ServiceLock.path(zPath), compactorId); + compactorLock = + new ServiceLock(getContext().getZooReaderWriter().getZooKeeper(), path, compactorId); - LockWatcher lw = new LockWatcher() { - @Override - public void lostLock(final LockLossReason reason) { - Halt.halt(1, () -> { - LOG.error("Compactor lost lock (reason = {}), exiting.", reason); - getContext().getLowMemoryDetector().logGCInfo(getConfiguration()); - }); - } - - @Override - public void unableToMonitorLockNode(final Exception e) { - Halt.halt(1, () -> LOG.error("Lost ability to monitor Compactor lock, exiting.", e)); - } - }; + LockWatcher lw = new ServiceLockWatcher("compactor", () -> false, + (name) -> getContext().getLowMemoryDetector().logGCInfo(getConfiguration())); try { for (int i = 0; i < 25; i++) { diff --cc server/gc/src/main/java/org/apache/accumulo/gc/SimpleGarbageCollector.java index 65f80b1864,ab5cc89df2..e8f3dcb3db --- a/server/gc/src/main/java/org/apache/accumulo/gc/SimpleGarbageCollector.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/SimpleGarbageCollector.java @@@ -51,11 -43,7 +50,10 @@@ import org.apache.accumulo.core.metadat import org.apache.accumulo.core.metadata.schema.Ample.DataLevel; import org.apache.accumulo.core.metrics.MetricsInfo; import org.apache.accumulo.core.securityImpl.thrift.TCredentials; +import org.apache.accumulo.core.spi.balancer.TableLoadBalancer; import org.apache.accumulo.core.trace.TraceUtil; - import org.apache.accumulo.core.util.Halt; +import org.apache.accumulo.core.util.Timer; +import org.apache.accumulo.core.util.compaction.ExternalCompactionUtil; import org.apache.accumulo.core.util.threads.ThreadPools; import org.apache.accumulo.gc.metrics.GcCycleMetrics; import org.apache.accumulo.gc.metrics.GcMetrics; @@@ -368,33 -332,34 +366,34 @@@ public class SimpleGarbageCollector ext } private void getZooLock(HostAndPort addr) throws KeeperException, InterruptedException { - var path = ServiceLock.path(getContext().getZooKeeperRoot() + Constants.ZGC_LOCK); + var path = getContext().getServerPaths().createGarbageCollectorPath(); - LockWatcher lockWatcher = new LockWatcher() { - @Override - public void lostLock(LockLossReason reason) { - Halt.halt("GC lock in zookeeper lost (reason = " + reason + "), exiting!", 1); - } + UUID zooLockUUID = UUID.randomUUID(); + gcLock = new ServiceLock(getContext().getZooReaderWriter().getZooKeeper(), path, zooLockUUID); + HAServiceLockWatcher gcLockWatcher = new HAServiceLockWatcher("gc"); - @Override - public void unableToMonitorLockNode(final Exception e) { - // ACCUMULO-3651 Level changed to error and FATAL added to message for slf4j compatibility - Halt.halt(-1, () -> log.error("FATAL: No longer able to monitor lock node ", e)); + while (true) { - gcLock.lock(gcLockWatcher, - new ServiceLockData(zooLockUUID, addr.toString(), ThriftService.GC)); ++ gcLock.lock(gcLockWatcher, new ServiceLockData(zooLockUUID, addr.toString(), ThriftService.GC, ++ this.getResourceGroup())); + gcLockWatcher.waitForChange(); + + if (gcLockWatcher.isLockAcquired()) { + break; } - }; - UUID zooLockUUID = UUID.randomUUID(); - gcLock = new ServiceLock(getContext().getZooReaderWriter().getZooKeeper(), path, zooLockUUID); - while (true) { - if (gcLock.tryLock(lockWatcher, new ServiceLockData(zooLockUUID, addr.toString(), - ThriftService.GC, this.getResourceGroup()))) { - log.debug("Got GC ZooKeeper lock"); - return; + if (!gcLockWatcher.isFailedToAcquireLock()) { + throw new IllegalStateException("gc lock in unknown state"); } + + gcLock.tryToCancelAsyncLockOrUnlock(); + log.debug("Failed to get GC ZooKeeper lock, will retry"); - sleepUninterruptibly(1, TimeUnit.SECONDS); + sleepUninterruptibly(1000, TimeUnit.MILLISECONDS); } + + log.info("Got GC lock."); + } private HostAndPort startStatsService() { diff --cc server/manager/src/main/java/org/apache/accumulo/manager/Manager.java index 062e306202,c52f6efcfb..936319f880 --- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java @@@ -81,14 -72,12 +81,14 @@@ import org.apache.accumulo.core.fate.zo import org.apache.accumulo.core.fate.zookeeper.ZooReaderWriter; import org.apache.accumulo.core.fate.zookeeper.ZooUtil; import org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeExistsPolicy; -import org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeMissingPolicy; import org.apache.accumulo.core.lock.ServiceLock; - import org.apache.accumulo.core.lock.ServiceLock.LockLossReason; -import org.apache.accumulo.core.lock.ServiceLock.ServiceLockPath; import org.apache.accumulo.core.lock.ServiceLockData; +import org.apache.accumulo.core.lock.ServiceLockData.ServiceDescriptor; +import org.apache.accumulo.core.lock.ServiceLockData.ServiceDescriptors; import org.apache.accumulo.core.lock.ServiceLockData.ThriftService; +import org.apache.accumulo.core.lock.ServiceLockPaths.AddressSelector; +import org.apache.accumulo.core.lock.ServiceLockPaths.ServiceLockPath; + import org.apache.accumulo.core.lock.ServiceLockSupport.HAServiceLockWatcher; import org.apache.accumulo.core.manager.balancer.AssignmentParamsImpl; import org.apache.accumulo.core.manager.balancer.BalanceParamsImpl; import org.apache.accumulo.core.manager.balancer.TServerStatusImpl; @@@ -113,8 -103,8 +113,7 @@@ import org.apache.accumulo.core.spi.bal import org.apache.accumulo.core.spi.balancer.data.TServerStatus; import org.apache.accumulo.core.spi.balancer.data.TabletMigration; import org.apache.accumulo.core.spi.balancer.data.TabletServerId; -import org.apache.accumulo.core.tablet.thrift.TUnloadTabletGoal; import org.apache.accumulo.core.trace.TraceUtil; - import org.apache.accumulo.core.util.Halt; import org.apache.accumulo.core.util.Retry; import org.apache.accumulo.core.util.Timer; import org.apache.accumulo.core.util.threads.ThreadPools; @@@ -1556,18 -1571,13 +1495,18 @@@ public class Manager extends AbstractSe getHostname() + ":" + getConfiguration().getPort(Property.MANAGER_CLIENTPORT)[0]; UUID zooLockUUID = UUID.randomUUID(); - ServiceLockData sld = - new ServiceLockData(zooLockUUID, managerClientAddress, ThriftService.MANAGER); + + ServiceDescriptors descriptors = new ServiceDescriptors(); + descriptors.addService(new ServiceDescriptor(zooLockUUID, ThriftService.MANAGER, + managerClientAddress, this.getResourceGroup())); + + ServiceLockData sld = new ServiceLockData(descriptors); managerLock = new ServiceLock(zooKeeper, zManagerLoc, zooLockUUID); + while (true) { - ManagerLockWatcher managerLockWatcher = new ManagerLockWatcher(); + HAServiceLockWatcher managerLockWatcher = new HAServiceLockWatcher("manager"); managerLock.lock(managerLockWatcher, sld); managerLockWatcher.waitForChange(); diff --cc server/monitor/src/main/java/org/apache/accumulo/monitor/Monitor.java index 1ac2914df8,839dfd0eae..320c8459d2 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/Monitor.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/Monitor.java @@@ -72,8 -70,8 +72,7 @@@ import org.apache.accumulo.core.tablets import org.apache.accumulo.core.tabletserver.thrift.ActiveCompaction; import org.apache.accumulo.core.tabletserver.thrift.TabletServerClientService.Client; import org.apache.accumulo.core.trace.TraceUtil; - import org.apache.accumulo.core.util.Halt; import org.apache.accumulo.core.util.Pair; -import org.apache.accumulo.core.util.compaction.ExternalCompactionUtil; import org.apache.accumulo.core.util.threads.Threads; import org.apache.accumulo.monitor.rest.compactions.external.ExternalCompactionInfo; import org.apache.accumulo.monitor.rest.compactions.external.RunningCompactions; @@@ -742,14 -751,11 +741,13 @@@ public class Monitor extends AbstractSe // Get a ZooLock for the monitor UUID zooLockUUID = UUID.randomUUID(); monitorLock = new ServiceLock(zoo.getZooKeeper(), monitorLockPath, zooLockUUID); ++ HAServiceLockWatcher monitorLockWatcher = new HAServiceLockWatcher("monitor"); while (true) { - MoniterLockWatcher monitorLockWatcher = new MoniterLockWatcher(); - - HAServiceLockWatcher monitorLockWatcher = new HAServiceLockWatcher("monitor"); - monitorLock.lock(monitorLockWatcher, new ServiceLockData(zooLockUUID, - monitorLocation.getHost() + ":" + monitorLocation.getPort(), ThriftService.NONE)); + monitorLock.lock(monitorLockWatcher, + new ServiceLockData(zooLockUUID, + monitorLocation.getHost() + ":" + monitorLocation.getPort(), ThriftService.NONE, + this.getResourceGroup())); monitorLockWatcher.waitForChange(); diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java index 021ee3dbde,8db73cb001..eb30a0375c --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java @@@ -75,7 -74,7 +74,8 @@@ import org.apache.accumulo.core.lock.Se import org.apache.accumulo.core.lock.ServiceLockData.ServiceDescriptor; import org.apache.accumulo.core.lock.ServiceLockData.ServiceDescriptors; import org.apache.accumulo.core.lock.ServiceLockData.ThriftService; +import org.apache.accumulo.core.lock.ServiceLockPaths.ServiceLockPath; + import org.apache.accumulo.core.lock.ServiceLockSupport.ServiceLockWatcher; import org.apache.accumulo.core.metadata.ScanServerRefTabletFile; import org.apache.accumulo.core.metadata.StoredTabletFile; import org.apache.accumulo.core.metadata.schema.Ample; @@@ -91,10 -90,8 +91,9 @@@ import org.apache.accumulo.core.tablets import org.apache.accumulo.core.tabletscan.thrift.TooManyFilesException; import org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException; import org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException; - import org.apache.accumulo.core.util.Halt; import org.apache.accumulo.core.util.Timer; import org.apache.accumulo.core.util.UtilWaitThread; +import org.apache.accumulo.core.util.cache.Caches.CacheName; import org.apache.accumulo.core.util.threads.ThreadPools; import org.apache.accumulo.server.AbstractServer; import org.apache.accumulo.server.ServerContext; @@@ -349,25 -342,11 +348,9 @@@ public class ScanServer extends Abstrac } throw e; } - - serverLockUUID = UUID.randomUUID(); scanServerLock = new ServiceLock(zoo.getZooKeeper(), zLockPath, serverLockUUID); - - LockWatcher lw = new LockWatcher() { - - @Override - public void lostLock(final LockLossReason reason) { - Halt.halt(serverStopRequested ? 0 : 1, () -> { - if (!serverStopRequested) { - LOG.error("Lost tablet server lock (reason = {}), exiting.", reason); - } - context.getLowMemoryDetector().logGCInfo(getConfiguration()); - }); - } - - @Override - public void unableToMonitorLockNode(final Exception e) { - Halt.halt(1, () -> LOG.error("Lost ability to monitor scan server lock, exiting.", e)); - } - }; + LockWatcher lw = new ServiceLockWatcher("scan server", () -> serverStopRequested, + (name) -> context.getLowMemoryDetector().logGCInfo(getConfiguration())); for (int i = 0; i < 120 / 5; i++) { zoo.putPersistentData(zLockPath.toString(), new byte[0], NodeExistsPolicy.SKIP); diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java index a7282c0688,9d87e7fea6..0f29d0c165 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java @@@ -85,7 -83,8 +84,8 @@@ import org.apache.accumulo.core.lock.Se import org.apache.accumulo.core.lock.ServiceLockData.ServiceDescriptor; import org.apache.accumulo.core.lock.ServiceLockData.ServiceDescriptors; import org.apache.accumulo.core.lock.ServiceLockData.ThriftService; +import org.apache.accumulo.core.lock.ServiceLockPaths.ServiceLockPath; + import org.apache.accumulo.core.lock.ServiceLockSupport.ServiceLockWatcher; -import org.apache.accumulo.core.manager.thrift.BulkImportState; import org.apache.accumulo.core.manager.thrift.Compacting; import org.apache.accumulo.core.manager.thrift.ManagerClientService; import org.apache.accumulo.core.manager.thrift.TableInfo; @@@ -98,12 -97,9 +98,11 @@@ import org.apache.accumulo.core.metrics import org.apache.accumulo.core.rpc.ThriftUtil; import org.apache.accumulo.core.rpc.clients.ThriftClientTypes; import org.apache.accumulo.core.spi.fs.VolumeChooserEnvironment; +import org.apache.accumulo.core.spi.ondemand.OnDemandTabletUnloader; +import org.apache.accumulo.core.spi.ondemand.OnDemandTabletUnloader.UnloaderParams; +import org.apache.accumulo.core.tabletserver.UnloaderParamsImpl; import org.apache.accumulo.core.tabletserver.log.LogEntry; -import org.apache.accumulo.core.trace.TraceUtil; import org.apache.accumulo.core.util.ComparablePair; - import org.apache.accumulo.core.util.Halt; import org.apache.accumulo.core.util.MapCounter; import org.apache.accumulo.core.util.Pair; import org.apache.accumulo.core.util.Retry;