This is an automated email from the ASF dual-hosted git repository.

edcoleman pushed a commit to branch elasticity
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit 82b80d0d62bd4290ac85a857b566d6cc0f864367
Merge: a198c1fbc4 7f58fc03b2
Author: Ed Coleman <edcole...@apache.org>
AuthorDate: Mon May 6 17:29:28 2024 +0000

    Merge remote-tracking branch 'upstream/main' into elasticity
    
    includes multiple PRs:
    
     - 4526 bouncy castle version bump
     - 4522 update scan server metrics for file reservations
     - 4520 log hints to select a scan server

 .../accumulo/core/metrics/MetricsProducer.java     | 108 +++++++++++----------
 pom.xml                                            |   2 +-
 .../accumulo/manager/TabletGroupWatcher.java       |  18 ++--
 .../java/org/apache/accumulo/monitor/Monitor.java  |   6 +-
 .../org/apache/accumulo/tserver/ScanServer.java    |  53 ++++++++--
 .../apache/accumulo/tserver/ScanServerMetrics.java |  59 +++++++++++
 .../accumulo/tserver/ThriftScanClientHandler.java  |   4 +-
 .../tserver/metrics/TabletServerScanMetrics.java   |  20 ++--
 .../apache/accumulo/tserver/ScanServerTest.java    |   9 ++
 .../apache/accumulo/test/metrics/MetricsIT.java    |   8 +-
 10 files changed, 204 insertions(+), 83 deletions(-)

diff --cc server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
index 18682789c5,c3a77f2297..060411fcbf
--- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
@@@ -972,43 -1449,20 +972,49 @@@ abstract class TabletGroupWatcher exten
      }
      tLists.assignments.addAll(tLists.assigned);
      for (Assignment a : tLists.assignments) {
-       TServerConnection client = manager.tserverSet.getConnection(a.server);
-       if (client != null) {
-         client.assignTablet(manager.managerLock, a.tablet);
-       } else {
-         Manager.log.warn("Could not connect to server {}", a.server);
+       try {
+         TServerConnection client = manager.tserverSet.getConnection(a.server);
+         if (client != null) {
+           client.assignTablet(manager.managerLock, a.tablet);
+           manager.assignedTablet(a.tablet);
+         } else {
+           Manager.log.warn("Could not connect to server {} for assignment of {}", a.server,
+               a.tablet);
+         }
+       } catch (TException tException) {
+         Manager.log.warn("Could not connect to server {} for assignment of {}", a.server, a.tablet,
+             tException);
        }
-       manager.assignedTablet(a.tablet);
      }
 +
 +    replaceVolumes(tLists.volumeReplacements);
 +  }
 +
 +  private void replaceVolumes(List<VolumeUtil.VolumeReplacements> volumeReplacementsList) {
 +    try (var tabletsMutator = manager.getContext().getAmple().conditionallyMutateTablets()) {
 +      for (VolumeUtil.VolumeReplacements vr : volumeReplacementsList) {
 +        var tabletMutator =
 +            tabletsMutator.mutateTablet(vr.tabletMeta.getExtent()).requireAbsentOperation()
 +                .requireAbsentLocation().requireSame(vr.tabletMeta, FILES, LOGS);
 +        vr.logsToRemove.forEach(tabletMutator::deleteWal);
 +        vr.logsToAdd.forEach(tabletMutator::putWal);
 +
 +        vr.filesToRemove.forEach(tabletMutator::deleteFile);
 +        vr.filesToAdd.forEach(tabletMutator::putFile);
 +
 +        tabletMutator.submit(
 +            tm -> tm.getLogs().containsAll(vr.logsToAdd) && tm.getFiles().containsAll(vr.filesToAdd
 +                .keySet().stream().map(ReferencedTabletFile::insert).collect(Collectors.toSet())));
 +      }
 +
 +      tabletsMutator.process().forEach((extent, result) -> {
 +        if (result.getStatus() == Ample.ConditionalResult.Status.REJECTED) {
 +          // log that failure happened, should try again later
 +          LOG.debug("Failed to update volumes for tablet {}", extent);
 +        }
 +      });
 +    }
 +
    }
  
    private static void markDeadServerLogsAsClosed(WalStateManager mgr,
diff --cc server/monitor/src/main/java/org/apache/accumulo/monitor/Monitor.java
index 51be4e13d8,10ccf19c6c..1fb3a66108
--- a/server/monitor/src/main/java/org/apache/accumulo/monitor/Monitor.java
+++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/Monitor.java
@@@ -486,11 -489,11 +486,11 @@@ public class Monitor extends AbstractSe
          log.error("Unable to get hostname", e);
        }
      }
-     log.debug("Using {} to advertise monitor location in ZooKeeper", advertiseHost);
+     HostAndPort monitorHostAndPort = HostAndPort.fromParts(advertiseHost, livePort);
+     log.debug("Using {} to advertise monitor location in ZooKeeper", monitorHostAndPort);
  
      MetricsInfo metricsInfo = getContext().getMetricsInfo();
-     metricsInfo.addServiceTags(getApplicationName(), HostAndPort.fromParts(advertiseHost, livePort),
-         getResourceGroup());
 -    metricsInfo.addServiceTags(getApplicationName(), monitorHostAndPort);
++    metricsInfo.addServiceTags(getApplicationName(), monitorHostAndPort, getResourceGroup());
      metricsInfo.addMetricsProducers(this);
      metricsInfo.init();
  
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
index d158d3adc9,44ab680a98..a3a7ce0cfe
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
@@@ -238,9 -243,8 +240,9 @@@ public class ScanServer extends Abstrac
              "Tablet metadata caching less than one minute, may cause excessive scans on metadata table.");
        }
        tabletMetadataCache =
 -          Caffeine.newBuilder().expireAfterWrite(cacheExpiration, TimeUnit.MILLISECONDS)
 +          context.getCaches().createNewBuilder(CacheName.SCAN_SERVER_TABLET_METADATA, true)
 +              .expireAfterWrite(cacheExpiration, TimeUnit.MILLISECONDS)
-               .scheduler(Scheduler.systemScheduler()).build(tabletMetadataLoader);
+               .scheduler(Scheduler.systemScheduler()).recordStats().build(tabletMetadataLoader);
      }
  
      delegate = newThriftScanClientHandler(new WriteTracker());
@@@ -376,10 -378,13 +378,12 @@@
      }
  
      MetricsInfo metricsInfo = getContext().getMetricsInfo();
 -    metricsInfo.addServiceTags(getApplicationName(), clientAddress);
 -    metricsInfo.addCommonTags(List.of(Tag.of("resource.group", groupName)));
 +    metricsInfo.addServiceTags(getApplicationName(), clientAddress, getResourceGroup());
+ 
      scanMetrics = new TabletServerScanMetrics();
+     scanServerMetrics = new ScanServerMetrics(tabletMetadataCache);
  
-     metricsInfo.addMetricsProducers(this, scanMetrics);
+     metricsInfo.addMetricsProducers(this, scanMetrics, scanServerMetrics);
      metricsInfo.init();
      // We need to set the compaction manager so that we don't get an NPE in CompactableImpl.close
  
diff --cc test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
index 5ed78da2ca,f2be6e71a5..1a8389f8ea
--- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
@@@ -103,13 -96,16 +103,15 @@@ public class MetricsIT extends Configur
    @Test
    public void confirmMetricsPublished() throws Exception {
  
 -    doWorkToGenerateMetrics();
 -    cluster.stop();
 -
 -    Set<String> unexpectedMetrics =
 -        Set.of(METRICS_SCAN_YIELDS, METRICS_UPDATE_ERRORS, METRICS_COMPACTOR_MAJC_STUCK,
 -            METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_PAUSED_FOR_MEM,
 -            METRICS_SCAN_RETURN_FOR_MEM, METRICS_MINC_PAUSED, METRICS_MAJC_PAUSED);
 +    Set<String> unexpectedMetrics = Set.of(METRICS_SCAN_YIELDS, METRICS_UPDATE_ERRORS,
-         METRICS_SCAN_BUSY_TIMEOUT, METRICS_SCAN_PAUSED_FOR_MEM, METRICS_SCAN_RETURN_FOR_MEM,
-         METRICS_MINC_PAUSED, METRICS_MAJC_PAUSED, METRICS_SERVER_IDLE);
++        METRICS_SCAN_PAUSED_FOR_MEM, METRICS_SCAN_RETURN_FOR_MEM, METRICS_MINC_PAUSED,
++        METRICS_MAJC_PAUSED, METRICS_SERVER_IDLE);
     Set<String> flakyMetrics = Set.of(METRICS_GC_WAL_ERRORS, METRICS_FATE_TYPE_IN_PROGRESS,
 -        METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_RESERVATION_TIMER,
 -        METRICS_SCAN_TABLET_METADATA_CACHE);
 +        METRICS_TSERVER_TABLETS_ONLINE_ONDEMAND, METRICS_TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM,
 +        METRICS_COMPACTOR_MAJC_STUCK, METRICS_MANAGER_ROOT_TGW_ERRORS,
-         METRICS_MANAGER_META_TGW_ERRORS, METRICS_MANAGER_USER_TGW_ERRORS);
++        METRICS_MANAGER_META_TGW_ERRORS, METRICS_MANAGER_USER_TGW_ERRORS,
++        METRICS_SCAN_TABLET_METADATA_CACHE, METRICS_SCAN_BUSY_TIMEOUT_COUNTER,
++        METRICS_SCAN_RESERVATION_TIMER);
  
      Map<String,String> expectedMetricNames = this.getMetricFields();
      flakyMetrics.forEach(expectedMetricNames::remove); // might not see these

Reply via email to