This is an automated email from the ASF dual-hosted git repository. edcoleman pushed a commit to branch elasticity in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit 82b80d0d62bd4290ac85a857b566d6cc0f864367 Merge: a198c1fbc4 7f58fc03b2 Author: Ed Coleman <edcole...@apache.org> AuthorDate: Mon May 6 17:29:28 2024 +0000 Merge remote-tracking branch 'upstream/main' into elasticity includes multiple PRs: - 4526 bouncy castle version bump - 4522 update scan server metrics for file reservations - 4520 log hints to select a scan server .../accumulo/core/metrics/MetricsProducer.java | 108 +++++++++++---------- pom.xml | 2 +- .../accumulo/manager/TabletGroupWatcher.java | 18 ++-- .../java/org/apache/accumulo/monitor/Monitor.java | 6 +- .../org/apache/accumulo/tserver/ScanServer.java | 53 ++++++++-- .../apache/accumulo/tserver/ScanServerMetrics.java | 59 +++++++++++ .../accumulo/tserver/ThriftScanClientHandler.java | 4 +- .../tserver/metrics/TabletServerScanMetrics.java | 20 ++-- .../apache/accumulo/tserver/ScanServerTest.java | 9 ++ .../apache/accumulo/test/metrics/MetricsIT.java | 8 +- 10 files changed, 204 insertions(+), 83 deletions(-) diff --cc server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index 18682789c5,c3a77f2297..060411fcbf --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@@ -972,43 -1449,20 +972,49 @@@ abstract class TabletGroupWatcher exten } tLists.assignments.addAll(tLists.assigned); for (Assignment a : tLists.assignments) { - TServerConnection client = manager.tserverSet.getConnection(a.server); - if (client != null) { - client.assignTablet(manager.managerLock, a.tablet); - } else { - Manager.log.warn("Could not connect to server {}", a.server); + try { + TServerConnection client = manager.tserverSet.getConnection(a.server); + if (client != null) { + client.assignTablet(manager.managerLock, a.tablet); + manager.assignedTablet(a.tablet); + } else { + Manager.log.warn("Could not connect to server {} for assignment of {}", a.server, + a.tablet); + } + } catch (TException tException) { + Manager.log.warn("Could not connect to server {} for assignment of {}", a.server, a.tablet, + tException); } - manager.assignedTablet(a.tablet); } + + replaceVolumes(tLists.volumeReplacements); + } + + private void replaceVolumes(List<VolumeUtil.VolumeReplacements> volumeReplacementsList) { + try (var tabletsMutator = manager.getContext().getAmple().conditionallyMutateTablets()) { + for (VolumeUtil.VolumeReplacements vr : volumeReplacementsList) { + var tabletMutator = + tabletsMutator.mutateTablet(vr.tabletMeta.getExtent()).requireAbsentOperation() + .requireAbsentLocation().requireSame(vr.tabletMeta, FILES, LOGS); + vr.logsToRemove.forEach(tabletMutator::deleteWal); + vr.logsToAdd.forEach(tabletMutator::putWal); + + vr.filesToRemove.forEach(tabletMutator::deleteFile); + vr.filesToAdd.forEach(tabletMutator::putFile); + + tabletMutator.submit( + tm -> tm.getLogs().containsAll(vr.logsToAdd) && tm.getFiles().containsAll(vr.filesToAdd + .keySet().stream().map(ReferencedTabletFile::insert).collect(Collectors.toSet()))); + } + + tabletsMutator.process().forEach((extent, result) -> { + if (result.getStatus() == Ample.ConditionalResult.Status.REJECTED) { + // log that failure happened, should try again later + LOG.debug("Failed to update volumes for tablet {}", extent); + } + }); + } + } private static void markDeadServerLogsAsClosed(WalStateManager mgr, diff --cc server/monitor/src/main/java/org/apache/accumulo/monitor/Monitor.java index 51be4e13d8,10ccf19c6c..1fb3a66108 --- a/server/monitor/src/main/java/org/apache/accumulo/monitor/Monitor.java +++ b/server/monitor/src/main/java/org/apache/accumulo/monitor/Monitor.java @@@ -486,11 -489,11 +486,11 @@@ public class Monitor extends AbstractSe log.error("Unable to get hostname", e); } } - log.debug("Using {} to advertise monitor location in ZooKeeper", advertiseHost); + HostAndPort monitorHostAndPort = HostAndPort.fromParts(advertiseHost, livePort); + log.debug("Using {} to advertise monitor location in ZooKeeper", monitorHostAndPort); MetricsInfo metricsInfo = getContext().getMetricsInfo(); - metricsInfo.addServiceTags(getApplicationName(), HostAndPort.fromParts(advertiseHost, livePort), - getResourceGroup()); - metricsInfo.addServiceTags(getApplicationName(), monitorHostAndPort); ++ metricsInfo.addServiceTags(getApplicationName(), monitorHostAndPort, getResourceGroup()); metricsInfo.addMetricsProducers(this); metricsInfo.init(); diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java index d158d3adc9,44ab680a98..a3a7ce0cfe --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java @@@ -238,9 -243,8 +240,9 @@@ public class ScanServer extends Abstrac "Tablet metadata caching less than one minute, may cause excessive scans on metadata table."); } tabletMetadataCache = - Caffeine.newBuilder().expireAfterWrite(cacheExpiration, TimeUnit.MILLISECONDS) + context.getCaches().createNewBuilder(CacheName.SCAN_SERVER_TABLET_METADATA, true) + .expireAfterWrite(cacheExpiration, TimeUnit.MILLISECONDS) - .scheduler(Scheduler.systemScheduler()).build(tabletMetadataLoader); + .scheduler(Scheduler.systemScheduler()).recordStats().build(tabletMetadataLoader); } delegate = newThriftScanClientHandler(new WriteTracker()); @@@ -376,10 -378,13 +378,12 @@@ } MetricsInfo metricsInfo = getContext().getMetricsInfo(); - metricsInfo.addServiceTags(getApplicationName(), clientAddress); - metricsInfo.addCommonTags(List.of(Tag.of("resource.group", groupName))); + metricsInfo.addServiceTags(getApplicationName(), clientAddress, getResourceGroup()); + scanMetrics = new TabletServerScanMetrics(); + scanServerMetrics = new ScanServerMetrics(tabletMetadataCache); - metricsInfo.addMetricsProducers(this, scanMetrics); + metricsInfo.addMetricsProducers(this, scanMetrics, scanServerMetrics); metricsInfo.init(); // We need to set the compaction manager so that we don't get an NPE in CompactableImpl.close diff --cc test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java index 5ed78da2ca,f2be6e71a5..1a8389f8ea --- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java +++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java @@@ -103,13 -96,16 +103,15 @@@ public class MetricsIT extends Configur @Test public void confirmMetricsPublished() throws Exception { - doWorkToGenerateMetrics(); - cluster.stop(); - - Set<String> unexpectedMetrics = - Set.of(METRICS_SCAN_YIELDS, METRICS_UPDATE_ERRORS, METRICS_COMPACTOR_MAJC_STUCK, - METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_PAUSED_FOR_MEM, - METRICS_SCAN_RETURN_FOR_MEM, METRICS_MINC_PAUSED, METRICS_MAJC_PAUSED); + Set<String> unexpectedMetrics = Set.of(METRICS_SCAN_YIELDS, METRICS_UPDATE_ERRORS, - METRICS_SCAN_BUSY_TIMEOUT, METRICS_SCAN_PAUSED_FOR_MEM, METRICS_SCAN_RETURN_FOR_MEM, - METRICS_MINC_PAUSED, METRICS_MAJC_PAUSED, METRICS_SERVER_IDLE); ++ METRICS_SCAN_PAUSED_FOR_MEM, METRICS_SCAN_RETURN_FOR_MEM, METRICS_MINC_PAUSED, ++ METRICS_MAJC_PAUSED, METRICS_SERVER_IDLE); Set<String> flakyMetrics = Set.of(METRICS_GC_WAL_ERRORS, METRICS_FATE_TYPE_IN_PROGRESS, - METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_RESERVATION_TIMER, - METRICS_SCAN_TABLET_METADATA_CACHE); + METRICS_TSERVER_TABLETS_ONLINE_ONDEMAND, METRICS_TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM, + METRICS_COMPACTOR_MAJC_STUCK, METRICS_MANAGER_ROOT_TGW_ERRORS, - METRICS_MANAGER_META_TGW_ERRORS, METRICS_MANAGER_USER_TGW_ERRORS); ++ METRICS_MANAGER_META_TGW_ERRORS, METRICS_MANAGER_USER_TGW_ERRORS, ++ METRICS_SCAN_TABLET_METADATA_CACHE, METRICS_SCAN_BUSY_TIMEOUT_COUNTER, ++ METRICS_SCAN_RESERVATION_TIMER); Map<String,String> expectedMetricNames = this.getMetricFields(); flakyMetrics.forEach(expectedMetricNames::remove); // might not see these