This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch elasticity in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/elasticity by this push: new c44216d764 fixes tablet location checks in tabletserver (#3891) c44216d764 is described below commit c44216d7643c50830e0619a2037d4a8d20c15a01 Author: Keith Turner <ktur...@apache.org> AuthorDate: Wed Oct 25 17:04:01 2023 -0400 fixes tablet location checks in tabletserver (#3891) During write ahead log recovery the tablet server was trying to write a new file and require the tablets current location to be the tablet server. However at that time the tablet server was the tablets future location and therefore the update failed. This commit adust the expected location to be the future location during recovery. --- .../core/metadata/schema/MetadataTime.java | 5 ++++ .../org/apache/accumulo/tserver/tablet/Tablet.java | 29 ++++++++++++++-------- .../apache/accumulo/test/functional/RestartIT.java | 3 --- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataTime.java b/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataTime.java index eb96c309b9..eb79b6d017 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataTime.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataTime.java @@ -121,4 +121,9 @@ public final class MetadataTime implements Comparable<MetadataTime> { "Cannot compare different time types: " + this + " and " + mtime); } } + + @Override + public String toString() { + return encode(); + } } diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java index f08922fa8c..5fda28f1fe 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java @@ -372,7 +372,7 @@ public class Tablet extends TabletBase { try (Scope scope = span2.makeCurrent()) { bringMinorCompactionOnline(tmpDatafile, newDatafile, new DataFileValue(stats.getFileSize(), stats.getEntriesWritten()), commitSession, - flushId); + flushId, mincReason); } catch (Exception e) { TraceUtil.setException(span2, e, true); throw e; @@ -1297,8 +1297,8 @@ public class Tablet extends TabletBase { * Update tablet file data from flush. Returns a StoredTabletFile if there are data entries. */ public Optional<StoredTabletFile> updateTabletDataFile(long maxCommittedTime, - ReferencedTabletFile newDatafile, DataFileValue dfv, Set<String> unusedWalLogs, - long flushId) { + ReferencedTabletFile newDatafile, DataFileValue dfv, Set<String> unusedWalLogs, long flushId, + MinorCompactionReason mincReason) { Preconditions.checkState(refreshLock.isHeldByCurrentThread()); @@ -1321,8 +1321,12 @@ public class Tablet extends TabletBase { } try (var tabletsMutator = getContext().getAmple().conditionallyMutateTablets()) { - var tablet = tabletsMutator.mutateTablet(extent) - .requireLocation(Location.current(tabletServer.getTabletSession())) + + var expectedLocation = mincReason == MinorCompactionReason.RECOVERY + ? Location.future(tabletServer.getTabletSession()) + : Location.current(tabletServer.getTabletSession()); + + var tablet = tabletsMutator.mutateTablet(extent).requireLocation(expectedLocation) .requireSame(lastTabletMetadata, ColumnType.TIME); Optional<StoredTabletFile> newFile = Optional.empty(); @@ -1350,11 +1354,14 @@ public class Tablet extends TabletBase { // between the write and check. Second, some flushes do not produce a file. tablet.submit(tabletMetadata -> tabletMetadata.getTime().equals(newTime)); - if (tabletsMutator.process().get(extent).getStatus() - != Ample.ConditionalResult.Status.ACCEPTED) { + var result = tabletsMutator.process().get(extent); + if (result.getStatus() != Ample.ConditionalResult.Status.ACCEPTED) { + + log.error("Metadata for failed tablet file update : {}", result.readMetadata()); + // Include the things that could have caused the write to fail. throw new IllegalStateException("Unable to write minor compaction. " + extent + " " - + tabletServer.getTabletSession() + " " + expectedTime); + + expectedLocation + " " + expectedTime); } return newFile; @@ -1426,7 +1433,7 @@ public class Tablet extends TabletBase { void bringMinorCompactionOnline(ReferencedTabletFile tmpDatafile, ReferencedTabletFile newDatafile, DataFileValue dfv, CommitSession commitSession, - long flushId) { + long flushId, MinorCompactionReason mincReason) { Optional<StoredTabletFile> newFile; // rename before putting in metadata table, so files in metadata table should // always exist @@ -1481,7 +1488,7 @@ public class Tablet extends TabletBase { // before the following metadata write is made newFile = updateTabletDataFile(commitSession.getMaxCommittedTime(), newDatafile, dfv, - unusedWalLogs, flushId); + unusedWalLogs, flushId, mincReason); finishClearingUnusedLogs(); } finally { @@ -1532,7 +1539,7 @@ public class Tablet extends TabletBase { Preconditions.checkState(tabletMetadata != null, "Tablet no longer exits %s", getExtent()); Preconditions.checkState( - Location.current(tabletServer.getTabletSession()).equals(tabletMetadata.getLocation()), + tabletServer.getTabletSession().equals(tabletMetadata.getLocation().getServerInstance()), "Tablet %s location %s is not this tserver %s", getExtent(), tabletMetadata.getLocation(), tabletServer.getTabletSession()); diff --git a/test/src/main/java/org/apache/accumulo/test/functional/RestartIT.java b/test/src/main/java/org/apache/accumulo/test/functional/RestartIT.java index adbd878e6a..3ab2b96859 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/RestartIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/RestartIT.java @@ -117,7 +117,6 @@ public class RestartIT extends AccumuloClusterHarness { } @Test - @Disabled // ELASTICITY_TODO public void restartManagerRecovery() throws Exception { try (AccumuloClient c = Accumulo.newClient().from(getClientProps()).build()) { String tableName = getUniqueNames(1)[0]; @@ -203,7 +202,6 @@ public class RestartIT extends AccumuloClusterHarness { } @Test - @Disabled // ELASTICITY_TODO public void killedTabletServer() throws Exception { try (AccumuloClient c = Accumulo.newClient().from(getClientProps()).build()) { String tableName = getUniqueNames(1)[0]; @@ -218,7 +216,6 @@ public class RestartIT extends AccumuloClusterHarness { } @Test - @Disabled // ELASTICITY_TODO public void killedTabletServer2() throws Exception { try (AccumuloClient c = Accumulo.newClient().from(getClientProps()).build()) { final String[] names = getUniqueNames(2);