This is an automated email from the ASF dual-hosted git repository.
kturner pushed a commit to branch elasticity
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/elasticity by this push:
new f6909cd706 fixes multiple upgrade bugs found when testing (#4717)
f6909cd706 is described below
commit f6909cd7067cad575e70893f3c1d5f4c99cc5676
Author: Keith Turner <[email protected]>
AuthorDate: Tue Jul 2 10:10:48 2024 -0700
fixes multiple upgrade bugs found when testing (#4717)
Fixed a bug where upgrade code was attempting to read the manager's
service lock before it was set.
Fixed a bug with detecting fate operations. The code to detect fate
operations would hang trying to read the fate table, which did not
exist yet.
Scans of the metadata table running as the !SYSTEM user would fail
if no auths were specified. The !SYSTEM user has no auths set in
zookeeper, and an exception would happen when trying to find them.
A new zookeeper node related to compaction changes needed to be
created.
Simplified code for setting tablet availability by using DataLevel.
Co-authored-by: Dave Marion <[email protected]>
---
.../accumulo/server/AccumuloDataVersion.java | 6 +--
.../java/org/apache/accumulo/manager/Manager.java | 2 +-
.../manager/upgrade/UpgradeCoordinator.java | 30 ++++++------
.../accumulo/manager/upgrade/Upgrader12to13.java | 54 +++++++++++-----------
4 files changed, 42 insertions(+), 50 deletions(-)
diff --git
a/server/base/src/main/java/org/apache/accumulo/server/AccumuloDataVersion.java
b/server/base/src/main/java/org/apache/accumulo/server/AccumuloDataVersion.java
index 598b8e48cf..e2dfa4d961 100644
---
a/server/base/src/main/java/org/apache/accumulo/server/AccumuloDataVersion.java
+++
b/server/base/src/main/java/org/apache/accumulo/server/AccumuloDataVersion.java
@@ -82,10 +82,8 @@ public class AccumuloDataVersion {
return CURRENT_VERSION;
}
- // ELASTICITY_TODO get upgrade working #4587
- // public static final Set<Integer> CAN_RUN =
Set.of(ROOT_TABLET_META_CHANGES,
- // REMOVE_DEPRECATIONS_FOR_VERSION_3, METADATA_FILE_JSON_ENCODING,
CURRENT_VERSION);
- public static final Set<Integer> CAN_RUN = Set.of(CURRENT_VERSION);
+ public static final Set<Integer> CAN_RUN = Set.of(CURRENT_VERSION,
+ REMOVE_DEPRECATIONS_FOR_VERSION_3, METADATA_FILE_JSON_ENCODING,
ROOT_TABLET_META_CHANGES);
/**
* Get the stored, current working version.
diff --git
a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
index d5778a44c5..28be0b7ffb 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
@@ -1037,7 +1037,6 @@ public class Manager extends AbstractServer
} catch (KeeperException | InterruptedException e) {
throw new IllegalStateException("Exception getting manager lock", e);
}
- this.getContext().setServiceLock(getManagerLock());
// If UpgradeStatus is not at complete by this moment, then things are
currently
// upgrading.
@@ -1463,6 +1462,7 @@ public class Manager extends AbstractServer
sleepUninterruptibly(TIME_TO_WAIT_BETWEEN_LOCK_CHECKS, MILLISECONDS);
}
+ this.getContext().setServiceLock(getManagerLock());
setManagerState(ManagerState.HAVE_LOCK);
return sld;
}
diff --git
a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java
b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java
index 46d29516c2..2f4f7dd2ff 100644
---
a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java
+++
b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java
@@ -31,7 +31,6 @@ import java.util.TreeMap;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.concurrent.SynchronousQueue;
-import java.util.stream.Stream;
import org.apache.accumulo.core.Constants;
import org.apache.accumulo.core.client.AccumuloException;
@@ -39,9 +38,6 @@ import
org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.NamespaceNotFoundException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.conf.ConfigCheckUtil;
-import org.apache.accumulo.core.fate.MetaFateStore;
-import org.apache.accumulo.core.fate.ReadOnlyFateStore;
-import org.apache.accumulo.core.fate.user.UserFateStore;
import org.apache.accumulo.core.metadata.schema.Ample;
import org.apache.accumulo.core.util.threads.ThreadPools;
import org.apache.accumulo.core.volume.Volume;
@@ -309,19 +305,19 @@ public class UpgradeCoordinator {
justification = "Want to immediately stop all manager threads on upgrade
error")
private void abortIfFateTransactions(ServerContext context) {
try {
- final ReadOnlyFateStore<UpgradeCoordinator> mfs = new MetaFateStore<>(
- context.getZooKeeperRoot() + Constants.ZFATE,
context.getZooReaderWriter());
- final ReadOnlyFateStore<UpgradeCoordinator> ufs = new
UserFateStore<>(context);
- try (var mfsList = mfs.list(); var ufsList = ufs.list();
- var idStream = Stream.concat(mfsList, ufsList)) {
- if (idStream.findFirst().isPresent()) {
- throw new AccumuloException("Aborting upgrade because there are"
- + " outstanding FATE transactions from a previous Accumulo
version."
- + " You can start the tservers and then use the shell to delete
completed "
- + " transactions. If there are incomplete transactions, you will
need to roll"
- + " back and fix those issues. Please see the following page for
more information: "
- + "
https://accumulo.apache.org/docs/2.x/troubleshooting/advanced#upgrade-issues");
- }
+ // The current version of the code creates the new accumulo.fate table
on upgrade, so no
+ // attempt is made to read it here. Attempting to read it this point
would likely cause a hang
+ // as tablets are not assigned when this is called. The Fate code is not
used to read from
+ // zookeeper below because the serialization format changed in
zookeeper, that is why a direct
+ // read is performed.
+ if (!context.getZooReader().getChildren(context.getZooKeeperRoot() +
Constants.ZFATE)
+ .isEmpty()) {
+ throw new AccumuloException("Aborting upgrade because there are"
+ + " outstanding FATE transactions from a previous Accumulo
version."
+ + " You can start the tservers and then use the shell to delete
completed "
+ + " transactions. If there are incomplete transactions, you will
need to roll"
+ + " back and fix those issues. Please see the following page for
more information: "
+ + "
https://accumulo.apache.org/docs/2.x/troubleshooting/advanced#upgrade-issues");
}
} catch (Exception exception) {
log.error("Problem verifying Fate readiness", exception);
diff --git
a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader12to13.java
b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader12to13.java
index 025b08ef79..4071d64180 100644
---
a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader12to13.java
+++
b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader12to13.java
@@ -68,11 +68,13 @@ public class Upgrader12to13 implements Upgrader {
@Override
public void upgradeZookeeper(ServerContext context) {
LOG.info("setting root table stored hosting availability");
- addHostingGoalToRootTable(context);
+ addHostingGoals(context, TabletAvailability.HOSTED, DataLevel.ROOT);
LOG.info("Removing nodes no longer used from ZooKeeper");
removeUnusedZKNodes(context);
LOG.info("Removing compact columns from root tablet");
removeCompactColumnsFromRootTabletMetadata(context);
+ LOG.info("Adding compactions node to zookeeper");
+ addCompactionsNode(context);
}
@Override
@@ -82,7 +84,7 @@ public class Upgrader12to13 implements Upgrader {
LOG.info("Looking for partial splits");
handlePartialSplits(context, AccumuloTable.ROOT.tableName());
LOG.info("setting metadata table hosting availability");
- addHostingGoalToMetadataTable(context);
+ addHostingGoals(context, TabletAvailability.HOSTED, DataLevel.METADATA);
LOG.info("Removing MetadataBulkLoadFilter iterator from root table");
removeMetaDataBulkLoadFilter(context, AccumuloTable.ROOT.tableId());
LOG.info("Removing compact columns from metadata tablets");
@@ -94,7 +96,7 @@ public class Upgrader12to13 implements Upgrader {
LOG.info("Looking for partial splits");
handlePartialSplits(context, AccumuloTable.METADATA.tableName());
LOG.info("setting hosting availability on user tables");
- addHostingGoalToUserTables(context);
+ addHostingGoals(context, TabletAvailability.ONDEMAND, DataLevel.USER);
LOG.info("Deleting external compaction final states from user tables");
deleteExternalCompactionFinalStates(context);
LOG.info("Deleting external compaction from user tables");
@@ -105,6 +107,16 @@ public class Upgrader12to13 implements Upgrader {
removeCompactColumnsFromTable(context, AccumuloTable.METADATA.tableName());
}
+ private static void addCompactionsNode(ServerContext context) {
+ try {
+ context.getZooReaderWriter().putPersistentData(
+ ZooUtil.getRoot(context.getInstanceID()) + Constants.ZCOMPACTIONS,
new byte[0],
+ ZooUtil.NodeExistsPolicy.SKIP);
+ } catch (KeeperException | InterruptedException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
private void createFateTable(ServerContext context) {
try {
TableManager.prepareNewTableState(context, AccumuloTable.FATE.tableId(),
@@ -163,7 +175,7 @@ public class Upgrader12to13 implements Upgrader {
private void removeCompactColumnsFromTable(ServerContext context, String
tableName) {
- try (var scanner = context.createScanner(tableName);
+ try (var scanner = context.createScanner(tableName, Authorizations.EMPTY);
var writer = context.createBatchWriter(tableName)) {
scanner.setRange(MetadataSchema.TabletsSection.getRange());
COMPACT_COL.fetch(scanner);
@@ -220,7 +232,9 @@ public class Upgrader12to13 implements Upgrader {
// Compactions are committed in a completely different way now, so delete
these entries. Its
// possible some completed compactions may need to be redone, but
processing these entries would
// not be easy to test so its better for correctness to delete them and
redo the work.
- try (var scanner =
context.createScanner(AccumuloTable.METADATA.tableName());
+ try (
+ var scanner =
+ context.createScanner(AccumuloTable.METADATA.tableName(),
Authorizations.EMPTY);
var writer =
context.createBatchWriter(AccumuloTable.METADATA.tableName())) {
var section = new Section(RESERVED_PREFIX + "ecomp", true,
RESERVED_PREFIX + "ecomq", false);
scanner.setRange(section.getRange());
@@ -240,31 +254,13 @@ public class Upgrader12to13 implements Upgrader {
}
}
- private void addHostingGoalToSystemTable(ServerContext context, TableId
tableId) {
+ private void addHostingGoals(ServerContext context, TabletAvailability
availability,
+ DataLevel level) {
try (
TabletsMetadata tm =
-
context.getAmple().readTablets().forTable(tableId).fetch(ColumnType.PREV_ROW).build();
- TabletsMutator mut = context.getAmple().mutateTablets()) {
- tm.forEach(t -> mut.mutateTablet(t.getExtent())
- .putTabletAvailability(TabletAvailability.HOSTED).mutate());
- }
- }
-
- private void addHostingGoalToRootTable(ServerContext context) {
- addHostingGoalToSystemTable(context, AccumuloTable.ROOT.tableId());
- }
-
- private void addHostingGoalToMetadataTable(ServerContext context) {
- addHostingGoalToSystemTable(context, AccumuloTable.METADATA.tableId());
- }
-
- private void addHostingGoalToUserTables(ServerContext context) {
- try (
- TabletsMetadata tm =
context.getAmple().readTablets().forLevel(DataLevel.USER)
- .fetch(ColumnType.PREV_ROW).build();
+
context.getAmple().readTablets().forLevel(level).fetch(ColumnType.PREV_ROW).build();
TabletsMutator mut = context.getAmple().mutateTablets()) {
- tm.forEach(t -> mut.mutateTablet(t.getExtent())
- .putTabletAvailability(TabletAvailability.ONDEMAND).mutate());
+ tm.forEach(t ->
mut.mutateTablet(t.getExtent()).putTabletAvailability(availability).mutate());
}
}
@@ -273,7 +269,9 @@ public class Upgrader12to13 implements Upgrader {
// process the metadata table. The metadata related to an external
compaction has changed so
// delete any that exists. Not using Ample in case there are problems
deserializing the old
// external compaction metadata.
- try (var scanner =
context.createScanner(AccumuloTable.METADATA.tableName());
+ try (
+ var scanner =
+ context.createScanner(AccumuloTable.METADATA.tableName(),
Authorizations.EMPTY);
var writer =
context.createBatchWriter(AccumuloTable.METADATA.tableName())) {
scanner.setRange(TabletsSection.getRange());
scanner.fetchColumnFamily(ExternalCompactionColumnFamily.NAME);