This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch elasticity
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/elasticity by this push:
     new f6909cd706 fixes multiple upgrade bugs found when testing (#4717)
f6909cd706 is described below

commit f6909cd7067cad575e70893f3c1d5f4c99cc5676
Author: Keith Turner <ktur...@apache.org>
AuthorDate: Tue Jul 2 10:10:48 2024 -0700

    fixes multiple upgrade bugs found when testing (#4717)
    
    Fixed a bug where upgrade code was attempting to read the manager's
    service lock before it was set.
    
    Fixed a bug with detecting fate operations.  The code to detect fate
    operations would hang trying to read the fate table, which did not
    exist yet.
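    
    The fix replaces the fate store reads with a direct zookeeper read,
    roughly as in this simplified sketch of the UpgradeCoordinator change
    (error handling and the full exception message omitted):
    
        // Check for outstanding FATE transactions with a direct zookeeper read,
        // since the accumulo.fate table does not exist yet at this point.
        if (!context.getZooReader().getChildren(context.getZooKeeperRoot() + Constants.ZFATE)
            .isEmpty()) {
          throw new AccumuloException("Aborting upgrade, outstanding FATE transactions exist");
        }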
    
    Scans of the metadata table running as the !SYSTEM user would fail
    if no auths were specified.  The !SYSTEM user has no auths set in
    zookeeper, and an exception would occur when trying to look them up.
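    
    The fix is to pass an explicit empty Authorizations when scanning,
    along the lines of this simplified sketch (tableName stands in for the
    metadata table name; error handling omitted):
    
        // Scan with empty auths instead of looking up the !SYSTEM user's auths.
        try (var scanner = context.createScanner(tableName, Authorizations.EMPTY);
            var writer = context.createBatchWriter(tableName)) {
          // ... read and rewrite tablet metadata entries ...
        }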
    
    A new zookeeper node related to compaction changes needed to be
    created.
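    
    The node is created during the zookeeper upgrade step, roughly as in
    this simplified sketch of the Upgrader12to13 change (error handling
    omitted):
    
        // Create the new compactions node; skip if it already exists.
        context.getZooReaderWriter().putPersistentData(
            ZooUtil.getRoot(context.getInstanceID()) + Constants.ZCOMPACTIONS, new byte[0],
            ZooUtil.NodeExistsPolicy.SKIP);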
    
    Simplified code for setting tablet availability by using DataLevel.
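    
    The three per-table helpers collapse into a single method parameterized
    by DataLevel and TabletAvailability, roughly as sketched below
    (simplified from the Upgrader12to13 change):
    
        // Set the tablet availability for every tablet at the given data level.
        private void addHostingGoals(ServerContext context, TabletAvailability availability,
            DataLevel level) {
          try (TabletsMetadata tm = context.getAmple().readTablets().forLevel(level)
              .fetch(ColumnType.PREV_ROW).build();
              TabletsMutator mut = context.getAmple().mutateTablets()) {
            tm.forEach(t -> mut.mutateTablet(t.getExtent()).putTabletAvailability(availability).mutate());
          }
        }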
    
    
    Co-authored-by: Dave Marion <dlmar...@apache.org>
---
 .../accumulo/server/AccumuloDataVersion.java       |  6 +--
 .../java/org/apache/accumulo/manager/Manager.java  |  2 +-
 .../manager/upgrade/UpgradeCoordinator.java        | 30 ++++++------
 .../accumulo/manager/upgrade/Upgrader12to13.java   | 54 +++++++++++-----------
 4 files changed, 42 insertions(+), 50 deletions(-)

diff --git a/server/base/src/main/java/org/apache/accumulo/server/AccumuloDataVersion.java b/server/base/src/main/java/org/apache/accumulo/server/AccumuloDataVersion.java
index 598b8e48cf..e2dfa4d961 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/AccumuloDataVersion.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/AccumuloDataVersion.java
@@ -82,10 +82,8 @@ public class AccumuloDataVersion {
     return CURRENT_VERSION;
   }
 
-  // ELASTICITY_TODO get upgrade working #4587
-  // public static final Set<Integer> CAN_RUN = Set.of(ROOT_TABLET_META_CHANGES,
-  // REMOVE_DEPRECATIONS_FOR_VERSION_3, METADATA_FILE_JSON_ENCODING, CURRENT_VERSION);
-  public static final Set<Integer> CAN_RUN = Set.of(CURRENT_VERSION);
+  public static final Set<Integer> CAN_RUN = Set.of(CURRENT_VERSION,
+      REMOVE_DEPRECATIONS_FOR_VERSION_3, METADATA_FILE_JSON_ENCODING, ROOT_TABLET_META_CHANGES);
 
   /**
    * Get the stored, current working version.
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
index d5778a44c5..28be0b7ffb 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
@@ -1037,7 +1037,6 @@ public class Manager extends AbstractServer
     } catch (KeeperException | InterruptedException e) {
       throw new IllegalStateException("Exception getting manager lock", e);
     }
-    this.getContext().setServiceLock(getManagerLock());
 
    // If UpgradeStatus is not at complete by this moment, then things are currently
     // upgrading.
@@ -1463,6 +1462,7 @@ public class Manager extends AbstractServer
       sleepUninterruptibly(TIME_TO_WAIT_BETWEEN_LOCK_CHECKS, MILLISECONDS);
     }
 
+    this.getContext().setServiceLock(getManagerLock());
     setManagerState(ManagerState.HAVE_LOCK);
     return sld;
   }
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java
index 46d29516c2..2f4f7dd2ff 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/UpgradeCoordinator.java
@@ -31,7 +31,6 @@ import java.util.TreeMap;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.Future;
 import java.util.concurrent.SynchronousQueue;
-import java.util.stream.Stream;
 
 import org.apache.accumulo.core.Constants;
 import org.apache.accumulo.core.client.AccumuloException;
@@ -39,9 +38,6 @@ import org.apache.accumulo.core.client.AccumuloSecurityException;
 import org.apache.accumulo.core.client.NamespaceNotFoundException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.conf.ConfigCheckUtil;
-import org.apache.accumulo.core.fate.MetaFateStore;
-import org.apache.accumulo.core.fate.ReadOnlyFateStore;
-import org.apache.accumulo.core.fate.user.UserFateStore;
 import org.apache.accumulo.core.metadata.schema.Ample;
 import org.apache.accumulo.core.util.threads.ThreadPools;
 import org.apache.accumulo.core.volume.Volume;
@@ -309,19 +305,19 @@ public class UpgradeCoordinator {
      justification = "Want to immediately stop all manager threads on upgrade error")
   private void abortIfFateTransactions(ServerContext context) {
     try {
-      final ReadOnlyFateStore<UpgradeCoordinator> mfs = new MetaFateStore<>(
-          context.getZooKeeperRoot() + Constants.ZFATE, context.getZooReaderWriter());
-      final ReadOnlyFateStore<UpgradeCoordinator> ufs = new UserFateStore<>(context);
-      try (var mfsList = mfs.list(); var ufsList = ufs.list();
-          var idStream = Stream.concat(mfsList, ufsList)) {
-        if (idStream.findFirst().isPresent()) {
-          throw new AccumuloException("Aborting upgrade because there are"
-              + " outstanding FATE transactions from a previous Accumulo version."
-              + " You can start the tservers and then use the shell to delete completed "
-              + " transactions. If there are incomplete transactions, you will need to roll"
-              + " back and fix those issues. Please see the following page for more information: "
-              + " https://accumulo.apache.org/docs/2.x/troubleshooting/advanced#upgrade-issues");
-        }
+      // The current version of the code creates the new accumulo.fate table on upgrade, so no
+      // attempt is made to read it here. Attempting to read it this point would likely cause a hang
+      // as tablets are not assigned when this is called. The Fate code is not used to read from
+      // zookeeper below because the serialization format changed in zookeeper, that is why a direct
+      // read is performed.
+      if (!context.getZooReader().getChildren(context.getZooKeeperRoot() + Constants.ZFATE)
+          .isEmpty()) {
+        throw new AccumuloException("Aborting upgrade because there are"
+            + " outstanding FATE transactions from a previous Accumulo version."
+            + " You can start the tservers and then use the shell to delete completed "
+            + " transactions. If there are incomplete transactions, you will need to roll"
+            + " back and fix those issues. Please see the following page for more information: "
+            + " https://accumulo.apache.org/docs/2.x/troubleshooting/advanced#upgrade-issues");
       }
     } catch (Exception exception) {
       log.error("Problem verifying Fate readiness", exception);
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader12to13.java b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader12to13.java
index 025b08ef79..4071d64180 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader12to13.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader12to13.java
@@ -68,11 +68,13 @@ public class Upgrader12to13 implements Upgrader {
   @Override
   public void upgradeZookeeper(ServerContext context) {
     LOG.info("setting root table stored hosting availability");
-    addHostingGoalToRootTable(context);
+    addHostingGoals(context, TabletAvailability.HOSTED, DataLevel.ROOT);
     LOG.info("Removing nodes no longer used from ZooKeeper");
     removeUnusedZKNodes(context);
     LOG.info("Removing compact columns from root tablet");
     removeCompactColumnsFromRootTabletMetadata(context);
+    LOG.info("Adding compactions node to zookeeper");
+    addCompactionsNode(context);
   }
 
   @Override
@@ -82,7 +84,7 @@ public class Upgrader12to13 implements Upgrader {
     LOG.info("Looking for partial splits");
     handlePartialSplits(context, AccumuloTable.ROOT.tableName());
     LOG.info("setting metadata table hosting availability");
-    addHostingGoalToMetadataTable(context);
+    addHostingGoals(context, TabletAvailability.HOSTED, DataLevel.METADATA);
     LOG.info("Removing MetadataBulkLoadFilter iterator from root table");
     removeMetaDataBulkLoadFilter(context, AccumuloTable.ROOT.tableId());
     LOG.info("Removing compact columns from metadata tablets");
@@ -94,7 +96,7 @@ public class Upgrader12to13 implements Upgrader {
     LOG.info("Looking for partial splits");
     handlePartialSplits(context, AccumuloTable.METADATA.tableName());
     LOG.info("setting hosting availability on user tables");
-    addHostingGoalToUserTables(context);
+    addHostingGoals(context, TabletAvailability.ONDEMAND, DataLevel.USER);
     LOG.info("Deleting external compaction final states from user tables");
     deleteExternalCompactionFinalStates(context);
     LOG.info("Deleting external compaction from user tables");
@@ -105,6 +107,16 @@ public class Upgrader12to13 implements Upgrader {
     removeCompactColumnsFromTable(context, AccumuloTable.METADATA.tableName());
   }
 
+  private static void addCompactionsNode(ServerContext context) {
+    try {
+      context.getZooReaderWriter().putPersistentData(
+          ZooUtil.getRoot(context.getInstanceID()) + Constants.ZCOMPACTIONS, new byte[0],
+          ZooUtil.NodeExistsPolicy.SKIP);
+    } catch (KeeperException | InterruptedException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
   private void createFateTable(ServerContext context) {
     try {
       TableManager.prepareNewTableState(context, AccumuloTable.FATE.tableId(),
@@ -163,7 +175,7 @@ public class Upgrader12to13 implements Upgrader {
 
  private void removeCompactColumnsFromTable(ServerContext context, String tableName) {
 
-    try (var scanner = context.createScanner(tableName);
+    try (var scanner = context.createScanner(tableName, Authorizations.EMPTY);
         var writer = context.createBatchWriter(tableName)) {
       scanner.setRange(MetadataSchema.TabletsSection.getRange());
       COMPACT_COL.fetch(scanner);
@@ -220,7 +232,9 @@ public class Upgrader12to13 implements Upgrader {
    // Compactions are committed in a completely different way now, so delete these entries. Its
    // possible some completed compactions may need to be redone, but processing these entries would
    // not be easy to test so its better for correctness to delete them and redo the work.
-    try (var scanner = context.createScanner(AccumuloTable.METADATA.tableName());
+    try (
+        var scanner =
+            context.createScanner(AccumuloTable.METADATA.tableName(), Authorizations.EMPTY);
        var writer = context.createBatchWriter(AccumuloTable.METADATA.tableName())) {
      var section = new Section(RESERVED_PREFIX + "ecomp", true, RESERVED_PREFIX + "ecomq", false);
       scanner.setRange(section.getRange());
@@ -240,31 +254,13 @@ public class Upgrader12to13 implements Upgrader {
     }
   }
 
-  private void addHostingGoalToSystemTable(ServerContext context, TableId tableId) {
+  private void addHostingGoals(ServerContext context, TabletAvailability availability,
+      DataLevel level) {
     try (
         TabletsMetadata tm =
-            context.getAmple().readTablets().forTable(tableId).fetch(ColumnType.PREV_ROW).build();
-        TabletsMutator mut = context.getAmple().mutateTablets()) {
-      tm.forEach(t -> mut.mutateTablet(t.getExtent())
-          .putTabletAvailability(TabletAvailability.HOSTED).mutate());
-    }
-  }
-
-  private void addHostingGoalToRootTable(ServerContext context) {
-    addHostingGoalToSystemTable(context, AccumuloTable.ROOT.tableId());
-  }
-
-  private void addHostingGoalToMetadataTable(ServerContext context) {
-    addHostingGoalToSystemTable(context, AccumuloTable.METADATA.tableId());
-  }
-
-  private void addHostingGoalToUserTables(ServerContext context) {
-    try (
-        TabletsMetadata tm = context.getAmple().readTablets().forLevel(DataLevel.USER)
-            .fetch(ColumnType.PREV_ROW).build();
+            context.getAmple().readTablets().forLevel(level).fetch(ColumnType.PREV_ROW).build();
         TabletsMutator mut = context.getAmple().mutateTablets()) {
-      tm.forEach(t -> mut.mutateTablet(t.getExtent())
-          .putTabletAvailability(TabletAvailability.ONDEMAND).mutate());
+      tm.forEach(t -> mut.mutateTablet(t.getExtent()).putTabletAvailability(availability).mutate());
     }
   }
 
@@ -273,7 +269,9 @@ public class Upgrader12to13 implements Upgrader {
    // process the metadata table. The metadata related to an external compaction has changed so
    // delete any that exists. Not using Ample in case there are problems deserializing the old
    // external compaction metadata.
-    try (var scanner = context.createScanner(AccumuloTable.METADATA.tableName());
+    try (
+        var scanner =
+            context.createScanner(AccumuloTable.METADATA.tableName(), Authorizations.EMPTY);
        var writer = context.createBatchWriter(AccumuloTable.METADATA.tableName())) {
       scanner.setRange(TabletsSection.getRange());
       scanner.fetchColumnFamily(ExternalCompactionColumnFamily.NAME);
