This is an automated email from the ASF dual-hosted git repository. dlmarion pushed a commit to branch elasticity in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/elasticity by this push: new d2b15204c5 Use OperationId to unhost tablets, separate from location being absent (#3835) d2b15204c5 is described below commit d2b15204c566c48b5dea33fb77f004878bfa9498 Author: Dave Marion <dlmar...@apache.org> AuthorDate: Mon Oct 16 11:11:28 2023 -0400 Use OperationId to unhost tablets, separate from location being absent (#3835) This commit uses the operationId to unhost tablets. This required changing the logic that set the operationId to not require the location to be empty. Transactions that set the operationId will have to wait for the location to be empty before continuing and will likely want to continue to verify that the location is empty as the transaction progresses. Fixes #3406 --- .../core/clientImpl/TableOperationsImpl.java | 5 +- .../accumulo/core/metadata/schema/Ample.java | 11 ++- .../core/metadata/schema/MetadataSchema.java | 44 +++++++++-- .../core/metadata/schema/TabletMetadata.java | 25 +------ .../core/metadata/schema/TabletMetadataTest.java | 26 ------- .../manager/state/AbstractTabletStateStore.java | 11 +-- .../server/manager/state/CurrentState.java | 5 -- .../manager/state/TabletManagementIterator.java | 14 ++-- .../metadata/ConditionalTabletMutatorImpl.java | 1 + .../accumulo/server/util/ManagerMetadataUtil.java | 1 + .../accumulo/manager/FateServiceHandler.java | 2 - .../java/org/apache/accumulo/manager/Manager.java | 45 ------------ .../manager/ManagerClientServiceHandler.java | 2 +- .../accumulo/manager/TabletGroupWatcher.java | 10 +-- .../apache/accumulo/manager/split/SplitTask.java | 4 - .../manager/tableOps/split/DeleteOperationIds.java | 4 +- .../accumulo/manager/tableOps/split/PreSplit.java | 85 +++++++++++++--------- .../manager/tableOps/split/UpdateTablets.java | 12 ++- .../apache/accumulo/tserver/AssignmentHandler.java | 2 +- .../accumulo/tserver/tablet/ScanfileManager.java | 2 +- .../test/functional/ManagerAssignmentIT.java | 36 +++++++-- .../functional/TabletManagementIteratorIT.java | 7 +- .../apache/accumulo/test/manager/MergeStateIT.java | 10 +-- 23 files changed, 166 insertions(+), 198 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java index 6c88dba09e..c193938e51 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java @@ -487,9 +487,8 @@ public class TableOperationsImpl extends TableOperationsHelper { } catch (ExecutionException ee) { Throwable excep = ee.getCause(); // Below all exceptions are wrapped and rethrown. This is done so that the user knows - // what - // code path got them here. If the wrapping was not done, the user would only have the - // stack trace for the background thread. + // what code path got them here. If the wrapping was not done, the user would only + // have the stack trace for the background thread. if (excep instanceof TableNotFoundException) { TableNotFoundException tnfe = (TableNotFoundException) excep; throw new TableNotFoundException(tableId.canonical(), tableName, diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java b/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java index af3506a4fb..d665ea977e 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/schema/Ample.java @@ -403,6 +403,15 @@ public interface Ample { */ interface OperationRequirements { + /** + * This should be used to make changes to a hosted tablet and ensure the location is as + * expected. Hosted tablets should unload when an operation id set, but can update their + * metadata prior to unloading. + * + * @see MetadataSchema.TabletsSection.ServerColumnFamily#OPID_COLUMN + */ + ConditionalTabletMutator requireLocation(Location location); + /** * Require a specific operation with a unique id is present. This would be normally be called by * the code executing that operation. @@ -410,7 +419,7 @@ public interface Ample { ConditionalTabletMutator requireOperation(TabletOperationId operationId); /** - * Require that no mutually exclusive operations are runnnig against this tablet. + * Require that no mutually exclusive operations are running against this tablet. */ ConditionalTabletMutator requireAbsentOperation(); diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataSchema.java b/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataSchema.java index 8809909019..fefe9b6911 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataSchema.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/schema/MetadataSchema.java @@ -253,13 +253,43 @@ public class MetadataSchema { public static final ColumnFQ LOCK_COLUMN = new ColumnFQ(NAME, new Text(LOCK_QUAL)); /** - * This column is used to indicate an operation is running that needs exclusive access to read - * and write to a tablet. The value uniquely identifies a FATE operation that is running and - * needs the exclusive access. All tablet updates must either ensure this column is absent or - * in the case of a FATE operation that set it ensure the value contains their FATE - * transaction id. When a FATE operation wants to set this column it must ensure its absent - * before setting it. Once a FATE operation has successfully set the column then no other - * tablet update should succeed. + * This column is used to indicate a destructive tablet operation is running that needs + * exclusive access to read and write to a tablet. The value uniquely identifies a FATE + * operation that is running and needs the exclusive access. The following goes over three + * cases for how all metadata updates should use this column. + * + * <p> + * Destructive table FATE operations like split, merge and delete will use this column in the + * following way. + * </p> + * + * <ol> + * <li>A fate operation sets the operation id on a tablet only if its not set by another + * operation</li> + * <li>Setting the operation id will cause the tablet to be unhosted. The fate operation waits + * for the tablet to have no location before making any updates.</li> + * <li>For each update made by the fate operation it will require the operation id to be set + * and the location to be absent</li> + * <li>The fate operation will delete the operation id when it finishes successfully</li> + * </ol> + * + * <p> + * Modifications for a hosted tablet will do the following. + * </p> + * + * <ul> + * <li>Ensure their location is set on the tablet when making updates w/o considering if an + * operation id is set or not. Because fate operation will wait for the location to be absent + * before making updates, the tablet can make whatever updates it needs before unloading.</li> + * <li>The future location should never be set on a tablet with no location that has an + * operation id set. This is because FATE operations assume once the location is unset that + * they have exclusive access.</li> + * </ul> + * + * <p> + * Routine modification to non hosted tablets (like bulk import, compaction, etc) should + * require the operation to be absent when making their updates. + * </p> */ public static final String OPID_QUAL = "opid"; public static final ColumnFQ OPID_COLUMN = new ColumnFQ(NAME, new Text(OPID_QUAL)); diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/schema/TabletMetadata.java b/core/src/main/java/org/apache/accumulo/core/metadata/schema/TabletMetadata.java index 496924e33b..2ae7f394b7 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/schema/TabletMetadata.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/schema/TabletMetadata.java @@ -122,7 +122,6 @@ public class TabletMetadata { private boolean onDemandHostingRequested = false; private TabletOperationId operationId; private boolean futureAndCurrentLocationSet = false; - private boolean operationIdAndCurrentLocationSet = false; private Set<Long> compacted; public static TabletMetadataBuilder builder(KeyExtent extent) { @@ -408,8 +407,7 @@ public class TabletMetadata { .append("extCompactions", extCompactions).append("goal", goal) .append("onDemandHostingRequested", onDemandHostingRequested) .append("operationId", operationId).append("selectedFiles", selectedFiles) - .append("futureAndCurrentLocationSet", futureAndCurrentLocationSet) - .append("operationIdAndCurrentLocationSet", operationIdAndCurrentLocationSet).toString(); + .append("futureAndCurrentLocationSet", futureAndCurrentLocationSet).toString(); } public SortedMap<Key,Value> getKeyValues() { @@ -440,10 +438,6 @@ public class TabletMetadata { return futureAndCurrentLocationSet; } - public boolean isOperationIdAndCurrentLocationSet() { - return operationIdAndCurrentLocationSet; - } - @VisibleForTesting public static <E extends Entry<Key,Value>> TabletMetadata convertRow(Iterator<E> rowIter, EnumSet<ColumnType> fetchedColumns, boolean buildKeyValueMap, boolean suppressLocationError) { @@ -608,14 +602,6 @@ public class TabletMetadata { } futureAndCurrentLocationSet = true; } - if (operationId != null) { - if (!suppressError) { - throw new IllegalStateException( - "Attempted to set location for tablet with an operation id. table ID: " + tableId - + " endrow: " + endRow + " -- operation id: " + operationId); - } - operationIdAndCurrentLocationSet = true; - } location = new Location(val, qual, lt); } @@ -628,15 +614,6 @@ public class TabletMetadata { */ private void setOperationIdOnce(String val, boolean suppressError) { Preconditions.checkState(operationId == null); - // make sure there is not already a current location set - if (location != null) { - if (!suppressError) { - throw new IllegalStateException( - "Attempted to set operation id for tablet with current location. table ID: " + tableId - + " endrow: " + endRow + " -- location: " + location); - } - operationIdAndCurrentLocationSet = true; - } operationId = TabletOperationId.from(val); } diff --git a/core/src/test/java/org/apache/accumulo/core/metadata/schema/TabletMetadataTest.java b/core/src/test/java/org/apache/accumulo/core/metadata/schema/TabletMetadataTest.java index 8d7a969dcb..dfb42700a1 100644 --- a/core/src/test/java/org/apache/accumulo/core/metadata/schema/TabletMetadataTest.java +++ b/core/src/test/java/org/apache/accumulo/core/metadata/schema/TabletMetadataTest.java @@ -24,7 +24,6 @@ import static org.apache.accumulo.core.metadata.StoredTabletFile.serialize; import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.COMPACT_COLUMN; import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN; import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.FLUSH_COLUMN; -import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.OPID_QUAL; import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.TIME_COLUMN; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.ECOMP; import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.HOSTING_GOAL; @@ -192,31 +191,6 @@ public class TabletMetadataTest { assertTrue(tm.isFutureAndCurrentLocationSet()); } - @Test - public void testTableOpAndCurrentLocation() { - final long tableId = 5L; - final KeyExtent extent = - new KeyExtent(TableId.of(Long.toString(tableId)), new Text("df"), new Text("da")); - - Mutation mutation = TabletColumnFamily.createPrevRowMutation(extent); - - // set a tablet operation as well as future location - mutation.at().family(MetadataSchema.TabletsSection.ServerColumnFamily.STR_NAME) - .qualifier(OPID_QUAL).put(TabletOperationType.DELETING + ":" + formatTid(tableId)); - mutation.at().family(FutureLocationColumnFamily.NAME).qualifier("s001").put("server1:8555"); - - SortedMap<Key,Value> rowMap = toRowMap(mutation); - - assertThrows(IllegalStateException.class, - () -> TabletMetadata.convertRow(rowMap.entrySet().iterator(), - EnumSet.allOf(ColumnType.class), false, false), - "tablet should not have operation id and current location at the same time"); - - TabletMetadata tm = TabletMetadata.convertRow(rowMap.entrySet().iterator(), - EnumSet.allOf(ColumnType.class), false, true); - assertTrue(tm.isOperationIdAndCurrentLocationSet()); - } - @Test public void testLocationStates() { KeyExtent extent = new KeyExtent(TableId.of("5"), new Text("df"), new Text("da")); diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/AbstractTabletStateStore.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/AbstractTabletStateStore.java index a0ffa8bc28..e84748f324 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/AbstractTabletStateStore.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/AbstractTabletStateStore.java @@ -50,7 +50,6 @@ public abstract class AbstractTabletStateStore implements TabletStateStore { try (var tabletsMutator = ample.conditionallyMutateTablets()) { for (Assignment assignment : assignments) { var conditionalMutator = tabletsMutator.mutateTablet(assignment.tablet) - .requireAbsentOperation() .requireLocation(TabletMetadata.Location.future(assignment.server)) .requirePrevEndRow(assignment.tablet.prevEndRow()) .putLocation(TabletMetadata.Location.current(assignment.server)) @@ -124,11 +123,14 @@ public abstract class AbstractTabletStateStore implements TabletStateStore { throws DistributedStoreException { try (var tabletsMutator = ample.conditionallyMutateTablets()) { for (TabletMetadata tm : tablets) { - var tabletMutator = tabletsMutator.mutateTablet(tm.getExtent()).requireAbsentOperation() - .requirePrevEndRow(tm.getExtent().prevEndRow()); + if (tm.getLocation() == null) { + continue; + } + + var tabletMutator = tabletsMutator.mutateTablet(tm.getExtent()) + .requireLocation(tm.getLocation()).requirePrevEndRow(tm.getExtent().prevEndRow()); if (tm.hasCurrent()) { - tabletMutator.requireLocation(tm.getLocation()); ManagerMetadataUtil.updateLastForAssignmentMode(context, tabletMutator, tm.getLocation().getServerInstance(), tm.getLast()); @@ -146,7 +148,6 @@ public abstract class AbstractTabletStateStore implements TabletStateStore { if (tm.getLocation() != null && tm.getLocation().getType() != null && tm.getLocation().getType().equals(LocationType.FUTURE)) { - tabletMutator.requireLocation(tm.getLocation()); tabletMutator.deleteLocation(tm.getLocation()); } diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java index 88b26220d5..0e1bc45f15 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/CurrentState.java @@ -45,11 +45,6 @@ public interface CurrentState { */ Set<KeyExtent> migrationsSnapshot(); - // ELASTICITY_TODO this approach to requesting unassignments was a quick hack - default Set<KeyExtent> getUnassignmentRequest() { - throw new UnsupportedOperationException(); - } - ManagerState getManagerState(); // ELASTICITIY_TODO it would be nice if this method could take DataLevel as an argument and only diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java index e274d275a7..fcc94f2169 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java @@ -83,7 +83,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Joiner; -import com.google.common.collect.Sets; import com.google.gson.reflect.TypeToken; /** @@ -293,9 +292,10 @@ public class TabletManagementIterator extends SkippingIterator { TabletState state = TabletState.compute(tm, current, balancer, tserverResourceGroups); if (LOG.isTraceEnabled()) { - LOG.trace("{} is {}. Table is {}line. Tablet hosting goal is {}, hostingRequested: {}", + LOG.trace( + "{} is {}. Table is {}line. Tablet hosting goal is {}, hostingRequested: {}, opId: {}", tm.getExtent(), state, (shouldBeOnline ? "on" : "off"), tm.getHostingGoal(), - tm.getHostingRequested()); + tm.getHostingRequested(), tm.getOperationId()); } switch (state) { case ASSIGNED: @@ -344,11 +344,7 @@ public class TabletManagementIterator extends SkippingIterator { TabletManagementIterator.setCurrentServers(tabletChange, state.onlineTabletServers()); TabletManagementIterator.setOnlineTables(tabletChange, state.onlineTables()); TabletManagementIterator.setMerges(tabletChange, state.merges()); - // ELASTICITY_TODO passing the unassignemnt request as part of the migrations is a hack. Was - // not sure of the entire unassignment request approach and did not want to push it further - // into the code. - TabletManagementIterator.setMigrations(tabletChange, - Sets.union(state.migrationsSnapshot(), state.getUnassignmentRequest())); + TabletManagementIterator.setMigrations(tabletChange, state.migrationsSnapshot()); TabletManagementIterator.setManagerState(tabletChange, state.getManagerState()); TabletManagementIterator.setShuttingDown(tabletChange, state.shutdownServers()); TabletManagementIterator.setTServerResourceGroups(tabletChange, @@ -466,7 +462,7 @@ public class TabletManagementIterator extends SkippingIterator { private void computeTabletManagementActions(final TabletMetadata tm, final Set<ManagementAction> reasonsToReturnThisTablet) { - if (tm.isFutureAndCurrentLocationSet() || tm.isOperationIdAndCurrentLocationSet()) { + if (tm.isFutureAndCurrentLocationSet()) { // no need to check everything, we are in a known state where we want to return everything. reasonsToReturnThisTablet.add(ManagementAction.BAD_STATE); return; diff --git a/server/base/src/main/java/org/apache/accumulo/server/metadata/ConditionalTabletMutatorImpl.java b/server/base/src/main/java/org/apache/accumulo/server/metadata/ConditionalTabletMutatorImpl.java index 55b5d9f9c6..4393a09110 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/metadata/ConditionalTabletMutatorImpl.java +++ b/server/base/src/main/java/org/apache/accumulo/server/metadata/ConditionalTabletMutatorImpl.java @@ -100,6 +100,7 @@ public class ConditionalTabletMutatorImpl extends TabletMutatorBase<Ample.Condit Preconditions.checkState(updatesEnabled, "Cannot make updates after calling mutate."); Preconditions.checkArgument(location.getType() == TabletMetadata.LocationType.FUTURE || location.getType() == TabletMetadata.LocationType.CURRENT); + sawOperationRequirement = true; Condition c = new Condition(getLocationFamily(location.getType()), location.getSession()) .setValue(location.getHostPort()); mutation.addCondition(c); diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java b/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java index cc0f3bd600..f109991cc1 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/ManagerMetadataUtil.java @@ -187,6 +187,7 @@ public class ManagerMetadataUtil { TServerInstance tServerInstance, ServiceLock zooLock, Set<String> unusedWalLogs, Location lastLocation, long flushId) { + // ELASTICITY_TODO use conditional mutation and require tablet location TabletMutator tablet = context.getAmple().mutateTablet(extent); // if there are no entries, the path doesn't get stored in metadata table, only the flush ID Optional<StoredTabletFile> newFile = Optional.empty(); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/FateServiceHandler.java b/server/manager/src/main/java/org/apache/accumulo/manager/FateServiceHandler.java index 7b13303d70..8c9d217cd8 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/FateServiceHandler.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/FateServiceHandler.java @@ -771,8 +771,6 @@ class FateServiceHandler implements FateService.Iface { "Length of requested split exceeds tables configured max, see warning in logs for more information."); } - manager.requestUnassignment(extent, opid); - goalMessage = "Splitting " + extent + " for user into " + (splits.size() + 1) + " tablets"; manager.fate().seedTransaction(op.toString(), opid, new PreSplit(extent, splits), autoCleanup, goalMessage); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java index 57936973c0..c960e786c9 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java @@ -595,46 +595,6 @@ public class Manager extends AbstractServer } } - // ELASTICITY_TODO the following should probably be a cache w/ timeout so that things that are - // never removed will age off. Unsure about the approach, so want to hold on off on doing more - // work. It was a quick hack added so that splits could get the manager to unassign tablets. - private final Map<KeyExtent,Set<Long>> unassignmentRequest = - Collections.synchronizedMap(new HashMap<>()); - - @Override - public Set<KeyExtent> getUnassignmentRequest() { - synchronized (unassignmentRequest) { - return Set.copyOf(unassignmentRequest.keySet()); - } - } - - public void requestUnassignment(KeyExtent tablet, long fateTxId) { - unassignmentRequest.compute(tablet, (k, v) -> { - Set<Long> txids = v == null ? new HashSet<>() : v; - txids.add(fateTxId); - return txids; - }); - - nextEvent.event(tablet, "Unassignment requested %s", tablet); - } - - public void cancelUnassignmentRequest(KeyExtent tablet, long fateTxid) { - unassignmentRequest.computeIfPresent(tablet, (k, v) -> { - v.remove(fateTxid); - if (v.isEmpty()) { - return null; - } - - return v; - }); - - nextEvent.event(tablet, "Unassignment request canceled %s", tablet); - } - - public boolean isUnassignmentRequested(KeyExtent extent) { - return unassignmentRequest.containsKey(extent); - } - private Splitter splitter; public Splitter getSplitter() { @@ -732,10 +692,6 @@ public class Manager extends AbstractServer return TabletGoalState.UNASSIGNED; } - if (unassignmentRequest.containsKey(extent)) { - return TabletGoalState.UNASSIGNED; - } - if (tm.getOperationId() != null) { return TabletGoalState.UNASSIGNED; } @@ -1911,5 +1867,4 @@ public class Manager extends AbstractServer throw new IllegalStateException("Unhandled DataLevel value: " + level); } } - } diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/ManagerClientServiceHandler.java b/server/manager/src/main/java/org/apache/accumulo/manager/ManagerClientServiceHandler.java index 37b6bc9456..46a650236b 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/ManagerClientServiceHandler.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/ManagerClientServiceHandler.java @@ -621,11 +621,11 @@ public class ManagerClientServiceHandler implements ManagerClientService.Iface { manager.mustBeOnline(tableId); - log.info("Tablet hosting requested for: {} ", extents); final List<KeyExtent> success = new ArrayList<>(); final Ample ample = manager.getContext().getAmple(); try (var mutator = ample.conditionallyMutateTablets()) { extents.forEach(e -> { + log.info("Tablet hosting requested for: {} ", KeyExtent.fromThrift(e)); KeyExtent ke = KeyExtent.fromThrift(e); if (recentHostingRequest.getIfPresent(ke) == null) { mutator.mutateTablet(ke).requireAbsentOperation() diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index 23c8501207..31c15071a4 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@ -374,12 +374,6 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread { tm.getExtent() + " is both assigned and hosted, which should never happen: " + this, tm.getExtent().toMetaRow()); } - if (tm.isOperationIdAndCurrentLocationSet()) { - throw new BadLocationStateException( - tm.getExtent() - + " has both operation id and current location, which should never happen: " + this, - tm.getExtent().toMetaRow()); - } final TableId tableId = tm.getTableId(); // ignore entries for tables that do not exist in zookeeper @@ -429,6 +423,10 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread { goal = TabletGoalState.UNASSIGNED; } + if (tm.getOperationId() != null) { + goal = TabletGoalState.UNASSIGNED; + } + if (Manager.log.isTraceEnabled()) { Manager.log.trace( "[{}] Shutting down all Tservers: {}, dependentCount: {} Extent: {}, state: {}, goal: {} actions:{}", diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/split/SplitTask.java b/server/manager/src/main/java/org/apache/accumulo/manager/split/SplitTask.java index 16f2682f98..0c60e9347f 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/split/SplitTask.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/split/SplitTask.java @@ -85,10 +85,6 @@ public class SplitTask implements Runnable { long fateTxId = manager.fate().startTransaction(); - if (tablet.getLocation() != null) { - manager.requestUnassignment(tablet.getExtent(), fateTxId); - } - manager.fate().seedTransaction("SYSTEM_SPLIT", fateTxId, new PreSplit(extent, splits), true, "System initiated split of tablet " + extent + " into " + splits.size() + " splits"); } catch (Exception e) { diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/DeleteOperationIds.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/DeleteOperationIds.java index d5ec7f9a88..a2c7169ea5 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/DeleteOperationIds.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/DeleteOperationIds.java @@ -52,8 +52,8 @@ public class DeleteOperationIds extends ManagerRepo { tabletMetadata -> !opid.equals(tabletMetadata.getOperationId()); splitInfo.getTablets().forEach(extent -> { - tabletsMutator.mutateTablet(extent).requireOperation(opid).deleteOperation() - .submit(rejectionHandler); + tabletsMutator.mutateTablet(extent).requireOperation(opid).requireAbsentLocation() + .deleteOperation().submit(rejectionHandler); }); var results = tabletsMutator.process(); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/PreSplit.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/PreSplit.java index 1e7a1592ce..5e6505f58d 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/PreSplit.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/PreSplit.java @@ -18,6 +18,10 @@ */ package org.apache.accumulo.manager.tableOps.split; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOCATION; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.OPID; +import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW; + import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -31,7 +35,6 @@ import org.apache.accumulo.core.fate.Repo; import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.Ample.ConditionalResult.Status; import org.apache.accumulo.core.metadata.schema.TabletMetadata; -import org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType; import org.apache.accumulo.core.metadata.schema.TabletOperationId; import org.apache.accumulo.core.metadata.schema.TabletOperationType; import org.apache.accumulo.manager.Manager; @@ -73,38 +76,48 @@ public class PreSplit extends ManagerRepo { // that have not completed their first step. Once splits starts running, would like it to move // through as quickly as possible. - try (var tabletsMutator = manager.getContext().getAmple().conditionallyMutateTablets()) { - - tabletsMutator.mutateTablet(splitInfo.getOriginal()).requireAbsentOperation() - .requireAbsentLocation().requirePrevEndRow(splitInfo.getOriginal().prevEndRow()) - .putOperation(opid).submit(tmeta -> opid.equals(tmeta.getOperationId())); + var tabletMetadata = manager.getContext().getAmple().readTablet(splitInfo.getOriginal(), + PREV_ROW, LOCATION, OPID); - Map<KeyExtent,Ample.ConditionalResult> results = tabletsMutator.process(); + log.trace("Attempting tablet split {} {} {}", FateTxId.formatTid(tid), splitInfo.getOriginal(), + tabletMetadata == null ? null : tabletMetadata.getLocation()); - if (results.get(splitInfo.getOriginal()).getStatus() == Status.ACCEPTED) { - log.debug("{} reserved {} for split", FateTxId.formatTid(tid), splitInfo.getOriginal()); + if (tabletMetadata == null || (tabletMetadata.getOperationId() != null + && !opid.equals(tabletMetadata.getOperationId()))) { + // tablet no longer exists or is reserved by another operation + return 0; + } else if (opid.equals(tabletMetadata.getOperationId())) { + if (tabletMetadata.getLocation() == null) { + // the operation id is set and there is no location, so can proceed to split return 0; } else { - var tabletMetadata = results.get(splitInfo.getOriginal()).readMetadata(); - - // its possible the tablet no longer exists - var optMeta = Optional.ofNullable(tabletMetadata); - - log.debug( - "{} Failed to set operation id (may have location or operationId). extent:{} location:{} opid:{}", - FateTxId.formatTid(tid), splitInfo.getOriginal(), - optMeta.map(TabletMetadata::getLocation).orElse(null), - optMeta.map(TabletMetadata::getOperationId).orElse(null)); - - if (tabletMetadata != null && tabletMetadata.getLocation() != null) { - // the tablet exists but has a location, lets try again later - manager.requestUnassignment(tabletMetadata.getExtent(), tid); - return 2000; + // the operation id was set, but a location is also set wait for it be unset + return 1000; + } + } else { + try (var tabletsMutator = manager.getContext().getAmple().conditionallyMutateTablets()) { + + tabletsMutator.mutateTablet(splitInfo.getOriginal()).requireAbsentOperation() + .requireSame(tabletMetadata, LOCATION, PREV_ROW).putOperation(opid) + .submit(tmeta -> opid.equals(tmeta.getOperationId())); + + Map<KeyExtent,Ample.ConditionalResult> results = tabletsMutator.process(); + if (results.get(splitInfo.getOriginal()).getStatus() == Status.ACCEPTED) { + log.trace("Successfully set operation id for split {}", FateTxId.formatTid(tid)); + if (tabletMetadata.getLocation() == null) { + // the operation id was set and there is no location, so can move on + return 0; + } else { + // now that the operation id set, generate an event to unload the tablet + manager.getEventCoordinator().event(splitInfo.getOriginal(), + "Set operation id %s on tablet for split", FateTxId.formatTid(tid)); + // the operation id was set, but a location is also set wait for it be unset + return 1000; + } } else { - // The tablet may no longer exists, another operation may have it reserved, or maybe we - // already reserved and a fault happened. In any case lets proceed, the tablet will be - // checked in the call() function and it will sort everything out. - return 0; + log.trace("Failed to set operation id for split {}", FateTxId.formatTid(tid)); + // something changed with the tablet, so setting the operation id failed. Try again later + return 1000; } } } @@ -112,13 +125,11 @@ public class PreSplit extends ManagerRepo { @Override public Repo<Manager> call(long tid, Manager manager) throws Exception { - // ELASTICITY_TODO need to make manager ignore tablet with an operation id for assignment - // purposes - manager.cancelUnassignmentRequest(splitInfo.getOriginal(), tid); + manager.getSplitter().removeSplitStarting(splitInfo.getOriginal()); - TabletMetadata tabletMetadata = manager.getContext().getAmple().readTablet( - splitInfo.getOriginal(), ColumnType.PREV_ROW, ColumnType.LOCATION, ColumnType.OPID); + TabletMetadata tabletMetadata = manager.getContext().getAmple() + .readTablet(splitInfo.getOriginal(), PREV_ROW, LOCATION, OPID); var opid = TabletOperationId.from(TabletOperationType.SPLITTING, tid); @@ -133,6 +144,12 @@ public class PreSplit extends ManagerRepo { return null; } + // Its expected that the tablet has no location at this point and if it does its an indication + // of a bug. + Preconditions.checkState(tabletMetadata.getLocation() == null, + "Tablet unexpectedly had location set %s %s %s", FateTxId.formatTid(tid), + tabletMetadata.getLocation(), tabletMetadata.getExtent()); + // Create the dir name here for the next step. If the next step fails it will always have the // same dir name each time it runs again making it idempotent. @@ -150,8 +167,6 @@ public class PreSplit extends ManagerRepo { @Override public void undo(long tid, Manager manager) throws Exception { // TODO is this called if isReady fails? - // TODO should operation id be cleaned up? maybe not, tablet may be in an odd state - manager.cancelUnassignmentRequest(splitInfo.getOriginal(), tid); manager.getSplitter().removeSplitStarting(splitInfo.getOriginal()); } } diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/UpdateTablets.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/UpdateTablets.java index a804bedd73..28131f795e 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/UpdateTablets.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/UpdateTablets.java @@ -86,6 +86,10 @@ public class UpdateTablets extends ManagerRepo { "Tablet %s does not have expected operation id %s it has %s", splitInfo.getOriginal(), opid, tabletMetadata.getOperationId()); + Preconditions.checkState(tabletMetadata.getLocation() == null, + "Tablet %s unexpectedly has a location %s", splitInfo.getOriginal(), + tabletMetadata.getLocation()); + var newTablets = splitInfo.getTablets(); var newTabletsFiles = getNewTabletFiles(newTablets, tabletMetadata, @@ -213,7 +217,7 @@ public class UpdateTablets extends ManagerRepo { var newExtent = newTablets.last(); var mutator = tabletsMutator.mutateTablet(splitInfo.getOriginal()).requireOperation(opid) - .requirePrevEndRow(splitInfo.getOriginal().prevEndRow()); + .requirePrevEndRow(splitInfo.getOriginal().prevEndRow()).requireAbsentLocation(); mutator.putPrevEndRow(newExtent.prevEndRow()); @@ -251,9 +255,11 @@ public class UpdateTablets extends ManagerRepo { var tabletMeta = manager.getContext().getAmple().readTablet(newExtent); - if (tabletMeta == null || !tabletMeta.getOperationId().equals(opid)) { + if (tabletMeta == null || !tabletMeta.getOperationId().equals(opid) + || tabletMeta.getLocation() != null) { throw new IllegalStateException("Failed to update existing tablet in split " - + splitInfo.getOriginal() + " " + result.getStatus() + " " + result.getExtent()); + + splitInfo.getOriginal() + " " + result.getStatus() + " " + result.getExtent() + " " + + (tabletMeta == null ? null : tabletMeta.getLocation())); } else { // ELASTICITY_TODO } diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/AssignmentHandler.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/AssignmentHandler.java index 7694747e3f..f4cac95747 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/AssignmentHandler.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/AssignmentHandler.java @@ -285,7 +285,7 @@ class AssignmentHandler implements Runnable { return false; } - if (meta.getOperationId() != null) { + if (meta.getOperationId() != null && meta.getLocation() == null) { log.info(METADATA_ISSUE + "metadata entry has a FATE operation id {} {} {}", extent, loc, meta.getOperationId()); return false; diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanfileManager.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanfileManager.java index 4513dca855..46cd47b4fd 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanfileManager.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanfileManager.java @@ -130,7 +130,7 @@ class ScanfileManager { } if (!filesToDelete.isEmpty()) { - // ELASTICTIY_TODO use conditional mutation + // ELASTICTIY_TODO use conditional mutation and require the tablet location log.debug("Removing scan refs from metadata {} {}", tablet.getExtent(), filesToDelete); MetadataTableUtil.removeScanFiles(tablet.getExtent(), filesToDelete, tablet.getContext(), tablet.getTabletServer().getLock()); diff --git a/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java b/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java index 00193d2e2d..a90b5aace3 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/ManagerAssignmentIT.java @@ -405,6 +405,8 @@ public class ManagerAssignmentIT extends SharedMiniClusterBase { c.securityOperations().grantTablePermission(getPrincipal(), MetadataTable.NAME, TablePermission.WRITE); + // Set the OperationId on one tablet, which will cause that tablet + // to not be assigned try (var writer = c.createBatchWriter(MetadataTable.NAME)) { var extent = new KeyExtent(tableId, new Text("m"), new Text("f")); var opid = TabletOperationId.from(TabletOperationType.SPLITTING, 42L); @@ -413,13 +415,37 @@ public class ManagerAssignmentIT extends SharedMiniClusterBase { writer.addMutation(m); } + // Host all tablets. c.tableOperations().setTabletHostingGoal(tableName, new Range(), TabletHostingGoal.ALWAYS); + Wait.waitFor(() -> countTabletsWithLocation(c, tableId) == 3); + var ample = ((ClientContext) c).getAmple(); + assertNull( + ample.readTablet(new KeyExtent(tableId, new Text("m"), new Text("f"))).getLocation()); - Wait.waitFor(() -> countTabletsWithLocation(c, tableId) >= 3); + // Delete the OperationId column, tablet should be assigned + try (var writer = c.createBatchWriter(MetadataTable.NAME)) { + var extent = new KeyExtent(tableId, new Text("m"), new Text("f")); + Mutation m = new Mutation(extent.toMetaRow()); + TabletsSection.ServerColumnFamily.OPID_COLUMN.putDelete(m); + writer.addMutation(m); + } + Wait.waitFor(() -> countTabletsWithLocation(c, tableId) == 4); - // there are four tablets, but one has an operation id set and should not be assigned - assertEquals(3, countTabletsWithLocation(c, tableId)); + // Set the OperationId on one tablet, which will cause that tablet + // to be unhosted + try (var writer = c.createBatchWriter(MetadataTable.NAME)) { + var extent = new KeyExtent(tableId, new Text("m"), new Text("f")); + var opid = TabletOperationId.from(TabletOperationType.SPLITTING, 42L); + Mutation m = new Mutation(extent.toMetaRow()); + TabletsSection.ServerColumnFamily.OPID_COLUMN.put(m, new Value(opid.canonical())); + writer.addMutation(m); + } + // there are four tablets, three should be assigned as one has a OperationId + Wait.waitFor(() -> countTabletsWithLocation(c, tableId) == 3); + assertNull( + ample.readTablet(new KeyExtent(tableId, new Text("m"), new Text("f"))).getLocation()); + // Delete the OperationId column, tablet should be assigned again try (var writer = c.createBatchWriter(MetadataTable.NAME)) { var extent = new KeyExtent(tableId, new Text("m"), new Text("f")); Mutation m = new Mutation(extent.toMetaRow()); @@ -427,10 +453,8 @@ public class ManagerAssignmentIT extends SharedMiniClusterBase { writer.addMutation(m); } - Wait.waitFor(() -> countTabletsWithLocation(c, tableId) >= 4); - // after the operation id is deleted the tablet should be assigned - assertEquals(4, countTabletsWithLocation(c, tableId)); + Wait.waitFor(() -> countTabletsWithLocation(c, tableId) == 4); } } diff --git a/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java b/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java index 6fe2a309bf..36fa313fd7 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java @@ -157,7 +157,7 @@ public class TabletManagementIteratorIT extends AccumuloClusterHarness { // w/o a location. Only one should need attention because of the operation id. setOperationId(client, metaCopy1, t1); assertEquals(1, findTabletsNeedingAttention(client, metaCopy1, state), - "Should have not tablets needing attention because of operation id"); + "Should have tablets needing attention because of operation id"); // test the cases where the assignment is to a dead tserver reassignLocation(client, metaCopy2, t3); @@ -414,11 +414,6 @@ public class TabletManagementIteratorIT extends AccumuloClusterHarness { return Collections.emptySet(); } - @Override - public Set<KeyExtent> getUnassignmentRequest() { - return Collections.emptySet(); - } - @Override public ManagerState getManagerState() { return ManagerState.NORMAL; diff --git a/test/src/main/java/org/apache/accumulo/test/manager/MergeStateIT.java b/test/src/main/java/org/apache/accumulo/test/manager/MergeStateIT.java index 6b199eb860..1806f4c638 100644 --- a/test/src/main/java/org/apache/accumulo/test/manager/MergeStateIT.java +++ b/test/src/main/java/org/apache/accumulo/test/manager/MergeStateIT.java @@ -42,6 +42,7 @@ import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.manager.state.TabletManagement; +import org.apache.accumulo.core.manager.state.TabletManagement.ManagementAction; import org.apache.accumulo.core.manager.thrift.ManagerState; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.TServerInstance; @@ -120,11 +121,6 @@ public class MergeStateIT extends ConfigurableMacBase { return Collections.emptySet(); } - @Override - public Set<KeyExtent> getUnassignmentRequest() { - return Collections.emptySet(); - } - } private static void update(AccumuloClient c, Mutation m) @@ -177,10 +173,12 @@ public class MergeStateIT extends ConfigurableMacBase { int count = 0; for (TabletManagement mti : metaDataStateStore) { if (mti != null) { + assertEquals(1, mti.actions.size()); + assertEquals(ManagementAction.NEEDS_LOCATION_UPDATE, mti.getActions().iterator().next()); count++; } } - assertEquals(0, count); // the normal case is to skip tablets in a good state + assertEquals(6, count); // Create the hole // Split the tablet at one end of the range