Repository: accumulo Updated Branches: refs/heads/1.6 ddd3932d8 -> 25475d0a3 refs/heads/1.7 5ef5b8538 -> e76d89a8b refs/heads/master 0e68954ab -> b0cb73e1c
ACCUMULO-2388 Make clients retry in case of HoldTimeoutException Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/6965fb07 Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/6965fb07 Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/6965fb07 Branch: refs/heads/1.6 Commit: 6965fb07c97cd752d6f5415a39e87b9ff3c1ba7e Parents: ddd3932 Author: Keith Turner <ktur...@apache.org> Authored: Fri Jun 12 17:47:18 2015 -0400 Committer: Keith Turner <ktur...@apache.org> Committed: Fri Jun 12 17:47:18 2015 -0400 ---------------------------------------------------------------------- .../apache/accumulo/tserver/TabletServer.java | 49 +++++++++++++++----- 1 file changed, 37 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/6965fb07/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java ---------------------------------------------------------------------- diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java index 0446da3..870abd8 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java @@ -1573,9 +1573,10 @@ public class TabletServer extends AbstractMetricsImpl implements org.apache.accu public void applyUpdates(TInfo tinfo, long updateID, TKeyExtent tkeyExtent, List<TMutation> tmutations) { UpdateSession us = (UpdateSession) sessionManager.reserveSession(updateID); if (us == null) { - throw new RuntimeException("No Such SessionID"); + return; } + boolean reserved = true; try { KeyExtent keyExtent = new KeyExtent(tkeyExtent); setUpdateTablet(us, keyExtent); @@ -1587,11 +1588,22 @@ public class TabletServer extends AbstractMetricsImpl implements 
org.apache.accu mutations.add(mutation); us.queuedMutationSize += mutation.numBytes(); } - if (us.queuedMutationSize > getSystemConfiguration().getMemoryInBytes(Property.TSERV_MUTATION_QUEUE_MAX)) - flush(us); + if (us.queuedMutationSize > getSystemConfiguration().getMemoryInBytes(Property.TSERV_MUTATION_QUEUE_MAX)) { + try{ + flush(us); + }catch(HoldTimeoutException hte){ + // Assumption is that the client has timed out and is gone. If that's not the case, then removing the session should cause the client to fail + // in such a way that it retries. + log.debug("HoldTimeoutException during applyUpdates, removing session"); + sessionManager.removeSession(updateID, true); + reserved = false; + } + } } } finally { - sessionManager.unreserveSession(us); + if(reserved) { + sessionManager.unreserveSession(us); + } } } @@ -1649,10 +1661,6 @@ public class TabletServer extends AbstractMetricsImpl implements org.apache.accu mutationCount += mutations.size(); - } catch (HoldTimeoutException t) { - error = t; - log.debug("Giving up on mutations due to a long memory hold time"); - break; } catch (Throwable t) { error = t; log.error("Unexpected error preparing for commit", error); @@ -1769,6 +1777,10 @@ public class TabletServer extends AbstractMetricsImpl implements org.apache.accu try { flush(us); + } catch (HoldTimeoutException e) { + //Assumption is that the client has timed out and is gone. If that's not the case, throw an exception that will cause it to retry. 
+ log.debug("HoldTimeoutException during closeUpdate, reporting no such session"); + throw new NoSuchScanIDException(); } finally { writeTracker.finishWrite(opid); } @@ -1807,8 +1819,14 @@ public class TabletServer extends AbstractMetricsImpl implements org.apache.accu throw new NotServingTabletException(tkeyExtent); } - if (!keyExtent.isMeta()) - TabletServer.this.resourceManager.waitUntilCommitsAreEnabled(); + if (!keyExtent.isMeta()) { + try { + TabletServer.this.resourceManager.waitUntilCommitsAreEnabled(); + } catch (HoldTimeoutException hte) { + //Major hack. Assumption is that the client has timed out and is gone. If that's not the case, then throwing the following will let the client know there was a failure and it should retry. + throw new NotServingTabletException(tkeyExtent); + } + } long opid = writeTracker.startWrite(TabletType.type(keyExtent)); @@ -2088,8 +2106,15 @@ public class TabletServer extends AbstractMetricsImpl implements org.apache.accu if (cs == null || cs.interruptFlag.get()) throw new NoSuchScanIDException(); - if (!cs.tableId.equals(MetadataTable.ID) && !cs.tableId.equals(RootTable.ID)) - TabletServer.this.resourceManager.waitUntilCommitsAreEnabled(); + if (!cs.tableId.equals(MetadataTable.ID) && !cs.tableId.equals(RootTable.ID)){ + try{ + TabletServer.this.resourceManager.waitUntilCommitsAreEnabled(); + } catch (HoldTimeoutException hte){ + //Assumption is that the client has timed out and is gone. If that's not the case, throw an exception that will cause it to retry. + log.debug("HoldTimeoutException during conditionalUpdate, reporting no such session"); + throw new NoSuchScanIDException(); + } + } Text tid = new Text(cs.tableId); long opid = writeTracker.startWrite(TabletType.type(new KeyExtent(tid, null, null)));