This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch 2.1 in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push: new 617bfcbd36 fixes race condition in metadata consistency check (#3392) 617bfcbd36 is described below commit 617bfcbd36ac05955343a2f77c3423274c04df74 Author: Keith Turner <ktur...@apache.org> AuthorDate: Thu May 11 15:35:01 2023 -0400 fixes race condition in metadata consistency check (#3392) While looking into #3386 I noticed the Accumulo metadata consistency check was incrementing a counter in the incorrect place. It should increment before writing to the metadata table, but it does not. This could cause the check to report false postives. The false positive in the case would be transient and should not repeat on subsequent checks. Also noticed a redundant check when deciding if the file should be added to the set of in memory files. AFICT this redundant check is harmless, but it could cause problems for future changes. --- .../accumulo/tserver/tablet/DatafileManager.java | 103 +++++++++++---------- 1 file changed, 52 insertions(+), 51 deletions(-) diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/DatafileManager.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/DatafileManager.java index 3873d4efb0..5b349c3f67 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/DatafileManager.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/DatafileManager.java @@ -242,31 +242,30 @@ class DatafileManager { if (tablet.getExtent().isMeta()) { throw new IllegalArgumentException("Can not import files to a metadata tablet"); } - - synchronized (bulkFileImportLock) { - - if (!paths.isEmpty()) { - long bulkTime = Long.MIN_VALUE; - if (setTime) { - for (DataFileValue dfv : paths.values()) { - long nextTime = tablet.getAndUpdateTime(); - if (nextTime < bulkTime) { - throw new IllegalStateException( - "Time went backwards unexpectedly " + nextTime + " " + bulkTime); + // increment start count before metadata update AND updating in memory map of files + metadataUpdateCount.updateAndGet(MetadataUpdateCount::incrementStart); + // do not place any code here between above stmt and try{}finally + try { + synchronized (bulkFileImportLock) { + + if (!paths.isEmpty()) { + long bulkTime = Long.MIN_VALUE; + if (setTime) { + for (DataFileValue dfv : paths.values()) { + long nextTime = tablet.getAndUpdateTime(); + if (nextTime < bulkTime) { + throw new IllegalStateException( + "Time went backwards unexpectedly " + nextTime + " " + bulkTime); + } + bulkTime = nextTime; + dfv.setTime(bulkTime); } - bulkTime = nextTime; - dfv.setTime(bulkTime); } - } - newFiles = tablet.updatePersistedTime(bulkTime, paths, tid); + newFiles = tablet.updatePersistedTime(bulkTime, paths, tid); + } } - } - // increment start count before metadata update AND updating in memory map of files - metadataUpdateCount.updateAndGet(MetadataUpdateCount::incrementStart); - // do not place any code here between above stmt and try{}finally - try { synchronized (tablet) { for (Entry<StoredTabletFile,DataFileValue> tpath : newFiles.entrySet()) { if (datafileSizes.containsKey(tpath.getKey())) { @@ -350,41 +349,43 @@ class DatafileManager { logFileOnly.add(unusedWalLog); } } - try { - // the order of writing to metadata and walog is important in the face of machine/process - // failures need to write to metadata before writing to walog, when things are done in the - // reverse order data could be lost... the minor compaction start even should be written - // before the following metadata write is made - newFile = tablet.updateTabletDataFile(commitSession.getMaxCommittedTime(), newDatafile, dfv, - unusedWalLogs, flushId); - - // Mark that we have data we want to replicate - // This WAL could still be in use by other Tablets *from the same table*, so we can only mark - // that there is data to replicate, - // but it is *not* closed. We know it is not closed by the fact that this MinC triggered. A - // MinC cannot happen unless the - // tablet is online and thus these WALs are referenced by that tablet. Therefore, the WAL - // replication status cannot be 'closed'. - if (replicate) { - if (log.isDebugEnabled()) { - log.debug("Recording that data has been ingested into {} using {}", tablet.getExtent(), - logFileOnly); - } - for (String logFile : logFileOnly) { - @SuppressWarnings("deprecation") - Status status = org.apache.accumulo.server.replication.StatusUtil.openWithUnknownLength(); - ReplicationTableUtil.updateFiles(tablet.getContext(), tablet.getExtent(), logFile, - status); - } - } - } finally { - tablet.finishClearingUnusedLogs(); - } // increment start count before metadata update AND updating in memory map of files metadataUpdateCount.updateAndGet(MetadataUpdateCount::incrementStart); // do not place any code here between above stmt and try{}finally try { + try { + // the order of writing to metadata and walog is important in the face of machine/process + // failures need to write to metadata before writing to walog, when things are done in the + // reverse order data could be lost... the minor compaction start even should be written + // before the following metadata write is made + newFile = tablet.updateTabletDataFile(commitSession.getMaxCommittedTime(), newDatafile, dfv, + unusedWalLogs, flushId); + + // Mark that we have data we want to replicate + // This WAL could still be in use by other Tablets *from the same table*, so we can only + // mark + // that there is data to replicate, + // but it is *not* closed. We know it is not closed by the fact that this MinC triggered. A + // MinC cannot happen unless the + // tablet is online and thus these WALs are referenced by that tablet. Therefore, the WAL + // replication status cannot be 'closed'. + if (replicate) { + if (log.isDebugEnabled()) { + log.debug("Recording that data has been ingested into {} using {}", tablet.getExtent(), + logFileOnly); + } + for (String logFile : logFileOnly) { + @SuppressWarnings("deprecation") + Status status = + org.apache.accumulo.server.replication.StatusUtil.openWithUnknownLength(); + ReplicationTableUtil.updateFiles(tablet.getContext(), tablet.getExtent(), logFile, + status); + } + } + } finally { + tablet.finishClearingUnusedLogs(); + } do { try { @@ -403,7 +404,7 @@ class DatafileManager { synchronized (tablet) { t1 = System.currentTimeMillis(); - if (dfv.getNumEntries() > 0 && newFile.isPresent()) { + if (newFile.isPresent()) { StoredTabletFile newFileStored = newFile.orElseThrow(); if (datafileSizes.containsKey(newFileStored)) { log.error("Adding file that is already in set {}", newFileStored);