This is an automated email from the ASF dual-hosted git repository. lide pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new bdf915abd4 [Enhancement] (image) check image validity as soon as generated (#9011) bdf915abd4 is described below commit bdf915abd4e0d65e913d4caf9a8ea3ced88e85ab Author: Henry2SS <45096548+henry...@users.noreply.github.com> AuthorDate: Mon Apr 25 19:35:41 2022 +0800 [Enhancement] (image) check image validity as soon as generated (#9011) * load newly generated image file as soon as generated to check if it is valid. * delete the latest invalid image file * fix * fix * get filePath from saveImage() to ensure deleting the correct file while exception happens * fix Co-authored-by: wuhangze <wuhan...@jd.com> --- .../java/org/apache/doris/catalog/Catalog.java | 4 ++- .../java/org/apache/doris/master/Checkpoint.java | 34 +++++++++++++++++++++- .../java/org/apache/doris/persist/MetaCleaner.java | 11 +++++++ 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java index 09a06a1596..e5e2914857 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java @@ -1950,7 +1950,8 @@ public class Catalog { } // Only called by checkpoint thread - public void saveImage() throws IOException { + // return the latest image file's absolute path + public String saveImage() throws IOException { // Write image.ckpt Storage storage = new Storage(this.imageDir); File curFile = storage.getImageFile(replayedJournalId.get()); @@ -1963,6 +1964,7 @@ public class Catalog { curFile.delete(); throw new IOException(); } + return curFile.getAbsolutePath(); } public void saveImage(File curFile, long replayedJournalId) throws IOException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java index 7865817a82..2d3c3ac271 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java @@ -35,6 +35,8 @@ import org.apache.doris.system.Frontend; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import com.google.common.base.Strings; + import java.io.IOException; import java.io.OutputStream; import java.net.HttpURLConnection; @@ -111,6 +113,8 @@ public class Checkpoint extends MasterDaemon { catalog = Catalog.getCurrentCatalog(); catalog.setEditLog(editLog); createStaticFieldForCkpt(); + boolean exceptionCaught = false; + String latestImageFilePath = null; try { catalog.loadImage(imageDir); catalog.replayJournal(checkPointVersion); @@ -119,13 +123,25 @@ public class Checkpoint extends MasterDaemon { checkPointVersion, catalog.getReplayedJournalId())); } catalog.fixBugAfterMetadataReplayed(false); - catalog.saveImage(); + latestImageFilePath = catalog.saveImage(); replayedJournalId = catalog.getReplayedJournalId(); + + // destroy checkpoint catalog, reclaim memory + catalog = null; + Catalog.destroyCheckpoint(); + destroyStaticFieldForCkpt(); + + // Load image to verify if the newly generated image file is valid + // If success, do all the following jobs + // If failed, just return + catalog = Catalog.getCurrentCatalog(); + catalog.loadImage(imageDir); if (MetricRepo.isInit) { MetricRepo.COUNTER_IMAGE_WRITE_SUCCESS.increase(1L); } LOG.info("checkpoint finished save image.{}", replayedJournalId); } catch (Throwable e) { + exceptionCaught = true; e.printStackTrace(); LOG.error("Exception when generate new image file", e); if (MetricRepo.isInit) { @@ -137,6 +153,22 @@ public class Checkpoint extends MasterDaemon { catalog = null; Catalog.destroyCheckpoint(); destroyStaticFieldForCkpt(); + // if new image generated && exception caught, delete the latest image here + // delete the newest image file, cuz it is invalid + if ((!Strings.isNullOrEmpty(latestImageFilePath)) && exceptionCaught) { + MetaCleaner cleaner = new MetaCleaner(Config.meta_dir + "/image"); + try { + cleaner.cleanTheLatestInvalidImageFile(latestImageFilePath); + if (MetricRepo.isInit) { + MetricRepo.COUNTER_IMAGE_CLEAN_SUCCESS.increase(1L); + } + } catch (Throwable ex) { + LOG.error("Master delete latest invalid image file failed.", ex); + if (MetricRepo.isInit) { + MetricRepo.COUNTER_IMAGE_CLEAN_FAILED.increase(1L); + } + } + } } // push image file to all the other non master nodes diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java index 2a7acd1f09..dad0f0d0aa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java @@ -67,6 +67,17 @@ public class MetaCleaner { } } } + + public void cleanTheLatestInvalidImageFile(String path) throws IOException { + File latestInvalidImage = new File(path); + if (latestInvalidImage.exists()) { + if (latestInvalidImage.delete()) { + LOG.info(latestInvalidImage.getAbsoluteFile() + " deleted."); + } else { + LOG.warn(latestInvalidImage.getAbsoluteFile() + " delete failed."); + } + } + } private String fileType(File file) throws IOException { String type = null; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org