This is an automated email from the ASF dual-hosted git repository. lide pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 5b9a1a2a5d avoiding a corrupt image file when there is image.ckpt with non-zero … (#9180) 5b9a1a2a5d is described below commit 5b9a1a2a5dd7c2216452864e9cf17d5534523837 Author: dataroaring <98214048+dataroar...@users.noreply.github.com> AuthorDate: Mon Apr 25 17:01:01 2022 +0800 avoiding a corrupt image file when there is image.ckpt with non-zero … (#9180) * avoiding a corrupt image file when there is image.ckpt with non-zero size For now, saveImage writes data to image.ckpt via a FileOutputStream opened in append mode. When a non-zero-size file named image.ckpt already exists, a disaster would happen due to a corrupt image file. Even worse, FE only keeps the latest image file and removes the others. BTW, the image file should be synced to disk. It is dangerous to keep only the latest image file, because an image file is validated when the next image file is generated. As a result, we keep a non-validated image file but remove the validated ones. So I will issue a PR which keeps at least 2 image files.
* append other data after MetaHeader * use channel.force instead of sync --- fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java | 9 +++++++-- fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java | 1 + fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java | 5 +++++ fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java | 8 +++++--- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java index a670dd2aeb..86d6763b24 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java @@ -1966,8 +1966,13 @@ public class Catalog { } public void saveImage(File curFile, long replayedJournalId) throws IOException { - if (!curFile.exists()) { - curFile.createNewFile(); + if (curFile.exists()) { + if (!curFile.delete()) { + throw new IOException(curFile.getName() + " can not be deleted."); + } + } + if (!curFile.createNewFile()) { + throw new IOException(curFile.getName() + " can not be created."); } MetaWriter.write(curFile, this); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java b/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java index 9df82aa9b7..426cd7b3d8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/MetaFooter.java @@ -98,6 +98,7 @@ public class MetaFooter { long endIndex = raf.length(); raf.writeLong(endIndex - startIndex); MetaMagicNumber.write(raf); + raf.getChannel.force(true); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java b/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java index 5617a85448..ba91b04b5d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/common/MetaHeader.java @@ -75,10 +75,15 @@ public class MetaHeader { } public static long write(File imageFile) throws IOException { + if (imageFile.length() != 0) { + throw new IOException("Meta header has to be written to an empty file."); + } + try (RandomAccessFile raf = new RandomAccessFile(imageFile, "rw")) { raf.seek(0); MetaMagicNumber.write(raf); MetaJsonHeader.write(raf); + raf.getChannel.force(true); return raf.getFilePointer(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java b/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java index 6b9d9aa791..387a50be3d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/MetaWriter.java @@ -94,14 +94,15 @@ public class MetaWriter { public static void write(File imageFile, Catalog catalog) throws IOException { // save image does not need any lock. because only checkpoint thread will call this method. - LOG.info("start save image to {}. is ckpt: {}", imageFile.getAbsolutePath(), Catalog.isCheckpointThread()); - + LOG.info("start to save image to {}. is ckpt: {}", imageFile.getAbsolutePath(), Catalog.isCheckpointThread()); final Reference<Long> checksum = new Reference<>(0L); long saveImageStartTime = System.currentTimeMillis(); + // MetaHeader should use output stream in the future. 
long startPosition = MetaHeader.write(imageFile); List<MetaIndex> metaIndices = Lists.newArrayList(); + FileOutputStream imageFileOut = new FileOutputStream(imageFile, true); try (CountingDataOutputStream dos = new CountingDataOutputStream(new BufferedOutputStream( - new FileOutputStream(imageFile, true)), startPosition)) { + imageFileOut), startPosition)) { writer.setDelegate(dos, metaIndices); long replayedJournalId = catalog.getReplayedJournalId(); checksum.setRef(writer.doWork("header", () -> catalog.saveHeader(dos, replayedJournalId, checksum.getRef()))); @@ -128,6 +129,7 @@ public class MetaWriter { checksum.setRef(writer.doWork("plugins", () -> catalog.savePlugins(dos, checksum.getRef()))); checksum.setRef(writer.doWork("deleteHandler", () -> catalog.saveDeleteHandler(dos, checksum.getRef()))); checksum.setRef(writer.doWork("sqlBlockRule", () -> catalog.saveSqlBlockRule(dos, checksum.getRef()))); + imageFileOut.getChannel().force(true); } MetaFooter.write(imageFile, metaIndices, checksum.getRef()); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org