totph created this revision. totph added reviewers: jkorous, arphaman, ributzka. Herald added a subscriber: mgrang. Herald added a project: All. totph requested review of this revision. Herald added subscribers: cfe-commits, MaskRay. Herald added a project: clang.
[clang] Deterministic `-gen-cdb-fragment-path` filenames The files written into the given target directory follow the pattern `ab.c.X.json`. Instead of X being 4 randomly generated hexadecimal chars, it is now a deterministic hash of the input and output filenames encoded in 10 base64 chars. This means the output directory no longer fills up with duplicates, and concatenating the `*.json` files continues to generate a useful compilation database even after the first full project build. The output file is also hashed so that these compiler calls can be distinguished: `clang -c ab.c -o ab.o` `clang -c ab.c -D_BAR_ -o bar.o` Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D156142 Files: clang/lib/Driver/ToolChains/Clang.cpp Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -45,6 +45,8 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Config/llvm-config.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/BLAKE3.h" +#include "llvm/Support/Base64.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Compression.h" @@ -2479,13 +2481,38 @@ return; } - llvm::sys::path::append( - Path, - Twine(llvm::sys::path::filename(Input.getFilename())) + ".%%%%.json"); + // Append the Input basename and a .XXXXXXXXXX.json suffix, where + // X is base64-ish and derived from the Input and Output filenames. 
+ auto AppendBasenameAndHash = [](SmallString<256> &Path, + const InputInfo &Input, + const InputInfo &Output) { + llvm::TruncatedBLAKE3<8> Hash; + llvm::BLAKE3Result<8> HashResult; + Hash.update(llvm::StringRef(Input.getFilename())); + // Simpler value separation than that of HashBuilder, so external tools + // can deduce the filename easier: + Hash.update('\0'); + Hash.update(llvm::StringRef(Output.getFilename())); + Hash.final(HashResult); + + std::string Base64Hash = llvm::encodeBase64(HashResult); + // Base64: 8 * 1.33 (encoding efficiency) > 10 + Base64Hash.resize(10); + // Make the base64 string filesystem and url safe by mapping + to - + // and / to _, see RFC 4648#section-5 + for (char &c : Base64Hash) { + c = (c == '+') ? '-' : ((c == '/') ? '_' : c); + } + llvm::sys::path::append( + Path, Twine(llvm::sys::path::filename(Input.getFilename())) + "." + + StringRef(Base64Hash) + ".json"); + }; + + AppendBasenameAndHash(Path, Input, Output); + int FD; - SmallString<256> TempPath; - Err = llvm::sys::fs::createUniqueFile(Path, FD, TempPath, - llvm::sys::fs::OF_Text); + Err = llvm::sys::fs::openFileForWrite( + Path, FD, llvm::sys::fs::CD_CreateAlways, llvm::sys::fs::OF_Text); if (Err) { Driver.Diag(diag::err_drv_compilationdatabase) << Path << Err.message(); return;
Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -45,6 +45,8 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Config/llvm-config.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/BLAKE3.h" +#include "llvm/Support/Base64.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Compression.h" @@ -2479,13 +2481,38 @@ return; } - llvm::sys::path::append( - Path, - Twine(llvm::sys::path::filename(Input.getFilename())) + ".%%%%.json"); + // Append the Input basename and a .XXXXXXXXXX.json suffix, where + // X is base64-ish and derived from the Input and Output filenames. + auto AppendBasenameAndHash = [](SmallString<256> &Path, + const InputInfo &Input, + const InputInfo &Output) { + llvm::TruncatedBLAKE3<8> Hash; + llvm::BLAKE3Result<8> HashResult; + Hash.update(llvm::StringRef(Input.getFilename())); + // Simpler value separation than that of HashBuilder, so external tools + // can deduce the filename easier: + Hash.update('\0'); + Hash.update(llvm::StringRef(Output.getFilename())); + Hash.final(HashResult); + + std::string Base64Hash = llvm::encodeBase64(HashResult); + // Base64: 8 * 1.33 (encoding efficiency) > 10 + Base64Hash.resize(10); + // Make the base64 string filesystem and url safe by mapping + to - + // and / to _, see RFC 4648#section-5 + for (char &c : Base64Hash) { + c = (c == '+') ? '-' : ((c == '/') ? '_' : c); + } + llvm::sys::path::append( + Path, Twine(llvm::sys::path::filename(Input.getFilename())) + "." 
+ + StringRef(Base64Hash) + ".json"); + }; + + AppendBasenameAndHash(Path, Input, Output); + int FD; - SmallString<256> TempPath; - Err = llvm::sys::fs::createUniqueFile(Path, FD, TempPath, - llvm::sys::fs::OF_Text); + Err = llvm::sys::fs::openFileForWrite( + Path, FD, llvm::sys::fs::CD_CreateAlways, llvm::sys::fs::OF_Text); if (Err) { Driver.Diag(diag::err_drv_compilationdatabase) << Path << Err.message(); return;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits