Author: Kees Cook
Date: 2025-12-03T15:09:04-08:00
New Revision: 6e479668ba746ee3778b7ab59b7c415976719c93

URL: 
https://github.com/llvm/llvm-project/commit/6e479668ba746ee3778b7ab59b7c415976719c93
DIFF: 
https://github.com/llvm/llvm-project/commit/6e479668ba746ee3778b7ab59b7c415976719c93.diff

LOG: [CodeGen][KCFI] Allow setting type hash from xxHash64 to FNV-1a (#167254)

When emitting the assembly .set directive, KCFI needs to use
getZExtValue(). However, this means that FileCheck pattern matching can't
match between the .set directive and the IR when the high bit of a 32-bit
value is set. We had gotten lucky with the existing tests happening to
just not have had the high bit set. The coming hash change will expose
this, though.

LLVM IR's default printing behavior uses APInt::operator<<, which calls
APInt::print(OS, /*isSigned=*/true). This means KCFI operand bundles in
call instructions print as signed (e.g. [ "kcfi"(i32 -1208803271) ]),
and KCFI type metadata prints as signed (e.g. !3 = !{i32 -1208803271}).
Changing the IR to print unsigned i32 values would impact hundreds of
existing tests, so it is best to just leave it be.

Update the KCFI .set direct to use getSExtValue() in a comment so that
we can both build correctly and use FileCheck with pattern matching in
tests.

KCFI generates hashes in two places. Instead of exposing the hash
implementation in both places, introduce a helper that wraps the
specific hash implementation in a single place, llvm::getKCFITypeID.

In order to transition between KCFI hash, we need to be able to specify
them. Add the Clang option -fsanitize-kcfi-hash= and a IR module option
"kcfi-hash" that can choose between xxHash64 and FNV-1a. Default to
xxHash64 to stay backward compatible, as we'll need to also update rustc
to take a new option to change the hash to FNV-1a for interop with the
coming GCC KCFI.

Added: 
    clang/test/CodeGen/kcfi-hash.c
    llvm/include/llvm/Support/Hash.h
    llvm/lib/Support/Hash.cpp

Modified: 
    clang/include/clang/Basic/CodeGenOptions.h
    clang/include/clang/Options/Options.td
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/test/CodeGen/cfi-salt.c
    clang/test/CodeGen/kcfi.c
    llvm/lib/Support/CMakeLists.txt
    llvm/lib/Transforms/Instrumentation/KCFI.cpp
    llvm/lib/Transforms/Utils/ModuleUtils.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/CodeGenOptions.h 
b/clang/include/clang/Basic/CodeGenOptions.h
index 6c445253d518b..c60ca507ff917 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -21,6 +21,7 @@
 #include "llvm/Frontend/Debug/Options.h"
 #include "llvm/Frontend/Driver/CodeGenOptions.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Hash.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h"
@@ -514,6 +515,9 @@ class CodeGenOptions : public CodeGenOptionsBase {
   /// binary metadata pass should not be instrumented.
   std::vector<std::string> SanitizeMetadataIgnorelistFiles;
 
+  /// Hash algorithm to use for KCFI type IDs.
+  llvm::KCFIHashAlgorithm SanitizeKcfiHash;
+
   /// Name of the stack usage file (i.e., .su file) if user passes
   /// -fstack-usage. If empty, it can be implied that -fstack-usage is not
   /// passed on the command line.

diff  --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index d31bd7d6be322..c6841937c8d39 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -2719,6 +2719,16 @@ def fsanitize_kcfi_arity : Flag<["-"], 
"fsanitize-kcfi-arity">,
                            Group<f_clang_Group>,
                            HelpText<"Embed function arity information into the 
KCFI patchable function prefix">,
                            
MarshallingInfoFlag<CodeGenOpts<"SanitizeKcfiArity">>;
+def fsanitize_kcfi_hash_EQ
+    : Joined<["-"], "fsanitize-kcfi-hash=">,
+      HelpText<"Select hash algorithm for KCFI type IDs (xxHash64, FNV-1a)">,
+      Visibility<[ClangOption, CC1Option]>,
+      Values<"xxHash64,FNV-1a">,
+      NormalizedValuesScope<"llvm">,
+      NormalizedValues<["KCFIHashAlgorithm::xxHash64",
+                        "KCFIHashAlgorithm::FNV1a"]>,
+      MarshallingInfoEnum<CodeGenOpts<"SanitizeKcfiHash">,
+                          "KCFIHashAlgorithm::xxHash64">;
 defm sanitize_stats : BoolOption<"f", "sanitize-stats",
   CodeGenOpts<"SanitizeStats">, DefaultFalse,
   PosFlag<SetTrue, [], [ClangOption], "Enable">,

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index a6a1b84e278b9..319e10c93c517 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -66,11 +66,12 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Hash.h"
 #include "llvm/Support/TimeProfiler.h"
-#include "llvm/Support/xxhash.h"
 #include "llvm/TargetParser/RISCVISAInfo.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/TargetParser/X86TargetParser.h"
+#include "llvm/Transforms/Instrumentation/KCFI.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include <optional>
 #include <set>
@@ -1272,6 +1273,12 @@ void CodeGenModule::Release() {
                                 CodeGenOpts.PatchableFunctionEntryOffset);
     if (CodeGenOpts.SanitizeKcfiArity)
       getModule().addModuleFlag(llvm::Module::Override, "kcfi-arity", 1);
+    // Store the hash algorithm choice for use in LLVM passes
+    getModule().addModuleFlag(
+        llvm::Module::Override, "kcfi-hash",
+        llvm::MDString::get(
+            getLLVMContext(),
+            llvm::stringifyKCFIHashAlgorithm(CodeGenOpts.SanitizeKcfiHash)));
   }
 
   if (CodeGenOpts.CFProtectionReturn &&
@@ -2450,8 +2457,8 @@ llvm::ConstantInt 
*CodeGenModule::CreateKCFITypeId(QualType T, StringRef Salt) {
   if (getCodeGenOpts().SanitizeCfiICallGeneralizePointers)
     Out << ".generalized";
 
-  return llvm::ConstantInt::get(Int32Ty,
-                                
static_cast<uint32_t>(llvm::xxHash64(OutName)));
+  return llvm::ConstantInt::get(
+      Int32Ty, llvm::getKCFITypeID(OutName, 
getCodeGenOpts().SanitizeKcfiHash));
 }
 
 void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD,
@@ -3205,7 +3212,8 @@ void CodeGenModule::finalizeKCFITypes() {
       continue;
 
     std::string Asm = (".weak __kcfi_typeid_" + Name + "\n.set __kcfi_typeid_" 
+
-                       Name + ", " + Twine(Type->getZExtValue()) + "\n")
+                       Name + ", " + Twine(Type->getZExtValue()) + " # " +
+                       Twine(Type->getSExtValue()) + "\n")
                           .str();
     M.appendModuleInlineAsm(Asm);
   }

diff  --git a/clang/test/CodeGen/cfi-salt.c b/clang/test/CodeGen/cfi-salt.c
index 7ba1e2fc14daa..8363236869013 100644
--- a/clang/test/CodeGen/cfi-salt.c
+++ b/clang/test/CodeGen/cfi-salt.c
@@ -27,9 +27,9 @@ typedef unsigned int (* __cfi_salt ufn_salt_t)(void);
 
 /// Must emit __kcfi_typeid symbols for address-taken function declarations
 // CHECK: module asm ".weak __kcfi_typeid_[[F4:[a-zA-Z0-9_]+]]"
-// CHECK: module asm ".set __kcfi_typeid_[[F4]], [[#%d,LOW_SODIUM_HASH:]]"
+// CHECK: module asm ".set __kcfi_typeid_[[F4]], {{[0-9]+}} # 
[[#%d,LOW_SODIUM_HASH:]]"
 // CHECK: module asm ".weak __kcfi_typeid_[[F4_SALT:[a-zA-Z0-9_]+]]"
-// CHECK: module asm ".set __kcfi_typeid_[[F4_SALT]], [[#%d,ASM_SALTY_HASH:]]"
+// CHECK: module asm ".set __kcfi_typeid_[[F4_SALT]], {{[0-9]+}} # 
[[#%d,ASM_SALTY_HASH:]]"
 
 /// Must not __kcfi_typeid symbols for non-address-taken declarations
 // CHECK-NOT: module asm ".weak __kcfi_typeid_f6"

diff  --git a/clang/test/CodeGen/kcfi-hash.c b/clang/test/CodeGen/kcfi-hash.c
new file mode 100644
index 0000000000000..636d265feb9b4
--- /dev/null
+++ b/clang/test/CodeGen/kcfi-hash.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi 
-o - %s | FileCheck --check-prefix=DEFAULT %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi 
-fsanitize-kcfi-hash=xxHash64 -o - %s | FileCheck --check-prefix=XXHASH %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=kcfi 
-fsanitize-kcfi-hash=FNV-1a -o - %s | FileCheck --check-prefix=FNV %s
+
+void foo(void) {}
+
+// DEFAULT: ![[#]] = !{i32 4, !"kcfi-hash", !"xxHash64"}
+// XXHASH: ![[#]] = !{i32 4, !"kcfi-hash", !"xxHash64"}
+// FNV: ![[#]] = !{i32 4, !"kcfi-hash", !"FNV-1a"}

diff  --git a/clang/test/CodeGen/kcfi.c b/clang/test/CodeGen/kcfi.c
index 622843cedba50..b2856b5149be9 100644
--- a/clang/test/CodeGen/kcfi.c
+++ b/clang/test/CodeGen/kcfi.c
@@ -7,7 +7,7 @@
 
 /// Must emit __kcfi_typeid symbols for address-taken function declarations
 // CHECK: module asm ".weak __kcfi_typeid_[[F4:[a-zA-Z0-9_]+]]"
-// CHECK: module asm ".set __kcfi_typeid_[[F4]], [[#%d,HASH:]]"
+// CHECK: module asm ".set __kcfi_typeid_[[F4]], {{[0-9]+}} # [[#%d,HASH:]]"
 /// Must not __kcfi_typeid symbols for non-address-taken declarations
 // CHECK-NOT: module asm ".weak __kcfi_typeid_{{f6|_Z2f6v}}"
 

diff  --git a/llvm/include/llvm/Support/Hash.h 
b/llvm/include/llvm/Support/Hash.h
new file mode 100644
index 0000000000000..bf98f0dcef836
--- /dev/null
+++ b/llvm/include/llvm/Support/Hash.h
@@ -0,0 +1,36 @@
+//===- llvm/Support/Hash.h - Hash functions --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides hash functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_HASH_H
+#define LLVM_SUPPORT_HASH_H
+
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
+
+namespace llvm {
+
+enum class KCFIHashAlgorithm { xxHash64, FNV1a };
+
+/// Parse a KCFI hash algorithm name.
+/// Returns xxHash64 if the name is not recognized.
+KCFIHashAlgorithm parseKCFIHashAlgorithm(StringRef Name);
+
+/// Convert a KCFI hash algorithm enum to its string representation.
+StringRef stringifyKCFIHashAlgorithm(KCFIHashAlgorithm Algorithm);
+
+/// Compute KCFI type ID from mangled type name.
+/// The algorithm can be xxHash64 or FNV-1a.
+uint32_t getKCFITypeID(StringRef MangledTypeName, KCFIHashAlgorithm Algorithm);
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_HASH_H

diff  --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index a0980bda2a212..1c397e8c0b766 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -202,6 +202,7 @@ add_llvm_component_library(LLVMSupport
   FormatVariadic.cpp
   GlobPattern.cpp
   GraphWriter.cpp
+  Hash.cpp
   HexagonAttributeParser.cpp
   HexagonAttributes.cpp
   InitLLVM.cpp

diff  --git a/llvm/lib/Support/Hash.cpp b/llvm/lib/Support/Hash.cpp
new file mode 100644
index 0000000000000..38befcca86b15
--- /dev/null
+++ b/llvm/lib/Support/Hash.cpp
@@ -0,0 +1,51 @@
+//===- Hash.cpp - Hash functions ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hash functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Hash.h"
+#include "llvm/Support/xxhash.h"
+
+using namespace llvm;
+
+KCFIHashAlgorithm llvm::parseKCFIHashAlgorithm(StringRef Name) {
+  if (Name == "FNV-1a")
+    return KCFIHashAlgorithm::FNV1a;
+  // Default to xxHash64 for backward compatibility
+  return KCFIHashAlgorithm::xxHash64;
+}
+
+StringRef llvm::stringifyKCFIHashAlgorithm(KCFIHashAlgorithm Algorithm) {
+  switch (Algorithm) {
+  case KCFIHashAlgorithm::xxHash64:
+    return "xxHash64";
+  case KCFIHashAlgorithm::FNV1a:
+    return "FNV-1a";
+  }
+  llvm_unreachable("Unknown KCFI hash algorithm");
+}
+
+uint32_t llvm::getKCFITypeID(StringRef MangledTypeName,
+                             KCFIHashAlgorithm Algorithm) {
+  switch (Algorithm) {
+  case KCFIHashAlgorithm::xxHash64:
+    // Use lower 32 bits of xxHash64
+    return static_cast<uint32_t>(xxHash64(MangledTypeName));
+  case KCFIHashAlgorithm::FNV1a:
+    // FNV-1a hash (32-bit)
+    uint32_t Hash = 2166136261u; // FNV offset basis
+    for (unsigned char C : MangledTypeName) {
+      Hash ^= C;
+      Hash *= 16777619u; // FNV prime
+    }
+    return Hash;
+  }
+  llvm_unreachable("Unknown KCFI hash algorithm");
+}

diff  --git a/llvm/lib/Transforms/Instrumentation/KCFI.cpp 
b/llvm/lib/Transforms/Instrumentation/KCFI.cpp
index f4cb4e2d1c9e1..f06b1d3157939 100644
--- a/llvm/lib/Transforms/Instrumentation/KCFI.cpp
+++ b/llvm/lib/Transforms/Instrumentation/KCFI.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/xxhash.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 

diff  --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp 
b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 596849ecab742..30d7831f06a2b 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -11,17 +11,18 @@
 
//===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/Hash.h"
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/xxhash.h"
+#include "llvm/Transforms/Instrumentation/KCFI.h"
 
 using namespace llvm;
 
@@ -208,10 +209,16 @@ void llvm::setKCFIType(Module &M, Function &F, StringRef 
MangledType) {
   std::string Type = MangledType.str();
   if (M.getModuleFlag("cfi-normalize-integers"))
     Type += ".normalized";
+
+  // Determine which hash algorithm to use
+  auto *MD = dyn_cast_or_null<MDString>(M.getModuleFlag("kcfi-hash"));
+  KCFIHashAlgorithm Algorithm =
+      parseKCFIHashAlgorithm(MD ? MD->getString() : "");
+
   F.setMetadata(LLVMContext::MD_kcfi_type,
                 MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
                                      Type::getInt32Ty(Ctx),
-                                     static_cast<uint32_t>(xxHash64(Type))))));
+                                     getKCFITypeID(Type, Algorithm)))));
   // If the module was compiled with -fpatchable-function-entry, ensure
   // we use the same patchable-function-prefix.
   if (auto *MD = mdconst::extract_or_null<ConstantInt>(


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to