https://github.com/BStott6 updated https://github.com/llvm/llvm-project/pull/166381
>From 4481075fce712a3f55493264c11fba6cd4015a4b Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Tue, 4 Nov 2025 15:04:29 +0000 Subject: [PATCH 1/5] [TySan] User-friendly (C style) pointer type names for error reports --- clang/docs/TypeSanitizer.rst | 2 - compiler-rt/test/tysan/print_stacktrace.c | 2 +- compiler-rt/test/tysan/ptr-float.c | 2 +- .../Instrumentation/TypeSanitizer.cpp | 40 ++++++++++++++++++- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/clang/docs/TypeSanitizer.rst b/clang/docs/TypeSanitizer.rst index 3c683a6c24bb4..c2f628cb231db 100644 --- a/clang/docs/TypeSanitizer.rst +++ b/clang/docs/TypeSanitizer.rst @@ -119,8 +119,6 @@ brief dictionary of these terms. * ``omnipotent char``: This is a special type which can alias with anything. Its name comes from the C/C++ type ``char``. -* ``type p[x]``: This signifies pointers to the type. ``x`` is the number of indirections to reach the final value. - As an example, a pointer to a pointer to an integer would be ``type p2 int``. TypeSanitizer is still experimental. User-facing error messages should be improved in the future to remove references to LLVM IR specific terms. diff --git a/compiler-rt/test/tysan/print_stacktrace.c b/compiler-rt/test/tysan/print_stacktrace.c index 3ffb6063377d9..831be5e4afed9 100644 --- a/compiler-rt/test/tysan/print_stacktrace.c +++ b/compiler-rt/test/tysan/print_stacktrace.c @@ -10,7 +10,7 @@ void zero_array() { for (i = 0; i < 1; ++i) P[i] = 0.0f; // CHECK: ERROR: TypeSanitizer: type-aliasing-violation - // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type p1 float + // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type float* // CHECK: {{#0 0x.* in zero_array .*print_stacktrace.c:}}[[@LINE-3]] // CHECK-SHORT-NOT: {{#1 0x.* in main .*print_stacktrace.c}} // CHECK-LONG-NEXT: {{#1 0x.* in main .*print_stacktrace.c}} diff --git a/compiler-rt/test/tysan/ptr-float.c b/compiler-rt/test/tysan/ptr-float.c index aaa9895986988..145d5d8f289ea 100644 --- a/compiler-rt/test/tysan/ptr-float.c +++ b/compiler-rt/test/tysan/ptr-float.c @@ -7,7 +7,7 @@ void zero_array() { for (i = 0; i < 1; ++i) P[i] = 0.0f; // CHECK: ERROR: TypeSanitizer: type-aliasing-violation - // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type p1 float + // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type float* // CHECK: {{#0 0x.* in zero_array .*ptr-float.c:}}[[@LINE-3]] } diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp index 87eba5f2c5242..e5109c047584e 100644 --- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp @@ -70,6 +70,12 @@ static cl::opt<bool> ClVerifyOutlinedInstrumentation( "function calls. This verifies that they behave the same."), cl::Hidden, cl::init(false)); +static cl::opt<bool> ClUseTBAATypeNames( + "tysan-use-tbaa-type-names", + cl::desc("Print TBAA-style type names for pointers rather than C-style " + "names (e.g. 'p2 int' rather than 'int**')"), + cl::Hidden, cl::init(false)); + STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses"); namespace { @@ -260,6 +266,29 @@ static std::string encodeName(StringRef Name) { return Output; } +/// Converts pointer type names from TBAA "p2 int" style to C style ("int**"). +/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly name for this type. +/// If the type name was changed, returns true and stores the new type name in `Dest`. +/// Otherwise, returns false (`Dest` is unchanged). +static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, std::string &Dest) { + if (!TypeName.consume_front("p")) + return false; + + int Indirection; + if (TypeName.consumeInteger(10, Indirection)) + return false; + + if (!TypeName.consume_front(" ")) + return false; + + Dest.clear(); + Dest.reserve(TypeName.size() + Indirection); // One * per indirection + Dest.append(TypeName); + Dest.append(Indirection, '*'); + + return true; +} + std::string TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD, TypeNameMapTy &TypeNames) { @@ -355,7 +384,16 @@ bool TypeSanitizer::generateBaseTypeDescriptor( // [2, member count, [type pointer, offset]..., name] LLVMContext &C = MD->getContext(); - Constant *NameData = ConstantDataArray::getString(C, NameNode->getString()); + StringRef TypeName = NameNode->getString(); + + // Convert LLVM-internal TBAA-style type names to C-style type names + // (more user-friendly) + std::string CStyleTypeName; + if (!ClUseTBAATypeNames) + if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName)) + TypeName = CStyleTypeName; + + Constant *NameData = ConstantDataArray::getString(C, TypeName); SmallVector<Type *> TDSubTys; SmallVector<Constant *> TDSubData; >From a71d469a233dffbac5fb2935bbecbc07416597bd Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Tue, 4 Nov 2025 17:21:49 +0000 Subject: [PATCH 2/5] Fix failing test --- clang/test/CodeGen/sanitize-type-globals.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/sanitize-type-globals.cpp b/clang/test/CodeGen/sanitize-type-globals.cpp index 1154ab4ca5df2..1300396795ff3 100644 --- a/clang/test/CodeGen/sanitize-type-globals.cpp +++ b/clang/test/CodeGen/sanitize-type-globals.cpp @@ -13,7 +13,7 @@ // CHECK: @__tysan_v1_omnipotent_20char = linkonce_odr constant { i64, i64, ptr, i64, [16 x i8] } { i64 2, i64 1, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, i64 0, [16 x i8] c"omnipotent char\00" }, comdat // CHECK: @__tysan_v1_int = linkonce_odr constant { i64, i64, ptr, i64, [4 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [4 x i8] c"int\00" }, comdat // CHECK: @__tysan_v1_any_20pointer = linkonce_odr constant { i64, i64, ptr, i64, [12 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [12 x i8] c"any pointer\00" }, comdat -// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, [7 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [7 x i8] c"p1 int\00" }, comdat +// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, [5 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [5 x i8] c"int*\00" }, comdat // CHECK: @__tysan_v1___ZTS9CompleteS = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, [15 x i8] } { i64 2, i64 2, ptr @__tysan_v1_int, i64 0, ptr @__tysan_v1_p1_20int, i64 8, [15 x i8] c"_ZTS9CompleteS\00" }, comdat // CHECK: @__tysan_v1___ZTS1b = linkonce_odr constant { i64, i64, [7 x i8] } { i64 2, i64 0, [7 x i8] c"_ZTS1b\00" }, comdat // CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_any_20pointer, ptr @__tysan_v1_p1_20int, ptr @__tysan_v1___ZTS9CompleteS, ptr @__tysan_v1___ZTS1b], section "llvm.metadata" >From ac99a5be6be662c51d3c838774c76c28e5382bc7 Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Thu, 6 Nov 2025 13:56:29 +0000 Subject: [PATCH 3/5] Remove command line flag, fix formatting --- .../Instrumentation/TypeSanitizer.cpp | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp index e5109c047584e..ab59c3e9de151 100644 --- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp @@ -70,12 +70,6 @@ static cl::opt<bool> ClVerifyOutlinedInstrumentation( "function calls. This verifies that they behave the same."), cl::Hidden, cl::init(false)); -static cl::opt<bool> ClUseTBAATypeNames( - "tysan-use-tbaa-type-names", - cl::desc("Print TBAA-style type names for pointers rather than C-style " - "names (e.g. 'p2 int' rather than 'int**')"), - cl::Hidden, cl::init(false)); - STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses"); namespace { @@ -267,10 +261,11 @@ static std::string encodeName(StringRef Name) { } /// Converts pointer type names from TBAA "p2 int" style to C style ("int**"). -/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly name for this type. -/// If the type name was changed, returns true and stores the new type name in `Dest`. -/// Otherwise, returns false (`Dest` is unchanged). -static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, std::string &Dest) { +/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly +/// name for this type. If the type name was changed, returns true and stores +/// the new type name in `Dest`. Otherwise, returns false (`Dest` is unchanged). +static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, + std::string &Dest) { if (!TypeName.consume_front("p")) return false; @@ -389,9 +384,8 @@ bool TypeSanitizer::generateBaseTypeDescriptor( // Convert LLVM-internal TBAA-style type names to C-style type names // (more user-friendly) std::string CStyleTypeName; - if (!ClUseTBAATypeNames) - if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName)) - TypeName = CStyleTypeName; + if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName)) + TypeName = CStyleTypeName; Constant *NameData = ConstantDataArray::getString(C, TypeName); SmallVector<Type *> TDSubTys; >From 62d29b91c330606b4ce603dc177fe37098448e24 Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Thu, 20 Nov 2025 11:31:16 +0000 Subject: [PATCH 4/5] Rework pointer typename rewriting to occur in runtime rather than instrumentation, fix demangling for pointer names --- compiler-rt/lib/tysan/tysan.cpp | 54 ++++++++++++++++--- .../Instrumentation/TypeSanitizer.cpp | 34 +----------- 2 files changed, 49 insertions(+), 39 deletions(-) diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp index 1c67adeba0fc5..76fa8f45ebe4f 100644 --- a/compiler-rt/lib/tysan/tysan.cpp +++ b/compiler-rt/lib/tysan/tysan.cpp @@ -22,6 +22,7 @@ #include "tysan/tysan.h" +#include <ctype.h> #include <stdint.h> #include <string.h> @@ -40,20 +41,62 @@ tysan_copy_types(const void *daddr, const void *saddr, uptr size) { internal_memmove(shadow_for(daddr), shadow_for(saddr), size * sizeof(uptr)); } -static const char *getDisplayName(const char *Name) { +/// Struct returned by `parseIndirectionPrefix`. +struct ParseIndirectionPrefixResult { + /// Level of indirection - 0 if the prefix is not found. + size_t Indirection; + /// Pointer to the remaining part of the name after the indirection prefix. + /// (This is the original pointer if the prefix is not found.) + const char *RemainingName; +}; + +/// Parses the "p{indirection} " prefix given to pointer type names in TBAA. +static ParseIndirectionPrefixResult parseIndirectionPrefix(const char *Name) { + size_t CharIndex = 0; + + // Parse 'p'. + // This also handles the case of an empty string. + if (Name[CharIndex++] != 'p') + return {0, Name}; + + // Parse indirection level. + size_t Indirection = 0; + while (isdigit(Name[CharIndex])) { + const auto DigitValue = static_cast<size_t>(Name[CharIndex] - '0'); + Indirection = Indirection * 10 + DigitValue; + ++CharIndex; + } + + // Parse space. + if (Name[CharIndex++] != ' ') + return {0, Name}; + + return {Indirection, Name + CharIndex}; +} + +static void printDisplayName(const char *Name) { if (Name[0] == '\0') - return "<anonymous type>"; + Printf("<anonymous type>"); + + // Parse indirection prefix and remove it. + const auto [Indirection, RemainingName] = parseIndirectionPrefix(Name); // Clang generates tags for C++ types that demangle as typeinfo. Remove the // prefix from the generated string. const char *TIPrefix = "typeinfo name for "; size_t TIPrefixLen = strlen(TIPrefix); - const char *DName = Symbolizer::GetOrInit()->Demangle(Name); + const char *DName = Symbolizer::GetOrInit()->Demangle(RemainingName); if (!internal_strncmp(DName, TIPrefix, TIPrefixLen)) DName += TIPrefixLen; - return DName; + // Print type name. + Printf("%s", DName); + + // Print asterisks for indirection (C pointer notation). + for (size_t i = 0; i < Indirection; ++i) { + Printf("*"); + } } static void printTDName(tysan_type_descriptor *td) { @@ -75,8 +118,7 @@ static void printTDName(tysan_type_descriptor *td) { } break; case TYSAN_STRUCT_TD: - Printf("%s", getDisplayName( - (char *)(td->Struct.Members + td->Struct.MemberCount))); + printDisplayName((char *)(td->Struct.Members + td->Struct.MemberCount)); break; } } diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp index ab59c3e9de151..87eba5f2c5242 100644 --- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp @@ -260,30 +260,6 @@ static std::string encodeName(StringRef Name) { return Output; } -/// Converts pointer type names from TBAA "p2 int" style to C style ("int**"). -/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly -/// name for this type. If the type name was changed, returns true and stores -/// the new type name in `Dest`. Otherwise, returns false (`Dest` is unchanged). -static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, - std::string &Dest) { - if (!TypeName.consume_front("p")) - return false; - - int Indirection; - if (TypeName.consumeInteger(10, Indirection)) - return false; - - if (!TypeName.consume_front(" ")) - return false; - - Dest.clear(); - Dest.reserve(TypeName.size() + Indirection); // One * per indirection - Dest.append(TypeName); - Dest.append(Indirection, '*'); - - return true; -} - std::string TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD, TypeNameMapTy &TypeNames) { @@ -379,15 +355,7 @@ bool TypeSanitizer::generateBaseTypeDescriptor( // [2, member count, [type pointer, offset]..., name] LLVMContext &C = MD->getContext(); - StringRef TypeName = NameNode->getString(); - - // Convert LLVM-internal TBAA-style type names to C-style type names - // (more user-friendly) - std::string CStyleTypeName; - if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName)) - TypeName = CStyleTypeName; - - Constant *NameData = ConstantDataArray::getString(C, TypeName); + Constant *NameData = ConstantDataArray::getString(C, NameNode->getString()); SmallVector<Type *> TDSubTys; SmallVector<Constant *> TDSubData; >From c02782655c5a061ee28e3456abe7f188aec082a4 Mon Sep 17 00:00:00 2001 From: BStott <[email protected]> Date: Thu, 20 Nov 2025 11:46:43 +0000 Subject: [PATCH 5/5] Fix test --- clang/test/CodeGen/sanitize-type-globals.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/sanitize-type-globals.cpp b/clang/test/CodeGen/sanitize-type-globals.cpp index 1300396795ff3..1154ab4ca5df2 100644 --- a/clang/test/CodeGen/sanitize-type-globals.cpp +++ b/clang/test/CodeGen/sanitize-type-globals.cpp @@ -13,7 +13,7 @@ // CHECK: @__tysan_v1_omnipotent_20char = linkonce_odr constant { i64, i64, ptr, i64, [16 x i8] } { i64 2, i64 1, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, i64 0, [16 x i8] c"omnipotent char\00" }, comdat // CHECK: @__tysan_v1_int = linkonce_odr constant { i64, i64, ptr, i64, [4 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [4 x i8] c"int\00" }, comdat // CHECK: @__tysan_v1_any_20pointer = linkonce_odr constant { i64, i64, ptr, i64, [12 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [12 x i8] c"any pointer\00" }, comdat -// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, [5 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [5 x i8] c"int*\00" }, comdat +// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, [7 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [7 x i8] c"p1 int\00" }, comdat // CHECK: @__tysan_v1___ZTS9CompleteS = linkonce_odr constant { i64, i64, ptr, i64, ptr, i64, [15 x i8] } { i64 2, i64 2, ptr @__tysan_v1_int, i64 0, ptr @__tysan_v1_p1_20int, i64 8, [15 x i8] c"_ZTS9CompleteS\00" }, comdat // CHECK: @__tysan_v1___ZTS1b = linkonce_odr constant { i64, i64, [7 x i8] } { i64 2, i64 0, [7 x i8] c"_ZTS1b\00" }, comdat // CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr @__tysan_v1_int, ptr @__tysan_v1_any_20pointer, ptr @__tysan_v1_p1_20int, ptr @__tysan_v1___ZTS9CompleteS, ptr @__tysan_v1___ZTS1b], section "llvm.metadata" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
