https://github.com/BStott6 updated 
https://github.com/llvm/llvm-project/pull/166381

>From 4481075fce712a3f55493264c11fba6cd4015a4b Mon Sep 17 00:00:00 2001
From: BStott <[email protected]>
Date: Tue, 4 Nov 2025 15:04:29 +0000
Subject: [PATCH 1/4] [TySan] User-friendly (C style) pointer type names for
 error reports

---
 clang/docs/TypeSanitizer.rst                  |  2 -
 compiler-rt/test/tysan/print_stacktrace.c     |  2 +-
 compiler-rt/test/tysan/ptr-float.c            |  2 +-
 .../Instrumentation/TypeSanitizer.cpp         | 40 ++++++++++++++++++-
 4 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/clang/docs/TypeSanitizer.rst b/clang/docs/TypeSanitizer.rst
index 3c683a6c24bb4..c2f628cb231db 100644
--- a/clang/docs/TypeSanitizer.rst
+++ b/clang/docs/TypeSanitizer.rst
@@ -119,8 +119,6 @@ brief dictionary of these terms.
 
 * ``omnipotent char``: This is a special type which can alias with anything. 
Its name comes from the C/C++ 
   type ``char``.
-* ``type p[x]``: This signifies pointers to the type. ``x`` is the number of 
indirections to reach the final value.
-  As an example, a pointer to a pointer to an integer would be ``type p2 int``.
 
 TypeSanitizer is still experimental. User-facing error messages should be 
improved in the future to remove 
 references to LLVM IR specific terms.
diff --git a/compiler-rt/test/tysan/print_stacktrace.c 
b/compiler-rt/test/tysan/print_stacktrace.c
index 3ffb6063377d9..831be5e4afed9 100644
--- a/compiler-rt/test/tysan/print_stacktrace.c
+++ b/compiler-rt/test/tysan/print_stacktrace.c
@@ -10,7 +10,7 @@ void zero_array() {
   for (i = 0; i < 1; ++i)
     P[i] = 0.0f;
   // CHECK: ERROR: TypeSanitizer: type-aliasing-violation
-  // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing 
object of type p1 float
+  // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing 
object of type float*
   // CHECK: {{#0 0x.* in zero_array .*print_stacktrace.c:}}[[@LINE-3]]
   // CHECK-SHORT-NOT: {{#1 0x.* in main .*print_stacktrace.c}}
   // CHECK-LONG-NEXT: {{#1 0x.* in main .*print_stacktrace.c}}
diff --git a/compiler-rt/test/tysan/ptr-float.c 
b/compiler-rt/test/tysan/ptr-float.c
index aaa9895986988..145d5d8f289ea 100644
--- a/compiler-rt/test/tysan/ptr-float.c
+++ b/compiler-rt/test/tysan/ptr-float.c
@@ -7,7 +7,7 @@ void zero_array() {
   for (i = 0; i < 1; ++i)
     P[i] = 0.0f;
   // CHECK: ERROR: TypeSanitizer: type-aliasing-violation
-  // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing 
object of type p1 float
+  // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing 
object of type float*
   // CHECK: {{#0 0x.* in zero_array .*ptr-float.c:}}[[@LINE-3]]
 }
 
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 87eba5f2c5242..e5109c047584e 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -70,6 +70,12 @@ static cl::opt<bool> ClVerifyOutlinedInstrumentation(
              "function calls. This verifies that they behave the same."),
     cl::Hidden, cl::init(false));
 
+static cl::opt<bool> ClUseTBAATypeNames(
+    "tysan-use-tbaa-type-names",
+    cl::desc("Print TBAA-style type names for pointers rather than C-style "
+             "names (e.g. 'p2 int' rather than 'int**')"),
+    cl::Hidden, cl::init(false));
+
 STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses");
 
 namespace {
@@ -260,6 +266,29 @@ static std::string encodeName(StringRef Name) {
   return Output;
 }
 
+/// Converts pointer type names from TBAA "p2 int" style to C style ("int**").
+/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly 
name for this type.
+/// If the type name was changed, returns true and stores the new type name in 
`Dest`.
+/// Otherwise, returns false (`Dest` is unchanged).
+static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, std::string 
&Dest) {
+  if (!TypeName.consume_front("p"))
+    return false;
+
+  int Indirection;
+  if (TypeName.consumeInteger(10, Indirection))
+    return false;
+
+  if (!TypeName.consume_front(" "))
+    return false;
+
+  Dest.clear();
+  Dest.reserve(TypeName.size() + Indirection); // One * per indirection
+  Dest.append(TypeName);
+  Dest.append(Indirection, '*');
+
+  return true;
+}
+
 std::string
 TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD,
                                             TypeNameMapTy &TypeNames) {
@@ -355,7 +384,16 @@ bool TypeSanitizer::generateBaseTypeDescriptor(
   //   [2, member count, [type pointer, offset]..., name]
 
   LLVMContext &C = MD->getContext();
-  Constant *NameData = ConstantDataArray::getString(C, NameNode->getString());
+  StringRef TypeName = NameNode->getString();
+
+  // Convert LLVM-internal TBAA-style type names to C-style type names
+  // (more user-friendly)
+  std::string CStyleTypeName;
+  if (!ClUseTBAATypeNames)
+    if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName))
+      TypeName = CStyleTypeName;
+
+  Constant *NameData = ConstantDataArray::getString(C, TypeName);
   SmallVector<Type *> TDSubTys;
   SmallVector<Constant *> TDSubData;
 

>From a71d469a233dffbac5fb2935bbecbc07416597bd Mon Sep 17 00:00:00 2001
From: BStott <[email protected]>
Date: Tue, 4 Nov 2025 17:21:49 +0000
Subject: [PATCH 2/4] Fix failing test

---
 clang/test/CodeGen/sanitize-type-globals.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/sanitize-type-globals.cpp 
b/clang/test/CodeGen/sanitize-type-globals.cpp
index 1154ab4ca5df2..1300396795ff3 100644
--- a/clang/test/CodeGen/sanitize-type-globals.cpp
+++ b/clang/test/CodeGen/sanitize-type-globals.cpp
@@ -13,7 +13,7 @@
 // CHECK: @__tysan_v1_omnipotent_20char = linkonce_odr constant { i64, i64, 
ptr, i64, [16 x i8] } { i64 2, i64 1, ptr @__tysan_v1_Simple_20C_2b_2b_20TBAA, 
i64 0, [16 x i8] c"omnipotent char\00" }, comdat
 // CHECK: @__tysan_v1_int = linkonce_odr constant { i64, i64, ptr, i64, [4 x 
i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [4 x i8] 
c"int\00" }, comdat
 // CHECK: @__tysan_v1_any_20pointer = linkonce_odr constant { i64, i64, ptr, 
i64, [12 x i8] } { i64 2, i64 1, ptr @__tysan_v1_omnipotent_20char, i64 0, [12 
x i8] c"any pointer\00" }, comdat
-// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, 
[7 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [7 x i8] c"p1 
int\00" }, comdat
+// CHECK: @__tysan_v1_p1_20int = linkonce_odr constant { i64, i64, ptr, i64, 
[5 x i8] } { i64 2, i64 1, ptr @__tysan_v1_any_20pointer, i64 0, [5 x i8] 
c"int*\00" }, comdat
 // CHECK: @__tysan_v1___ZTS9CompleteS = linkonce_odr constant { i64, i64, ptr, 
i64, ptr, i64, [15 x i8] } { i64 2, i64 2, ptr @__tysan_v1_int, i64 0, ptr 
@__tysan_v1_p1_20int, i64 8, [15 x i8] c"_ZTS9CompleteS\00" }, comdat
 // CHECK: @__tysan_v1___ZTS1b = linkonce_odr constant { i64, i64, [7 x i8] } { 
i64 2, i64 0, [7 x i8] c"_ZTS1b\00" }, comdat
 // CHECK: @llvm.used = appending global [8 x ptr] [ptr @tysan.module_ctor, ptr 
@__tysan_v1_Simple_20C_2b_2b_20TBAA, ptr @__tysan_v1_omnipotent_20char, ptr 
@__tysan_v1_int, ptr @__tysan_v1_any_20pointer, ptr @__tysan_v1_p1_20int, ptr 
@__tysan_v1___ZTS9CompleteS, ptr @__tysan_v1___ZTS1b], section "llvm.metadata"

>From ac99a5be6be662c51d3c838774c76c28e5382bc7 Mon Sep 17 00:00:00 2001
From: BStott <[email protected]>
Date: Thu, 6 Nov 2025 13:56:29 +0000
Subject: [PATCH 3/4] Remove command line flag, fix formatting

---
 .../Instrumentation/TypeSanitizer.cpp         | 20 +++++++------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index e5109c047584e..ab59c3e9de151 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -70,12 +70,6 @@ static cl::opt<bool> ClVerifyOutlinedInstrumentation(
              "function calls. This verifies that they behave the same."),
     cl::Hidden, cl::init(false));
 
-static cl::opt<bool> ClUseTBAATypeNames(
-    "tysan-use-tbaa-type-names",
-    cl::desc("Print TBAA-style type names for pointers rather than C-style "
-             "names (e.g. 'p2 int' rather than 'int**')"),
-    cl::Hidden, cl::init(false));
-
 STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses");
 
 namespace {
@@ -267,10 +261,11 @@ static std::string encodeName(StringRef Name) {
 }
 
 /// Converts pointer type names from TBAA "p2 int" style to C style ("int**").
-/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly 
name for this type.
-/// If the type name was changed, returns true and stores the new type name in 
`Dest`.
-/// Otherwise, returns false (`Dest` is unchanged).
-static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName, std::string 
&Dest) {
+/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly
+/// name for this type. If the type name was changed, returns true and stores
+/// the new type name in `Dest`. Otherwise, returns false (`Dest` is 
unchanged).
+static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName,
+                                              std::string &Dest) {
   if (!TypeName.consume_front("p"))
     return false;
 
@@ -389,9 +384,8 @@ bool TypeSanitizer::generateBaseTypeDescriptor(
   // Convert LLVM-internal TBAA-style type names to C-style type names
   // (more user-friendly)
   std::string CStyleTypeName;
-  if (!ClUseTBAATypeNames)
-    if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName))
-      TypeName = CStyleTypeName;
+  if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName))
+    TypeName = CStyleTypeName;
 
   Constant *NameData = ConstantDataArray::getString(C, TypeName);
   SmallVector<Type *> TDSubTys;

>From 62d29b91c330606b4ce603dc177fe37098448e24 Mon Sep 17 00:00:00 2001
From: BStott <[email protected]>
Date: Thu, 20 Nov 2025 11:31:16 +0000
Subject: [PATCH 4/4] Rework pointer typename rewriting to occur in runtime
 rather than instrumentation, fix demangling for pointer names

---
 compiler-rt/lib/tysan/tysan.cpp               | 54 ++++++++++++++++---
 .../Instrumentation/TypeSanitizer.cpp         | 34 +-----------
 2 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp
index 1c67adeba0fc5..76fa8f45ebe4f 100644
--- a/compiler-rt/lib/tysan/tysan.cpp
+++ b/compiler-rt/lib/tysan/tysan.cpp
@@ -22,6 +22,7 @@
 
 #include "tysan/tysan.h"
 
+#include <ctype.h>
 #include <stdint.h>
 #include <string.h>
 
@@ -40,20 +41,62 @@ tysan_copy_types(const void *daddr, const void *saddr, uptr size) {
     internal_memmove(shadow_for(daddr), shadow_for(saddr), size * sizeof(uptr));
 }
 
-static const char *getDisplayName(const char *Name) {
+/// Struct returned by `parseIndirectionPrefix`.
+struct ParseIndirectionPrefixResult {
+  /// Level of indirection - 0 if the prefix is not found.
+  size_t Indirection;
+  /// Pointer to the remaining part of the name after the indirection prefix.
+  /// (This is the original pointer if the prefix is not found.)
+  const char *RemainingName;
+};
+
+/// Parses the "p{indirection} " prefix given to pointer type names in TBAA.
+/// Returns {0, Name} unless Name is 'p', one or more digits, then a space.
+static ParseIndirectionPrefixResult parseIndirectionPrefix(const char *Name) {
+  // Parse 'p'. This also handles the case of an empty string.
+  if (Name[0] != 'p')
+    return {0, Name};
+
+  // Parse indirection level (isdigit() on a negative `char` would be UB).
+  size_t CharIndex = 1;
+  size_t Indirection = 0;
+  while (Name[CharIndex] >= '0' && Name[CharIndex] <= '9') {
+    const auto DigitValue = static_cast<size_t>(Name[CharIndex] - '0');
+    Indirection = Indirection * 10 + DigitValue;
+    ++CharIndex;
+  }
+
+  // Require at least one digit and then a space, so that a name such as
+  // "p foo" is not mistaken for a pointer prefix.
+  if (CharIndex == 1 || Name[CharIndex] != ' ')
+    return {0, Name};
+
+  return {Indirection, Name + CharIndex + 1};
+}
+
+/// Prints the user-facing name of a type: demangled, with "pN " as N '*'s.
+static void printDisplayName(const char *Name) {
-  if (Name[0] == '\0')
-    return "<anonymous type>";
+  if (Name[0] == '\0') {
+    Printf("<anonymous type>");
+    return;
+  }
+
+  // Parse indirection prefix and remove it.
+  const auto [Indirection, RemainingName] = parseIndirectionPrefix(Name);
 
   // Clang generates tags for C++ types that demangle as typeinfo. Remove the
   // prefix from the generated string.
   const char *TIPrefix = "typeinfo name for ";
   size_t TIPrefixLen = strlen(TIPrefix);
 
-  const char *DName = Symbolizer::GetOrInit()->Demangle(Name);
+  const char *DName = Symbolizer::GetOrInit()->Demangle(RemainingName);
   if (!internal_strncmp(DName, TIPrefix, TIPrefixLen))
     DName += TIPrefixLen;
 
-  return DName;
+  // Print the type name, then one '*' per level of indirection (C style).
+  Printf("%s", DName);
+  for (size_t i = 0; i < Indirection; ++i)
+    Printf("*");
 }
 
 static void printTDName(tysan_type_descriptor *td) {
@@ -75,8 +118,7 @@ static void printTDName(tysan_type_descriptor *td) {
     }
     break;
   case TYSAN_STRUCT_TD:
-    Printf("%s", getDisplayName(
-                     (char *)(td->Struct.Members + td->Struct.MemberCount)));
+    printDisplayName((char *)(td->Struct.Members + td->Struct.MemberCount));
     break;
   }
 }
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index ab59c3e9de151..87eba5f2c5242 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -260,30 +260,6 @@ static std::string encodeName(StringRef Name) {
   return Output;
 }
 
-/// Converts pointer type names from TBAA "p2 int" style to C style ("int**").
-/// Currently leaves "omnipotent char" unchanged - not sure of a user-friendly
-/// name for this type. If the type name was changed, returns true and stores
-/// the new type name in `Dest`. Otherwise, returns false (`Dest` is unchanged).
-static bool convertTBAAStyleTypeNamesToCStyle(StringRef TypeName,
-                                              std::string &Dest) {
-  if (!TypeName.consume_front("p"))
-    return false;
-
-  int Indirection;
-  if (TypeName.consumeInteger(10, Indirection))
-    return false;
-
-  if (!TypeName.consume_front(" "))
-    return false;
-
-  Dest.clear();
-  Dest.reserve(TypeName.size() + Indirection); // One * per indirection
-  Dest.append(TypeName);
-  Dest.append(Indirection, '*');
-
-  return true;
-}
-
 std::string
 TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD,
                                             TypeNameMapTy &TypeNames) {
@@ -379,15 +355,7 @@ bool TypeSanitizer::generateBaseTypeDescriptor(
   //   [2, member count, [type pointer, offset]..., name]
 
   LLVMContext &C = MD->getContext();
-  StringRef TypeName = NameNode->getString();
-
-  // Convert LLVM-internal TBAA-style type names to C-style type names
-  // (more user-friendly)
-  std::string CStyleTypeName;
-  if (convertTBAAStyleTypeNamesToCStyle(TypeName, CStyleTypeName))
-    TypeName = CStyleTypeName;
-
-  Constant *NameData = ConstantDataArray::getString(C, TypeName);
+  Constant *NameData = ConstantDataArray::getString(C, NameNode->getString());
   SmallVector<Type *> TDSubTys;
   SmallVector<Constant *> TDSubData;
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to