https://github.com/brunodf-snps updated https://github.com/llvm/llvm-project/pull/126047
>From f516639a2bfe5cad76ac82684877f46ac6260077 Mon Sep 17 00:00:00 2001 From: Bruno De Fraine <brun...@synopsys.com> Date: Thu, 6 Feb 2025 11:50:44 +0100 Subject: [PATCH 1/4] [TBAA] Refine pointer-tbaa for void pointers by pointer depth Commit 77d3f8a avoids distinct tags for any pointers where the ultimate pointee type is `void`, to solve breakage in real-world code that uses (indirections to) `void*` for polymorphism over different pointer types. While this matches the TBAA implementation in GCC, this patch implements a refinement that distinguishes void pointers by pointer depth, as described in the "strict aliasing" documentation included in the aforementioned commit: > `void*` is permitted to alias any pointer type, `void**` is permitted > to alias any pointer to pointer type, and so on. For example, `void**` is no longer considered to alias `int*` in this refinement, but it remains possible to use `void**` for polymorphism over pointers to pointers. --- clang/lib/CodeGen/CodeGenTBAA.cpp | 30 ++++++++++++++++++++++++------ clang/lib/CodeGen/CodeGenTBAA.h | 5 +++++ clang/test/CXX/drs/cwg158.cpp | 3 ++- clang/test/CodeGen/tbaa-pointers.c | 22 ++++++++++++++-------- 4 files changed, 45 insertions(+), 15 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index 3f1a24791ddd816..3a294fd5980472a 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -80,6 +80,25 @@ llvm::MDNode *CodeGenTBAA::getChar() { return Char; } +llvm::MDNode *CodeGenTBAA::getAnyPtr(unsigned PtrDepth) { + assert(PtrDepth >= 1 && "Pointer must have some depth"); + + if (AnyPtrs.size() < PtrDepth) { + AnyPtrs.reserve(PtrDepth); + auto Size = Module.getDataLayout().getPointerSize(); + // Populate first element. + if (AnyPtrs.empty()) + AnyPtrs.push_back(createScalarTypeNode("any pointer", getChar(), Size)); + // Populate further elements. + for (size_t Idx = AnyPtrs.size(); Idx < PtrDepth; ++Idx) { + auto Name = ("any p" + llvm::Twine(Idx + 1) + " pointer").str(); + AnyPtrs.push_back(createScalarTypeNode(Name, AnyPtrs[Idx - 1], Size)); + } + } + + return AnyPtrs[PtrDepth - 1]; +} + static bool TypeHasMayAlias(QualType QTy) { // Tagged types have declarations, and therefore may have attributes. if (auto *TD = QTy->getAsTagDecl()) @@ -202,9 +221,8 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { // they involve a significant representation difference. We don't // currently do so, however. if (Ty->isPointerType() || Ty->isReferenceType()) { - llvm::MDNode *AnyPtr = createScalarTypeNode("any pointer", getChar(), Size); if (!CodeGenOpts.PointerTBAA) - return AnyPtr; + return getAnyPtr(); // C++ [basic.lval]p11 permits objects to accessed through an l-value of // similar type. Two types are similar under C++ [conv.qual]p2 if the // decomposition of the types into pointers, member pointers, and arrays has @@ -232,7 +250,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { // common idioms and there is no good alternative to re-write the code // without strict-aliasing violations. if (Ty->isVoidType()) - return AnyPtr; + return getAnyPtr(PtrDepth); assert(!isa<VariableArrayType>(Ty)); // When the underlying type is a builtin type, we compute the pointee type @@ -256,7 +274,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { // similar-types rule. const auto *RT = Ty->getAs<RecordType>(); if (!RT) - return AnyPtr; + return getAnyPtr(); // For unnamed structs or unions C's compatible types rule applies. Two // compatible types in different compilation units can have different @@ -270,7 +288,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { // compatibility rule, but it doesn't matter because you can never have a // pointer to an anonymous struct or union. if (!RT->getDecl()->getDeclName()) - return AnyPtr; + return getAnyPtr(); // For non-builtin types use the mangled name of the canonical type. llvm::raw_svector_ostream TyOut(TyName); @@ -281,7 +299,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { OutName += std::to_string(PtrDepth); OutName += " "; OutName += TyName; - return createScalarTypeNode(OutName, AnyPtr, Size); + return createScalarTypeNode(OutName, getAnyPtr(PtrDepth), Size); } // Accesses to arrays are accesses to objects of their element types. diff --git a/clang/lib/CodeGen/CodeGenTBAA.h b/clang/lib/CodeGen/CodeGenTBAA.h index ab3b05df7713ba6..0aae171d168df2a 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.h +++ b/clang/lib/CodeGen/CodeGenTBAA.h @@ -139,6 +139,7 @@ class CodeGenTBAA { llvm::MDNode *Root; llvm::MDNode *Char; + llvm::SmallVector<llvm::MDNode *, 4> AnyPtrs; /// getRoot - This is the mdnode for the root of the metadata type graph /// for this translation unit. @@ -148,6 +149,10 @@ class CodeGenTBAA { /// considered to be equivalent to it. llvm::MDNode *getChar(); + /// getAnyPtr - This is the mdnode for any pointer type of (at least) the + /// given pointer depth. + llvm::MDNode *getAnyPtr(unsigned PtrDepth = 1); + /// CollectFields - Collect information about the fields of a type for /// !tbaa.struct metadata formation. Return false for an unsupported type. bool CollectFields(uint64_t BaseOffset, diff --git a/clang/test/CXX/drs/cwg158.cpp b/clang/test/CXX/drs/cwg158.cpp index 1f5c319e6bd25fc..a083ded82bb2e06 100644 --- a/clang/test/CXX/drs/cwg158.cpp +++ b/clang/test/CXX/drs/cwg158.cpp @@ -42,5 +42,6 @@ const int * h(const int * (*p)[10], int *(*q)[9]) { } // POINTER-TBAA: [[PTRARRAY_TBAA]] = !{[[PTRARRAY_TY:!.+]], [[PTRARRAY_TY]], i64 0} -// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p2 int", [[ANYPTR:!.+]], i64 0} +// POINTER-TBAA: [[PTRARRAY_TY]] = !{!"p2 int", [[ANYP2PTR:!.+]], i64 0} +// POINTER-TBAA: [[ANYP2PTR]] = !{!"any p2 pointer", [[ANYPTR:!.+]], // POINTER-TBAA: [[ANYPTR]] = !{!"any pointer" diff --git a/clang/test/CodeGen/tbaa-pointers.c b/clang/test/CodeGen/tbaa-pointers.c index 48adac503357f00..9cfaa0a47af6e75 100644 --- a/clang/test/CodeGen/tbaa-pointers.c +++ b/clang/test/CodeGen/tbaa-pointers.c @@ -208,8 +208,10 @@ int void_ptrs(void **ptr) { // COMMON-LABEL: define i32 @void_ptrs( // COMMON-SAME: ptr noundef [[PTRA:%.+]]) // COMMON: [[PTR_ADDR:%.+]] = alloca ptr, align 8 -// COMMON-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] -// COMMON-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] +// DISABLE-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] +// DEFAULT-NEXT: store ptr [[PTRA]], ptr [[PTR_ADDR]], align 8, !tbaa [[ANYP2:!.+]] +// DISABLE-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYPTR]] +// DEFAULT-NEXT: [[L0:%.+]] = load ptr, ptr [[PTR_ADDR]], align 8, !tbaa [[ANYP2]] // COMMON-NEXT: [[L1:%.+]] = load ptr, ptr [[L0]], align 8, !tbaa [[ANYPTR]] // COMMON-NEXT: [[BOOL:%.+]] = icmp ne ptr [[L1]], null // COMMON-NEXT: [[BOOL_EXT:%.+]] = zext i1 [[BOOL]] to i64 @@ -220,7 +222,8 @@ int void_ptrs(void **ptr) { } // DEFAULT: [[P2INT_0]] = !{[[P2INT:!.+]], [[P2INT]], i64 0} -// DEFAULT: [[P2INT]] = !{!"p2 int", [[ANY_POINTER:!.+]], i64 0} +// DEFAULT: [[P2INT]] = !{!"p2 int", [[ANY_P2_POINTER:!.+]], i64 0} +// DEFAULT: [[ANY_P2_POINTER]] = !{!"any p2 pointer", [[ANY_POINTER:!.+]], i64 0} // DISABLE: [[ANYPTR]] = !{[[ANY_POINTER:!.+]], [[ANY_POINTER]], i64 0} // COMMON: [[ANY_POINTER]] = !{!"any pointer", [[CHAR:!.+]], i64 0} // COMMON: [[CHAR]] = !{!"omnipotent char", [[TBAA_ROOT:!.+]], i64 0} @@ -228,17 +231,19 @@ int void_ptrs(void **ptr) { // DEFAULT: [[P1INT_0]] = !{[[P1INT:!.+]], [[P1INT]], i64 0} // DEFAULT: [[P1INT]] = !{!"p1 int", [[ANY_POINTER]], i64 0} // DEFAULT: [[P3INT_0]] = !{[[P3INT:!.+]], [[P3INT]], i64 0} -// DEFAULT: [[P3INT]] = !{!"p3 int", [[ANY_POINTER]], i64 0} +// DEFAULT: [[P3INT]] = !{!"p3 int", [[ANY_P3_POINTER:!.+]], i64 0} +// DEFAULT: [[ANY_P3_POINTER]] = !{!"any p3 pointer", [[ANY_P2_POINTER]], i64 0} // DEFAULT: [[P4CHAR_0]] = !{[[P4CHAR:!.+]], [[P4CHAR]], i64 0} -// DEFAULT: [[P4CHAR]] = !{!"p4 omnipotent char", [[ANY_POINTER]], i64 0} +// DEFAULT: [[P4CHAR]] = !{!"p4 omnipotent char", [[ANY_P4_POINTER:!.*]], i64 0} +// DEFAULT: [[ANY_P4_POINTER]] = !{!"any p4 pointer", [[ANY_P3_POINTER]], i64 0} // DEFAULT: [[P3CHAR_0]] = !{[[P3CHAR:!.+]], [[P3CHAR]], i64 0} -// DEFAULT: [[P3CHAR]] = !{!"p3 omnipotent char", [[ANY_POINTER]], i64 0} +// DEFAULT: [[P3CHAR]] = !{!"p3 omnipotent char", [[ANY_P3_POINTER]], i64 0} // DEFAULT: [[P2CHAR_0]] = !{[[P2CHAR:!.+]], [[P2CHAR]], i64 0} -// DEFAULT: [[P2CHAR]] = !{!"p2 omnipotent char", [[ANY_POINTER]], i64 0} +// DEFAULT: [[P2CHAR]] = !{!"p2 omnipotent char", [[ANY_P2_POINTER]], i64 0} // DEFAULT: [[P1CHAR_0]] = !{[[P1CHAR:!.+]], [[P1CHAR]], i64 0} // DEFAULT: [[P1CHAR]] = !{!"p1 omnipotent char", [[ANY_POINTER]], i64 0} // DEFAULT: [[P2S1_TAG]] = !{[[P2S1:!.+]], [[P2S1]], i64 0} -// DEFAULT: [[P2S1]] = !{!"p2 _ZTS2S1", [[ANY_POINTER]], i64 0} +// DEFAULT: [[P2S1]] = !{!"p2 _ZTS2S1", [[ANY_P2_POINTER]], i64 0} // DEFAULT: [[P1S1_TAG:!.+]] = !{[[P1S1:!.+]], [[P1S1]], i64 0} // DEFAULT: [[P1S1]] = !{!"p1 _ZTS2S1", [[ANY_POINTER]], i64 0} // DEFAULT: [[P1S2_TAG]] = !{[[P1S2:!.+]], [[P1S2]], i64 0} @@ -251,3 +256,4 @@ int void_ptrs(void **ptr) { // COMMON: [[INT_TAG]] = !{[[INT_TY:!.+]], [[INT_TY]], i64 0} // COMMON: [[INT_TY]] = !{!"int", [[CHAR]], i64 0} // DEFAULT: [[ANYPTR]] = !{[[ANY_POINTER]], [[ANY_POINTER]], i64 0} +// DEFAULT: [[ANYP2]] = !{[[ANY_P2_POINTER]], [[ANY_P2_POINTER]], i64 0} >From aa4ccd627695e58991553e6c24e2ae8943228677 Mon Sep 17 00:00:00 2001 From: Bruno De Fraine <brun...@synopsys.com> Date: Thu, 6 Feb 2025 22:33:26 +0100 Subject: [PATCH 2/4] CodeGenTBAA: getAnyPtr: add extra comment describing AnyPtrs --- clang/lib/CodeGen/CodeGenTBAA.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index 3a294fd5980472a..4d78c9e24457578 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -83,6 +83,9 @@ llvm::MDNode *CodeGenTBAA::getChar() { llvm::MDNode *CodeGenTBAA::getAnyPtr(unsigned PtrDepth) { assert(PtrDepth >= 1 && "Pointer must have some depth"); + // Populate at least PtrDepth elements in AnyPtrs. These are the type nodes + // for "any" pointers of increasing pointer depth, and are organized in the + // hierarchy: any pointer <- any p2 pointer <- any p3 pointer <- ... if (AnyPtrs.size() < PtrDepth) { AnyPtrs.reserve(PtrDepth); auto Size = Module.getDataLayout().getPointerSize(); >From 8068c5b7cea637fd1f6c61233e34a5c666ab569f Mon Sep 17 00:00:00 2001 From: Bruno De Fraine <brun...@synopsys.com> Date: Mon, 10 Feb 2025 10:20:26 +0100 Subject: [PATCH 3/4] CodeGenTBAA: getTypeInfoHelper: also use pointer depth in conservative cases --- clang/lib/CodeGen/CodeGenTBAA.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index 4d78c9e24457578..aa8323c276e7803 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -277,7 +277,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { // similar-types rule. const auto *RT = Ty->getAs<RecordType>(); if (!RT) - return getAnyPtr(); + return getAnyPtr(PtrDepth); // For unnamed structs or unions C's compatible types rule applies. Two // compatible types in different compilation units can have different @@ -291,7 +291,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { // compatibility rule, but it doesn't matter because you can never have a // pointer to an anonymous struct or union. if (!RT->getDecl()->getDeclName()) - return getAnyPtr(); + return getAnyPtr(PtrDepth); // For non-builtin types use the mangled name of the canonical type. llvm::raw_svector_ostream TyOut(TyName); >From ffa0edc8846a2fad91b298bd982a2299c309011c Mon Sep 17 00:00:00 2001 From: Bruno De Fraine <brun...@synopsys.com> Date: Tue, 11 Feb 2025 11:30:33 +0100 Subject: [PATCH 4/4] CodeGenTBAA: getAnyPtr: further extend description in comment based on suggestion --- clang/lib/CodeGen/CodeGenTBAA.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index aa8323c276e7803..818b6dabaa144c7 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -86,6 +86,20 @@ llvm::MDNode *CodeGenTBAA::getAnyPtr(unsigned PtrDepth) { // Populate at least PtrDepth elements in AnyPtrs. These are the type nodes // for "any" pointers of increasing pointer depth, and are organized in the // hierarchy: any pointer <- any p2 pointer <- any p3 pointer <- ... + // + // Note that AnyPtrs[Idx] is actually the node for pointer depth (Idx+1), + // since there is no node for pointer depth 0. + // + // These "any" pointer type nodes are used in pointer TBAA. The type node of + // a concrete pointer type has the "any" pointer type node of appropriate + // pointer depth as its parent. The "any" pointer type nodes are also used + // directly for accesses to void pointers, or to specific pointers that we + // conservatively do not distinguish in pointer TBAA (e.g. pointers to + // members). Essentially, this establishes that e.g. void** can alias with + // any type that can unify with T**, ignoring things like qualifiers. Here, T + // is a variable that represents an arbitrary type, including pointer types. + // As such, each depth is naturally a subtype of the previous depth, and thus + // transitively of all previous depths. if (AnyPtrs.size() < PtrDepth) { AnyPtrs.reserve(PtrDepth); auto Size = Module.getDataLayout().getPointerSize(); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits