https://github.com/VigneshwarJ updated https://github.com/llvm/llvm-project/pull/186275
>From fa081737217b635c8e5c0c53a1a63cc98cf98a2b Mon Sep 17 00:00:00 2001 From: vigneshwar jayakumar <[email protected]> Date: Thu, 12 Mar 2026 17:57:15 -0500 Subject: [PATCH 1/4] [Clang] Fix sret AS for non-trivially-copyable returns. ItaniumCXXABI::classifyReturnType used getAllocaAddrSpace() for sret, forcing callers to pass an sret pointer in the alloca address space. This is wrong whenever the caller's destination is in the default address space, in the following cases: non-trivially-copyable types cannot be copied out of an alloca temporary, and for types with deleted copy/move constructors any temporary-plus-memcpy workaround is wrong. Use LangAS::Default instead so that the caller can pass any default-AS pointer directly. Fixes issue #185744 --- clang/lib/CodeGen/CGExprAgg.cpp | 7 ----- clang/lib/CodeGen/ItaniumCXXABI.cpp | 11 ++++--- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 7 +++-- .../test/CodeGenCXX/no-elide-constructors.cpp | 3 +- .../CodeGenHIP/sret-nontrivial-copyable.hip | 29 ++++++++++++++----- clang/test/CodeGenHIP/store-addr-space.hip | 11 ++++--- clang/test/OpenMP/amdgcn_sret_ctor.cpp | 3 +- 7 files changed, 39 insertions(+), 32 deletions(-) diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 3a4291719da74..8aad0294fb36e 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -309,13 +309,6 @@ void AggExprEmitter::withReturnValueSlot( llvm::IntrinsicInst *LifetimeStartInst = nullptr; if (!UseTemp) { RetAddr = Dest.getAddress(); - if (RetAddr.isValid() && RetAddr.getAddressSpace() != SRetAS) { - llvm::Type *SRetPtrTy = - llvm::PointerType::get(CGF.getLLVMContext(), SRetAS); - RetAddr = RetAddr.withPointer( - CGF.performAddrSpaceCast(RetAddr.getBasePointer(), SRetPtrTy), - RetAddr.isKnownNonNull()); - } } else { RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp"); if (CGF.EmitLifetimeStart(RetAddr.getBasePointer())) { diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 
8a06051a1c730..f1c3af0f0634d 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1381,12 +1381,15 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (!RD) return false; - // If C++ prohibits us from making a copy, return by address. + // If C++ prohibits us from making a copy, return by address using the + // language default AS. The alloca AS would force callers to provide a + // stack pointer, which is invalid when the destination is a default AS + // and prohibited for types with deleted copy/move constructors. if (!RD->canPassInRegisters()) { auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - FI.getReturnInfo() = ABIArgInfo::getIndirect( - Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(), - /*ByVal=*/false); + auto DefaultAS = CGM.getContext().getTargetAddressSpace(LangAS::Default); + FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*AddrSpace=*/DefaultAS, + /*ByVal=*/false); return true; } return false; diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 06fce6171eb28..ac04bd267ce67 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1197,9 +1197,10 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (isIndirectReturn) { CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - FI.getReturnInfo() = ABIArgInfo::getIndirect( - Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(), - /*ByVal=*/false); + unsigned DefaultAS = + CGM.getContext().getTargetAddressSpace(LangAS::Default); + FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*AddrSpace=*/DefaultAS, + /*ByVal=*/false); // MSVC always passes `this` before the `sret` parameter. 
FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod()); diff --git a/clang/test/CodeGenCXX/no-elide-constructors.cpp b/clang/test/CodeGenCXX/no-elide-constructors.cpp index 994282debb0d0..66c4a4895035d 100644 --- a/clang/test/CodeGenCXX/no-elide-constructors.cpp +++ b/clang/test/CodeGenCXX/no-elide-constructors.cpp @@ -26,8 +26,7 @@ X Test() // sret argument. // CHECK-CXX98: call void @_ZN1XC1ERKS_( // CHECK-CXX11: call void @_ZN1XC1EOS_( - // CHECK-CXX11-NONZEROALLOCAAS: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr - // CHECK-CXX11-NONZEROALLOCAAS-NEXT: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]] + // CHECK-CXX11-NONZEROALLOCAAS: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_RESULT]] // CHECK-CXX98-ELIDE-NOT: call void @_ZN1XC1ERKS_( // CHECK-CXX11-ELIDE-NOT: call void @_ZN1XC1EOS_( // CHECK-CXX11-NONZEROALLOCAAS-ELIDE-NOT: call void @_ZN1XC1EOS_( diff --git a/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip b/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip index ee39104470fa1..31ac0f2e4b5c4 100644 --- a/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip +++ b/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip @@ -1,15 +1,15 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions ".*" --include-generated-funcs --version 6 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -x c++ -std=c++2b %s -o - | FileCheck %s // REQUIRES: amdgpu-registered-target // Verify that a non-trivially-copyable type returned via sret into a member -// field does not trigger an EmitAggregateCopy assertion. -// fix for a buildbot failure +// field uses addrspace(0) for the sret pointer (not addrspace(5)). So +// in-place construction through the addrspace(0) is the only legal option. 
struct NontrivialPtr { void *p; NontrivialPtr() noexcept; - NontrivialPtr(const NontrivialPtr &) noexcept; - NontrivialPtr &operator=(const NontrivialPtr &) noexcept; + NontrivialPtr(const NontrivialPtr &) = delete; + NontrivialPtr(NontrivialPtr &&) = delete; ~NontrivialPtr() noexcept; }; @@ -21,14 +21,27 @@ struct Wrapper { virtual ~Wrapper() noexcept; }; -// CHECK-LABEL: define dso_local void @_ZN7WrapperC1Ev( +Wrapper::Wrapper() noexcept : field(make()) {} +// CHECK-LABEL: define dso_local void @_ZN7WrapperC2Ev( // CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr // CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 -// CHECK-NEXT: call void @_ZN7WrapperC2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: store ptr addrspace(1) getelementptr inbounds inrange(-16, 16) ({ [4 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV7Wrapper, i32 0, i32 0, i32 2), ptr [[THIS1]], align 8 +// CHECK-NEXT: [[FIELD:%.*]] = getelementptr inbounds nuw [[STRUCT_WRAPPER:%.*]], ptr [[THIS1]], i32 0, i32 1 +// CHECK-NEXT: call void @_Z4makev(ptr dead_on_unwind writable sret([[STRUCT_NONTRIVIALPTR:%.*]]) align 8 [[FIELD]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void @_ZN7WrapperC1Ev( +// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr +// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 +// 
CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_ZN7WrapperC2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR2]] // CHECK-NEXT: ret void // -Wrapper::Wrapper() noexcept : field(make()) {} diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip index 6103edba46274..eaca0c76477cb 100644 --- a/clang/test/CodeGenHIP/store-addr-space.hip +++ b/clang/test/CodeGenHIP/store-addr-space.hip @@ -12,19 +12,18 @@ struct Foo { }; // AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo( -// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8, addrspace(5) // AMDGCN-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr // AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr -// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr // AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr -// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4 +// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 8 // AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8 -// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) 
#[[ATTR1:[0-9]+]] -// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST]], ptr [[DST_ASCAST]], align 8 +// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT]]) #[[ATTR1:[0-9]+]] +// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[DST_ASCAST]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8 diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp index fc6f7c15eb5e6..5d2f63c61e57d 100644 --- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp +++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp @@ -19,8 +19,7 @@ E::E() noexcept : foo(s()) {} // CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr // CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5) -// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: call void @_Z1sv(ptr dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // // >From 88038778da8516fd2696f16c29820516d74203c8 Mon Sep 17 00:00:00 2001 From: vigneshwar jayakumar <[email protected]> Date: Wed, 25 Mar 2026 14:01:01 -0500 Subject: [PATCH 2/4] changes --- clang/lib/CodeGen/CGExprAgg.cpp | 17 ++++++++---- clang/lib/CodeGen/ItaniumCXXABI.cpp | 13 +++++----- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 10 ++++--- clang/lib/CodeGen/TargetInfo.h | 7 +++++ clang/lib/CodeGen/Targets/AMDGPU.cpp | 26 +++++++++++++++++++ .../test/CodeGenCXX/no-elide-constructors.cpp | 3 ++- clang/test/CodeGenHIP/store-addr-space.hip | 11 ++++---- 
clang/test/OpenMP/amdgcn_sret_ctor.cpp | 3 ++- 8 files changed, 67 insertions(+), 23 deletions(-) diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 8aad0294fb36e..acd29f5b7546c 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -288,11 +288,11 @@ void AggExprEmitter::withReturnValueSlot( // its lifetime before we have the chance to emit a proper destructor call. // // We also need a temporary if the destination is in a different address space - // from the alloca AS, to avoid an invalid addrspacecast on the sret pointer. - // Look through addrspacecasts to avoid unnecessary temps when the - // destination is already in the alloca AS. - unsigned SRetAS = CGF.getContext().getTargetAddressSpace( - CGF.CGM.getASTAllocaAddressSpace()); + // from the sret AS. Use the target hook to get the actual sret AS for this + // return type. + const CXXRecordDecl *RD = RetTy->getAsCXXRecordDecl(); + LangAS SRetLangAS = CGF.CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD); + unsigned SRetAS = CGF.getContext().getTargetAddressSpace(SRetLangAS); bool DestASMismatch = !Dest.isIgnored() && RetTy.isTriviallyCopyableType(CGF.getContext()) && Dest.getAddress() @@ -309,6 +309,13 @@ void AggExprEmitter::withReturnValueSlot( llvm::IntrinsicInst *LifetimeStartInst = nullptr; if (!UseTemp) { RetAddr = Dest.getAddress(); + if (RetAddr.isValid() && RetAddr.getAddressSpace() != SRetAS) { + llvm::Type *SRetPtrTy = + llvm::PointerType::get(CGF.getLLVMContext(), SRetAS); + RetAddr = RetAddr.withPointer( + CGF.performAddrSpaceCast(RetAddr.getBasePointer(), SRetPtrTy), + RetAddr.isKnownNonNull()); + } } else { RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp"); if (CGF.EmitLifetimeStart(RetAddr.getBasePointer())) { diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index f1c3af0f0634d..668b20079f0d0 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ 
-1381,15 +1381,14 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (!RD) return false; - // If C++ prohibits us from making a copy, return by address using the - // language default AS. The alloca AS would force callers to provide a - // stack pointer, which is invalid when the destination is a default AS - // and prohibited for types with deleted copy/move constructors. + // If C++ prohibits us from making a copy, return by address using the target + // hook getSRetAddrSpace to decide the AS. if (!RD->canPassInRegisters()) { auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - auto DefaultAS = CGM.getContext().getTargetAddressSpace(LangAS::Default); - FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*AddrSpace=*/DefaultAS, - /*ByVal=*/false); + LangAS SRetAS = CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD); + unsigned AS = CGM.getContext().getTargetAddressSpace(SRetAS); + FI.getReturnInfo() = + ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false); return true; } return false; diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index ac04bd267ce67..5345d0af4070d 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1197,10 +1197,12 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (isIndirectReturn) { CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - unsigned DefaultAS = - CGM.getContext().getTargetAddressSpace(LangAS::Default); - FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*AddrSpace=*/DefaultAS, - /*ByVal=*/false); + LangAS SRetAS = !isTrivialForABI + ? 
CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD) + : CGM.getTargetCodeGenInfo().getASTAllocaAddressSpace(); + unsigned AS = CGM.getContext().getTargetAddressSpace(SRetAS); + FI.getReturnInfo() = + ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false); // MSVC always passes `this` before the `sret` parameter. FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod()); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 98ee894fe557f..93997d881d5ad 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -32,6 +32,7 @@ class Value; } namespace clang { +class CXXRecordDecl; class Decl; namespace CodeGen { @@ -321,6 +322,12 @@ class TargetCodeGenInfo { /// Get the AST address space for alloca. virtual LangAS getASTAllocaAddressSpace() const { return LangAS::Default; } + /// Get the address space for an indirect (sret) return of the given type. + /// The default falls back to the alloca AS. + virtual LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const { + return getASTAllocaAddressSpace(); + } + /// Get address space of pointer parameter for __cxa_atexit. 
virtual LangAS getAddrSpaceOfCxaAtexitPtrParam() const { return LangAS::Default; diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 4ac7f42289d6d..ce374822e0fd2 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -8,6 +8,7 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" +#include "clang/AST/DeclCXX.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/AMDGPUAddrSpace.h" @@ -308,6 +309,9 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { return getLangASFromTargetAS( getABIInfo().getDataLayout().getAllocaAddrSpace()); } + + LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const override; + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const override; StringRef getLLVMSyncScopeStr(const LangOptions &LangOpts, SyncScope Scope, @@ -467,6 +471,28 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( llvm::ConstantPointerNull::get(NPT), PT); } +static bool hasViableCopyOrMoveConstructor(const CXXRecordDecl *RD) { + if ((RD->needsImplicitCopyConstructor() && + !RD->defaultedCopyConstructorIsDeleted()) || + (RD->needsImplicitMoveConstructor() && + !RD->defaultedMoveConstructorIsDeleted())) + return true; + + return llvm::any_of(RD->ctors(), [](const CXXConstructorDecl *CD) { + return CD->isCopyOrMoveConstructor() && !CD->isDeleted() && + !CD->isIneligibleOrNotSelected(); + }); +} + +LangAS +AMDGPUTargetCodeGenInfo::getSRetAddrSpace(const CXXRecordDecl *RD) const { + // Types with no viable copy/move must be constructed in-place , use the + // default AS so the sret pointer matches the "this" convention. 
+ if (RD && !RD->canPassInRegisters() && !hasViableCopyOrMoveConstructor(RD)) + return LangAS::Default; + return getASTAllocaAddressSpace(); +} + LangAS AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const { diff --git a/clang/test/CodeGenCXX/no-elide-constructors.cpp b/clang/test/CodeGenCXX/no-elide-constructors.cpp index 66c4a4895035d..994282debb0d0 100644 --- a/clang/test/CodeGenCXX/no-elide-constructors.cpp +++ b/clang/test/CodeGenCXX/no-elide-constructors.cpp @@ -26,7 +26,8 @@ X Test() // sret argument. // CHECK-CXX98: call void @_ZN1XC1ERKS_( // CHECK-CXX11: call void @_ZN1XC1EOS_( - // CHECK-CXX11-NONZEROALLOCAAS: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_RESULT]] + // CHECK-CXX11-NONZEROALLOCAAS: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr + // CHECK-CXX11-NONZEROALLOCAAS-NEXT: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]] // CHECK-CXX98-ELIDE-NOT: call void @_ZN1XC1ERKS_( // CHECK-CXX11-ELIDE-NOT: call void @_ZN1XC1EOS_( // CHECK-CXX11-NONZEROALLOCAAS-ELIDE-NOT: call void @_ZN1XC1EOS_( diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip index eaca0c76477cb..6103edba46274 100644 --- a/clang/test/CodeGenHIP/store-addr-space.hip +++ b/clang/test/CodeGenHIP/store-addr-space.hip @@ -12,18 +12,19 @@ struct Foo { }; // AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo( -// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8, addrspace(5) +// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) // AMDGCN-NEXT: 
[[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr // AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr +// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr // AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr -// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 8 +// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4 // AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8 -// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT]]) #[[ATTR1:[0-9]+]] -// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[DST_ASCAST]], align 8 +// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]] +// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST]], ptr [[DST_ASCAST]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8 diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp index 5d2f63c61e57d..fc6f7c15eb5e6 100644 --- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp +++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp @@ -19,7 +19,8 @@ E::E() noexcept : foo(s()) {} // CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr // CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 -// CHECK-NEXT: call void @_Z1sv(ptr dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 
1 [[THIS1]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5) +// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // // >From f4c65cb20e95c4c425626ad63027b526aaac4c44 Mon Sep 17 00:00:00 2001 From: vigneshwar jayakumar <[email protected]> Date: Wed, 25 Mar 2026 16:15:46 -0500 Subject: [PATCH 3/4] fix trivially copyable --- clang/lib/CodeGen/CGExprAgg.cpp | 9 +++++++-- clang/test/OpenMP/amdgcn_sret_ctor.cpp | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index acd29f5b7546c..686358877e8e0 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -293,8 +293,13 @@ void AggExprEmitter::withReturnValueSlot( const CXXRecordDecl *RD = RetTy->getAsCXXRecordDecl(); LangAS SRetLangAS = CGF.CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD); unsigned SRetAS = CGF.getContext().getTargetAddressSpace(SRetLangAS); - bool DestASMismatch = !Dest.isIgnored() && - RetTy.isTriviallyCopyableType(CGF.getContext()) && + bool CanAggregateCopy = + RD ? 
(RD->hasTrivialCopyConstructor() || + RD->hasTrivialMoveConstructor() || RD->hasTrivialCopyAssignment() || + RD->hasTrivialMoveAssignment() || RD->hasAttr<TrivialABIAttr>() || + RD->isUnion()) + : RetTy.isTriviallyCopyableType(CGF.getContext()); + bool DestASMismatch = !Dest.isIgnored() && CanAggregateCopy && Dest.getAddress() .getBasePointer() ->stripPointerCasts() diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp index fc6f7c15eb5e6..8d2c035e23472 100644 --- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp +++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp @@ -16,11 +16,11 @@ E::E() noexcept : foo(s()) {} // CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1, addrspace(5) // CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr // CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5) -// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S]]) align 1 [[TMP]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // // >From fd12f16be040b6edfbceca48a97ddf58e6875441 Mon Sep 17 00:00:00 2001 From: vigneshwar jayakumar <[email protected]> Date: Mon, 13 Apr 2026 10:31:58 -0500 Subject: [PATCH 4/4] changes --- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 4 +--- clang/lib/CodeGen/Targets/AMDGPU.cpp | 15 +-------------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp 
index 5345d0af4070d..0373dd042236d 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1197,9 +1197,7 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (isIndirectReturn) { CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - LangAS SRetAS = !isTrivialForABI - ? CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD) - : CGM.getTargetCodeGenInfo().getASTAllocaAddressSpace(); + LangAS SRetAS = CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD); unsigned AS = CGM.getContext().getTargetAddressSpace(SRetAS); FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false); diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index ce374822e0fd2..809a210917449 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -471,24 +471,11 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( llvm::ConstantPointerNull::get(NPT), PT); } -static bool hasViableCopyOrMoveConstructor(const CXXRecordDecl *RD) { - if ((RD->needsImplicitCopyConstructor() && - !RD->defaultedCopyConstructorIsDeleted()) || - (RD->needsImplicitMoveConstructor() && - !RD->defaultedMoveConstructorIsDeleted())) - return true; - - return llvm::any_of(RD->ctors(), [](const CXXConstructorDecl *CD) { - return CD->isCopyOrMoveConstructor() && !CD->isDeleted() && - !CD->isIneligibleOrNotSelected(); - }); -} - LangAS AMDGPUTargetCodeGenInfo::getSRetAddrSpace(const CXXRecordDecl *RD) const { // Types with no viable copy/move must be constructed in-place , use the // default AS so the sret pointer matches the "this" convention. 
- if (RD && !RD->canPassInRegisters() && !hasViableCopyOrMoveConstructor(RD)) + if (RD && !RD->canPassInRegisters()) return LangAS::Default; return getASTAllocaAddressSpace(); } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
