https://github.com/Jason-VanBeusekom updated https://github.com/llvm/llvm-project/pull/164326
>From 26b90e0098e62949cf8341cddcb69f3faf7000db Mon Sep 17 00:00:00 2001 From: Jason Van Beusekom <[email protected]> Date: Mon, 20 Oct 2025 14:16:10 -0500 Subject: [PATCH 1/2] [OpenMP][clang][HIP][CUDA] fix weak alias emit on device compilation when aliasee is no declared on device Add checks to skip the emitting of an alias on the device when the aliasee is not declared on the device. This change effects OpenMP, Hip and Cuda. --- clang/lib/CodeGen/CodeGenModule.cpp | 34 ++++++++- clang/test/CodeGenCUDA/cuda_weak_alias.cu | 36 +++++++++ clang/test/CodeGenHIP/hip_weak_alias.cpp | 63 ++++++++++++++++ clang/test/OpenMP/amdgcn_weak_alias.c | 90 +++++++++++++++++++++++ clang/test/OpenMP/nvptx_weak_alias.c | 34 +++++++++ 5 files changed, 256 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenCUDA/cuda_weak_alias.cu create mode 100644 clang/test/CodeGenHIP/hip_weak_alias.cpp create mode 100644 clang/test/OpenMP/amdgcn_weak_alias.c create mode 100644 clang/test/OpenMP/nvptx_weak_alias.c diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index c5eb14e329315..ac0de5a221ec7 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4065,8 +4065,40 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // If this is an alias definition (which otherwise looks like a declaration) // emit it now. - if (Global->hasAttr<AliasAttr>()) + if (Global->hasAttr<AliasAttr>()) { + if (LangOpts.OpenMPIsTargetDevice || LangOpts.CUDA) { + const auto *AA = Global->getAttr<AliasAttr>(); + assert(AA && "Not an alias?"); + GlobalDecl AliaseeGD; + if (!lookupRepresentativeDecl(AA->getAliasee(), AliaseeGD)) { + if (LangOpts.CUDA) + // Failed to find aliasee on device side, skip emitting + return; + } else { + const auto *AliaseeDecl = dyn_cast<ValueDecl>(AliaseeGD.getDecl()); + if (LangOpts.OpenMPIsTargetDevice) { + if (!AliaseeDecl || + !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( + AliaseeDecl)) + // Not a target declaration, skip emitting + return; + } else { + // HIP/CUDA + const bool HasDeviceAttr = Global->hasAttr<CUDADeviceAttr>(); + const bool AliaseeHasDeviceAttr = + AliaseeDecl && AliaseeDecl->hasAttr<CUDADeviceAttr>(); + if (LangOpts.CUDAIsDevice) { + if (!HasDeviceAttr || !AliaseeHasDeviceAttr) + return; + } else if (HasDeviceAttr && AliaseeHasDeviceAttr) { + // Alias is only on device side, skip emitting on host side + return; + } + } + } + } return EmitAliasDefinition(GD); + } // IFunc like an alias whose value is resolved at runtime by calling resolver. if (Global->hasAttr<IFuncAttr>()) diff --git a/clang/test/CodeGenCUDA/cuda_weak_alias.cu b/clang/test/CodeGenCUDA/cuda_weak_alias.cu new file mode 100644 index 0000000000000..fda0ed7e5d74b --- /dev/null +++ b/clang/test/CodeGenCUDA/cuda_weak_alias.cu @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6 +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -x cuda -triple x86_64-unknown-linux-gnu -aux-triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST + +extern "C" { + +//. +// HOST: @HostFunc = weak alias i32 (), ptr @__HostFunc +//. +// HOST-LABEL: define dso_local i32 @__HostFunc( +// HOST-SAME: ) #[[ATTR0:[0-9]+]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: ret i32 42 +// +int __HostFunc(void) { return 42; } +int HostFunc(void) __attribute__ ((weak, alias("__HostFunc"))); + +} + +// HOST-LABEL: define dso_local noundef i32 @main( +// HOST-SAME: ) #[[ATTR1:[0-9]+]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// HOST-NEXT: ret i32 0 +// +int main() { + return 0; +} +//. +// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// HOST: attributes #[[ATTR1]] = { mustprogress noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +//. +// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. diff --git a/clang/test/CodeGenHIP/hip_weak_alias.cpp b/clang/test/CodeGenHIP/hip_weak_alias.cpp new file mode 100644 index 0000000000000..6a57ce1ab74c7 --- /dev/null +++ b/clang/test/CodeGenHIP/hip_weak_alias.cpp @@ -0,0 +1,63 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -x hip -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc +// RUN: %clang_cc1 -x hip -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -emit-llvm %s -fcuda-is-device -o - | FileCheck %s --check-prefix=DEVICE + +#define __device__ __attribute__((device)) + +extern "C" { + +//. +// HOST: @__hip_cuid_ = global i8 0 +// HOST: @llvm.compiler.used = appending global [1 x ptr] [ptr @__hip_cuid_], section "llvm.metadata" +// HOST: @HostFunc = weak alias i32 (), ptr @__HostFunc +//. +// DEVICE: @__hip_cuid_ = addrspace(1) global i8 0 +// DEVICE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata" +// DEVICE: @One = weak alias i32 (), ptr @__One +//. +// HOST-LABEL: define dso_local i32 @__HostFunc( +// HOST-SAME: ) #[[ATTR0:[0-9]+]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: ret i32 42 +// +int __HostFunc(void) { return 42; } +int HostFunc(void) __attribute__ ((weak, alias("__HostFunc"))); + +// DEVICE-LABEL: define dso_local i32 @__One( +// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// DEVICE-NEXT: ret i32 2 +// +__device__ int __One(void) { return 2; } +__device__ int One(void) __attribute__ ((weak, alias("__One"))); + +} + +// HOST-LABEL: define dso_local noundef i32 @main( +// HOST-SAME: ) #[[ATTR1:[0-9]+]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// HOST-NEXT: ret i32 0 +// +int main() { + return 0; +} +//. +// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// HOST: attributes #[[ATTR1]] = { mustprogress noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +//. +// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +//. +// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. +// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} +// DEVICE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// DEVICE: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. diff --git a/clang/test/OpenMP/amdgcn_weak_alias.c b/clang/test/OpenMP/amdgcn_weak_alias.c new file mode 100644 index 0000000000000..bf8645bef6d78 --- /dev/null +++ b/clang/test/OpenMP/amdgcn_weak_alias.c @@ -0,0 +1,90 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6 +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST +// RUN: %clang_cc1 -fopenmp -x c -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=DEVICE + +//. +// HOST: @One = weak alias i32 (), ptr @__One +// HOST: @Two = weak alias i32 (), ptr @__Two +// HOST: @Three = weak alias i32 (), ptr @__Three +//. +// DEVICE: @__omp_rtl_debug_kind = weak_odr hidden addrspace(1) constant i32 0 +// DEVICE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden addrspace(1) constant i32 0 +// DEVICE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace(1) constant i32 0 +// DEVICE: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0 +// DEVICE: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden addrspace(1) constant i32 0 +// DEVICE: @Two = weak hidden alias i32 (), ptr @__Two +// DEVICE: @Three = weak hidden alias i32 (), ptr @__Three +// DEVICE: @Three.1 = weak hidden alias i32 (), ptr @__Three +//. +// HOST-LABEL: define dso_local i32 @__One( +// HOST-SAME: ) #[[ATTR0:[0-9]+]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: ret i32 1 +// +int __One(void) { return 1; } +int One(void) __attribute__ ((weak, alias("__One"))); + +#pragma omp declare target +// HOST-LABEL: define dso_local i32 @__Two( +// HOST-SAME: ) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: ret i32 2 +// +// DEVICE-LABEL: define hidden i32 @__Two( +// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// DEVICE-NEXT: ret i32 2 +// +int __Two(void) { return 2; } +int Two(void) __attribute__ ((weak, alias("__Two"))); +#pragma omp end declare target + +#pragma omp declare target +// HOST-LABEL: define dso_local i32 @__Three( +// HOST-SAME: ) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: ret i32 3 +// +// DEVICE-LABEL: define hidden i32 @__Three( +// DEVICE-SAME: ) #[[ATTR0]] { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// DEVICE-NEXT: ret i32 3 +// +int __Three(void) { return 3; } +#pragma omp end declare target +int Three(void) __attribute__ ((weak, alias("__Three"))); + + +// HOST-LABEL: define dso_local i32 @main( +// HOST-SAME: ) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// HOST-NEXT: ret i32 0 +// +int main(){ + return 0; +} + +//. +// HOST: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +//. +// DEVICE: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +//. +// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// HOST: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 51} +// HOST: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. +// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// DEVICE: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 51} +// DEVICE: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 51} +// DEVICE: [[META4:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. diff --git a/clang/test/OpenMP/nvptx_weak_alias.c b/clang/test/OpenMP/nvptx_weak_alias.c new file mode 100644 index 0000000000000..695bd7d0b8af9 --- /dev/null +++ b/clang/test/OpenMP/nvptx_weak_alias.c @@ -0,0 +1,34 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6 +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s + +//. +// CHECK: @One = weak alias i32 (), ptr @__One +//. +// CHECK-LABEL: define dso_local i32 @__One( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i32 1 +// +int __One(void) { return 1; } +int One(void) __attribute__ ((weak, alias("__One"))); + + +// CHECK-LABEL: define dso_local i32 @main( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 0 +// +int main(){ + return 0; +} +//. +// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 51} +// CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. >From 16c1a6888b87644f24c07a75c067d9e25eb1ac3e Mon Sep 17 00:00:00 2001 From: Jason Van Beusekom <[email protected]> Date: Fri, 24 Oct 2025 15:21:51 -0500 Subject: [PATCH 2/2] Move weak alias code to seperate function, refactor weak alias, update li tests --- clang/lib/CodeGen/CodeGenModule.cpp | 85 +++++++++------ clang/test/CodeGenCUDA/cuda_weak_alias.cu | 12 --- clang/test/CodeGenHIP/hip_weak_alias.cpp | 84 +++++++++++++-- clang/test/OpenMP/amdgcn_weak_alias.c | 13 --- clang/test/OpenMP/amdgcn_weak_alias.cpp | 120 ++++++++++++++++++++++ clang/test/OpenMP/nvptx_weak_alias.c | 12 --- 6 files changed, 247 insertions(+), 79 deletions(-) create mode 100644 clang/test/OpenMP/amdgcn_weak_alias.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index ac0de5a221ec7..a838bac03cb5c 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4043,6 +4043,58 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) { return D->isImplicit(); } +static bool shouldSkipAliasEmission(const CodeGenModule &CGM, + const ValueDecl *Global) { + const LangOptions &LangOpts = CGM.getLangOpts(); + if (!(LangOpts.OpenMPIsTargetDevice || LangOpts.CUDA)) + return false; + + const auto *AA = Global->getAttr<AliasAttr>(); + GlobalDecl AliaseeGD; + + // Check if the aliasee exists. + if (!CGM.lookupRepresentativeDecl(AA->getAliasee(), AliaseeGD)) { + if (LangOpts.CUDA) + // In CUDA/HIP, if the aliasee is not found, skip the alias emission. + // This is not a hard error as this branch is executed for both the host + // and device, with no respect to where the aliasee is defined. + return true; + + // For OpenMP, lookupRepresentativeDecl should always find the aliasee, this + // is an error + CGM.getDiags().Report(AA->getLocation(), diag::err_alias_to_undefined) + << false << true; + return false; + } + + const auto *AliaseeDecl = dyn_cast<ValueDecl>(AliaseeGD.getDecl()); + if (LangOpts.OpenMPIsTargetDevice) { + if (!AliaseeDecl || + !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(AliaseeDecl)) + // On OpenMP device, skip alias emission if the aliasee is not marked + // with declare target. + return true; + return false; + } + + // CUDA / HIP + const bool HasDeviceAttr = Global->hasAttr<CUDADeviceAttr>(); + const bool AliaseeHasDeviceAttr = + AliaseeDecl && AliaseeDecl->hasAttr<CUDADeviceAttr>(); + + if (LangOpts.CUDAIsDevice) { + if (!HasDeviceAttr || !AliaseeHasDeviceAttr) + // On device, skip alias emission if either the alias or the aliasee + // is not marked with __device__. + return true; + return false; + } + + // CUDA / HIP Host + // we know that the aliasee exists from above, so we know to emit + return false; +} + bool CodeGenModule::shouldEmitCUDAGlobalVar(const VarDecl *Global) const { assert(LangOpts.CUDA && "Should not be called by non-CUDA languages"); // We need to emit host-side 'shadows' for all global @@ -4066,37 +4118,8 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // If this is an alias definition (which otherwise looks like a declaration) // emit it now. if (Global->hasAttr<AliasAttr>()) { - if (LangOpts.OpenMPIsTargetDevice || LangOpts.CUDA) { - const auto *AA = Global->getAttr<AliasAttr>(); - assert(AA && "Not an alias?"); - GlobalDecl AliaseeGD; - if (!lookupRepresentativeDecl(AA->getAliasee(), AliaseeGD)) { - if (LangOpts.CUDA) - // Failed to find aliasee on device side, skip emitting - return; - } else { - const auto *AliaseeDecl = dyn_cast<ValueDecl>(AliaseeGD.getDecl()); - if (LangOpts.OpenMPIsTargetDevice) { - if (!AliaseeDecl || - !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( - AliaseeDecl)) - // Not a target declaration, skip emitting - return; - } else { - // HIP/CUDA - const bool HasDeviceAttr = Global->hasAttr<CUDADeviceAttr>(); - const bool AliaseeHasDeviceAttr = - AliaseeDecl && AliaseeDecl->hasAttr<CUDADeviceAttr>(); - if (LangOpts.CUDAIsDevice) { - if (!HasDeviceAttr || !AliaseeHasDeviceAttr) - return; - } else if (HasDeviceAttr && AliaseeHasDeviceAttr) { - // Alias is only on device side, skip emitting on host side - return; - } - } - } - } + if (shouldSkipAliasEmission(*this, Global)) + return; return EmitAliasDefinition(GD); } diff --git a/clang/test/CodeGenCUDA/cuda_weak_alias.cu b/clang/test/CodeGenCUDA/cuda_weak_alias.cu index fda0ed7e5d74b..a0f9dde22f0bd 100644 --- a/clang/test/CodeGenCUDA/cuda_weak_alias.cu +++ b/clang/test/CodeGenCUDA/cuda_weak_alias.cu @@ -16,20 +16,8 @@ int __HostFunc(void) { return 42; } int HostFunc(void) __attribute__ ((weak, alias("__HostFunc"))); } - -// HOST-LABEL: define dso_local noundef i32 @main( -// HOST-SAME: ) #[[ATTR1:[0-9]+]] { -// HOST-NEXT: [[ENTRY:.*:]] -// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// HOST-NEXT: ret i32 0 -// -int main() { - return 0; -} //. // HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// HOST: attributes #[[ATTR1]] = { mustprogress noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. // HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} diff --git a/clang/test/CodeGenHIP/hip_weak_alias.cpp b/clang/test/CodeGenHIP/hip_weak_alias.cpp index 6a57ce1ab74c7..686428726f75c 100644 --- a/clang/test/CodeGenHIP/hip_weak_alias.cpp +++ b/clang/test/CodeGenHIP/hip_weak_alias.cpp @@ -5,6 +5,7 @@ // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -emit-llvm %s -fcuda-is-device -o - | FileCheck %s --check-prefix=DEVICE #define __device__ __attribute__((device)) +#define __host__ __attribute__((host)) extern "C" { @@ -12,10 +13,17 @@ extern "C" { // HOST: @__hip_cuid_ = global i8 0 // HOST: @llvm.compiler.used = appending global [1 x ptr] [ptr @__hip_cuid_], section "llvm.metadata" // HOST: @HostFunc = weak alias i32 (), ptr @__HostFunc +// HOST: @Two = weak alias i32 (), ptr @__Two +// HOST: @Four = weak alias i32 (), ptr @__Four //. // DEVICE: @__hip_cuid_ = addrspace(1) global i8 0 // DEVICE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata" // DEVICE: @One = weak alias i32 (), ptr @__One +// DEVICE: @Two = weak alias i32 (), ptr @__Two +// DEVICE: @Three = weak alias i32 (), ptr @__Three +// DEVICE: @Five = weak alias i32 (), ptr @__Five +// DEVICE: @_Z3Sixv = weak alias i32 (), ptr @_Z5__Sixv +// DEVICE: @_Z3Sixf = weak alias float (float), ptr @_Z5__Sixf //. // HOST-LABEL: define dso_local i32 @__HostFunc( // HOST-SAME: ) #[[ATTR0:[0-9]+]] { @@ -23,7 +31,7 @@ extern "C" { // HOST-NEXT: ret i32 42 // int __HostFunc(void) { return 42; } -int HostFunc(void) __attribute__ ((weak, alias("__HostFunc"))); +int HostFunc(void) __attribute__((weak, alias("__HostFunc"))); // DEVICE-LABEL: define dso_local i32 @__One( // DEVICE-SAME: ) #[[ATTR0:[0-9]+]] { @@ -33,23 +41,77 @@ int HostFunc(void) __attribute__ ((weak, alias("__HostFunc"))); // DEVICE-NEXT: ret i32 2 // __device__ int __One(void) { return 2; } -__device__ int One(void) __attribute__ ((weak, alias("__One"))); +__device__ int One(void) __attribute__((weak, alias("__One"))); -} +// HOST-LABEL: define dso_local i32 @__Two( +// HOST-SAME: ) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: ret i32 2 +// +// DEVICE-LABEL: define dso_local i32 @__Two( +// DEVICE-SAME: ) #[[ATTR0]] { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// DEVICE-NEXT: ret i32 2 +// +__host__ __device__ int __Two(void) { return 2; } +__host__ __device__ int Two(void) __attribute__((weak, alias("__Two"))); -// HOST-LABEL: define dso_local noundef i32 @main( -// HOST-SAME: ) #[[ATTR1:[0-9]+]] { +// DEVICE-LABEL: define linkonce_odr i32 @__Three( +// DEVICE-SAME: ) #[[ATTR0]] comdat { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// DEVICE-NEXT: ret i32 2 +// +__device__ constexpr int __Three(void) { return 2; } +__device__ int Three(void) __attribute__((weak, alias("__Three"))); + +// HOST-LABEL: define linkonce_odr i32 @__Four( +// HOST-SAME: ) #[[ATTR0]] comdat { // HOST-NEXT: [[ENTRY:.*:]] -// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// HOST-NEXT: ret i32 0 +// HOST-NEXT: ret i32 2 // -int main() { - return 0; +constexpr int __Four(void) { return 2; } +int Four(void) __attribute__((weak, alias("__Four"))); + +// DEVICE-LABEL: define linkonce_odr i32 @__Five( +// DEVICE-SAME: ) #[[ATTR0]] comdat { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// DEVICE-NEXT: ret i32 2 +// +__device__ constexpr int __Five(void) { return 2; } +__device__ int Five(void) __attribute__((weak, alias("__Five"))); } + +// DEVICE-LABEL: define dso_local noundef i32 @_Z5__Sixv( +// DEVICE-SAME: ) #[[ATTR0]] { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// DEVICE-NEXT: ret i32 2 +// +__device__ int __Six(void) { return 2; } +// DEVICE-LABEL: define dso_local noundef float @_Z5__Sixf( +// DEVICE-SAME: float noundef [[F:%.*]]) #[[ATTR0]] { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// DEVICE-NEXT: [[F_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// DEVICE-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr +// DEVICE-NEXT: store float [[F]], ptr [[F_ADDR_ASCAST]], align 4 +// DEVICE-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR_ASCAST]], align 4 +// DEVICE-NEXT: [[MUL:%.*]] = fmul contract float 2.000000e+00, [[TMP0]] +// DEVICE-NEXT: ret float [[MUL]] +// +__device__ float __Six(float f) { return 2.0f * f; } +__device__ int Six(void) __attribute__((weak, alias("_Z5__Sixv"))); +__device__ float Six(float f) __attribute__((weak, alias("_Z5__Sixf"))); //. // HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// HOST: attributes #[[ATTR1]] = { mustprogress noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. // DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } //. diff --git a/clang/test/OpenMP/amdgcn_weak_alias.c b/clang/test/OpenMP/amdgcn_weak_alias.c index bf8645bef6d78..a74e9031a86cb 100644 --- a/clang/test/OpenMP/amdgcn_weak_alias.c +++ b/clang/test/OpenMP/amdgcn_weak_alias.c @@ -60,19 +60,6 @@ int Two(void) __attribute__ ((weak, alias("__Two"))); int __Three(void) { return 3; } #pragma omp end declare target int Three(void) __attribute__ ((weak, alias("__Three"))); - - -// HOST-LABEL: define dso_local i32 @main( -// HOST-SAME: ) #[[ATTR0]] { -// HOST-NEXT: [[ENTRY:.*:]] -// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// HOST-NEXT: ret i32 0 -// -int main(){ - return 0; -} - //. // HOST: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. diff --git a/clang/test/OpenMP/amdgcn_weak_alias.cpp b/clang/test/OpenMP/amdgcn_weak_alias.cpp new file mode 100644 index 0000000000000..3362220a6032e --- /dev/null +++ b/clang/test/OpenMP/amdgcn_weak_alias.cpp @@ -0,0 +1,120 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6 +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST +// RUN: %clang_cc1 -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=DEVICE + +//. +// HOST: @_Z3Onev = weak alias i32 (), ptr @_Z5__Onev +// HOST: @_Z3Onef = weak alias float (float), ptr @_Z5__Onef +// HOST: @_Z3Twov = weak alias i32 (), ptr @_Z5__Twov +// HOST: @_Z3Twof = weak alias float (float), ptr @_Z5__Twof +// HOST: @_Z5Threev = weak alias i32 (), ptr @_Z7__Threev +// HOST: @_Z4Fourv = weak alias i32 (), ptr @_Z6__Fourv +//. +// DEVICE: @__omp_rtl_debug_kind = weak_odr hidden addrspace(1) constant i32 0 +// DEVICE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden addrspace(1) constant i32 0 +// DEVICE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace(1) constant i32 0 +// DEVICE: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0 +// DEVICE: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden addrspace(1) constant i32 0 +// DEVICE: @_Z3Twov = weak hidden alias i32 (), ptr @_Z5__Twov +// DEVICE: @_Z3Twof = weak hidden alias float (float), ptr @_Z5__Twof +// DEVICE: @_Z5Threev = weak hidden alias i32 (), ptr @_Z7__Threev +//. +// HOST-LABEL: define dso_local noundef i32 @_Z5__Onev( +// HOST-SAME: ) #[[ATTR0:[0-9]+]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: ret i32 1 +// +int __One(void) { return 1; } + +// HOST-LABEL: define dso_local noundef float @_Z5__Onef( +// HOST-SAME: float noundef [[F:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 +// HOST-NEXT: store float [[F]], ptr [[F_ADDR]], align 4 +// HOST-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4 +// HOST-NEXT: [[MUL:%.*]] = fmul float 1.000000e+00, [[TMP0]] +// HOST-NEXT: ret float [[MUL]] +// +float __One(float f) { return 1.0f * f; } +int One(void) __attribute__((weak, alias("_Z5__Onev"))); +float One(float f) __attribute__((weak, alias("_Z5__Onef"))); + +#pragma omp declare target +// HOST-LABEL: define dso_local noundef i32 @_Z5__Twov( +// HOST-SAME: ) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: ret i32 2 +// +// DEVICE-LABEL: define hidden noundef i32 @_Z5__Twov( +// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// DEVICE-NEXT: ret i32 2 +// +int __Two(void) { return 2; } +// HOST-LABEL: define dso_local noundef float @_Z5__Twof( +// HOST-SAME: float noundef [[F:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 +// HOST-NEXT: store float [[F]], ptr [[F_ADDR]], align 4 +// HOST-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4 +// HOST-NEXT: [[MUL:%.*]] = fmul float 2.000000e+00, [[TMP0]] +// HOST-NEXT: ret float [[MUL]] +// +// DEVICE-LABEL: define hidden noundef float @_Z5__Twof( +// DEVICE-SAME: float noundef [[F:%.*]]) #[[ATTR0]] { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// DEVICE-NEXT: [[F_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// DEVICE-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr +// DEVICE-NEXT: store float [[F]], ptr [[F_ADDR_ASCAST]], align 4 +// DEVICE-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR_ASCAST]], align 4 +// DEVICE-NEXT: [[MUL:%.*]] = fmul float 2.000000e+00, [[TMP0]] +// DEVICE-NEXT: ret float [[MUL]] +// +float __Two(float f) { return 2.0f * f; } +int Two(void) __attribute__((weak, alias("_Z5__Twov"))); +float Two(float f) __attribute__((weak, alias("_Z5__Twof"))); +#pragma omp end declare target + +#pragma omp declare target +// HOST-LABEL: define linkonce_odr noundef i32 @_Z7__Threev( +// HOST-SAME: ) #[[ATTR0]] comdat { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: ret i32 3 +// +// DEVICE-LABEL: define linkonce_odr hidden noundef i32 @_Z7__Threev( +// DEVICE-SAME: ) #[[ATTR0]] comdat { +// DEVICE-NEXT: [[ENTRY:.*:]] +// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// DEVICE-NEXT: ret i32 3 +// +constexpr int __Three(void) { return 3; } +int Three(void) __attribute__((weak, alias("_Z7__Threev"))); +#pragma omp end declare target +// HOST-LABEL: define linkonce_odr noundef i32 @_Z6__Fourv( +// HOST-SAME: ) #[[ATTR0]] comdat { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: ret i32 4 +// +constexpr int __Four(void) { return 4; } +int Four(void) __attribute__((weak, alias("_Z6__Fourv"))); +//. +// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +//. +// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +//. +// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// HOST: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 51} +// HOST: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. +// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// DEVICE: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 51} +// DEVICE: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 51} +// DEVICE: [[META4:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. diff --git a/clang/test/OpenMP/nvptx_weak_alias.c b/clang/test/OpenMP/nvptx_weak_alias.c index 695bd7d0b8af9..e5e1b4409a5a5 100644 --- a/clang/test/OpenMP/nvptx_weak_alias.c +++ b/clang/test/OpenMP/nvptx_weak_alias.c @@ -13,18 +13,6 @@ // int __One(void) { return 1; } int One(void) __attribute__ ((weak, alias("__One"))); - - -// CHECK-LABEL: define dso_local i32 @main( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK-NEXT: ret i32 0 -// -int main(){ - return 0; -} //. // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } //. _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
