https://github.com/lucas-rami updated https://github.com/llvm/llvm-project/pull/138284
>From b277b2e90a3665c5e945ddff47c6c55a7fdb8d33 Mon Sep 17 00:00:00 2001 From: Lucas Ramirez <lucas.r...@proton.me> Date: Thu, 22 May 2025 13:26:57 +0000 Subject: [PATCH 1/3] Rebase on main (integrate changes from 1b34722) --- clang/lib/CodeGen/Targets/AMDGPU.cpp | 27 ++++----- clang/lib/Sema/SemaAMDGPU.cpp | 5 -- clang/test/SemaOpenCL/amdgpu-attrs.cl | 1 - llvm/lib/IR/Verifier.cpp | 23 ++++++++ llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 4 ++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 46 ++++++++------- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 +- .../AMDGPU/attr-amdgpu-waves-per-eu.ll | 12 ++++ ...-work-group-size-overrides-waves-per-eu.ll | 4 +- .../CodeGen/AMDGPU/propagate-waves-per-eu.ll | 56 ++++++++----------- .../Verifier/AMDGPU/amdgpu-waves-per-eu.ll | 40 +++++++++++++ 11 files changed, 147 insertions(+), 75 deletions(-) create mode 100644 llvm/test/Verifier/AMDGPU/amdgpu-waves-per-eu.ll diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 47a552a7bf495..d2c43f86b7b13 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -743,20 +743,21 @@ void CodeGenModule::handleAMDGPUWavesPerEUAttr( llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) { unsigned Min = Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue(); - unsigned Max = - Attr->getMax() - ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue() - : 0; - if (Min != 0) { - assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); - - std::string AttrVal = llvm::utostr(Min); - if (Max != 0) - AttrVal = AttrVal + "," + llvm::utostr(Max); - F->addFnAttr("amdgpu-waves-per-eu", AttrVal); - } else - assert(Max == 0 && "Max must be zero"); + if (Attr->getMax()) { + unsigned Max = + Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue(); + assert(Min == 0 || (Min != 0 && Max != 0) && + "Min must be non-zero when Max is non-zero"); + assert(Min <= Max && "Min must be less than or equal to Max"); + // Do not add the attribute if min,max=0,0. + if (Min != 0) { + std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); + F->addFnAttr("amdgpu-waves-per-eu", AttrVal); + } + } else if (Min != 0) { + F->addFnAttr("amdgpu-waves-per-eu", llvm::utostr(Min)); + } } std::unique_ptr<TargetCodeGenInfo> diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index e6414a623b929..9ae3ec1289def 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -244,11 +244,6 @@ static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr, if (MaxExpr && !S.checkUInt32Argument(Attr, MaxExpr, Max, 1)) return true; - if (Min == 0 && Max != 0) { - S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid) - << &Attr << 0; - return true; - } if (Max != 0 && Min > Max) { S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid) << &Attr << 1; diff --git a/clang/test/SemaOpenCL/amdgpu-attrs.cl b/clang/test/SemaOpenCL/amdgpu-attrs.cl index 89ba3f86803c5..b9b44dff4d4a9 100644 --- a/clang/test/SemaOpenCL/amdgpu-attrs.cl +++ b/clang/test/SemaOpenCL/amdgpu-attrs.cl @@ -46,7 +46,6 @@ __attribute__((amdgpu_num_sgpr(4294967296))) kernel void kernel_num_sgpr_L() {} __attribute__((amdgpu_num_vgpr(4294967296))) kernel void kernel_num_vgpr_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} __attribute__((amdgpu_flat_work_group_size(0, 64))) kernel void kernel_flat_work_group_size_0_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute argument is invalid: max must be 0 since min is 0}} -__attribute__((amdgpu_waves_per_eu(0, 4))) kernel void kernel_waves_per_eu_0_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute argument is invalid: max must be 0 since min is 0}} __attribute__((amdgpu_flat_work_group_size(64, 32))) kernel void kernel_flat_work_group_size_64_32() {} // expected-error {{'amdgpu_flat_work_group_size' attribute argument is invalid: min must not be greater than max}} __attribute__((amdgpu_waves_per_eu(4, 2))) kernel void kernel_waves_per_eu_4_2() {} // expected-error {{'amdgpu_waves_per_eu' attribute argument is invalid: min must not be greater than max}} diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 9cab88b09779a..13b62ad548b63 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2519,6 +2519,29 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, CheckFailed("invalid value for 'denormal-fp-math-f32' attribute: " + S, V); } + + if (TT.isAMDGPU()) { + if (auto A = Attrs.getFnAttr("amdgpu-waves-per-eu"); A.isValid()) { + std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(','); + unsigned Min = 0; + StringRef MinStr = Strs.first.trim(); + Check(!MinStr.getAsInteger(0, Min), + "minimum for 'amdgpu-waves-per-eu' must be integer: " + MinStr); + if (!Strs.second.empty()) { + unsigned Max = 0; + StringRef MaxStr = Strs.second.trim(); + Check(!MaxStr.getAsInteger(0, Max), + "maximum for 'amdgpu-waves-per-eu' must be integer: " + MaxStr); + Check(Max, "maximum for 'amdgpu-waves-per-eu' must be non-zero"); + Check(Min <= Max, "minimum must be less than or equal to maximum for " + "'amdgpu-waves-per-eu': " + + MinStr + " > " + MaxStr); + } else { + Check(Min, "minimum for 'amdgpu-waves-per-eu' must be non-zero when " + "maximum is not provided"); + } + } + } } void Verifier::verifyFunctionMetadata( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index fef22c81c9391..2a78d6382ad64 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1363,6 +1363,10 @@ static bool updateWavesPerEU(Module &M, TargetMachine &TM) { return Changed; } +// 14 ==> 15 +// 15 ==> 16 +// 16 ==> 17 + static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, AMDGPUAttributorOptions Options, ThinOrFullLTOPhase LTOPhase) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index d095fc6cf9549..1fcd0654fba31 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -156,15 +156,15 @@ AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const { } } -std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes( - const Function &F) const { +std::pair<unsigned, unsigned> +AMDGPUSubtarget::getFlatWorkGroupSizes(const Function &F) const { // Default minimum/maximum flat work group sizes. std::pair<unsigned, unsigned> Default = - getDefaultFlatWorkGroupSize(F.getCallingConv()); + getDefaultFlatWorkGroupSize(F.getCallingConv()); // Requested minimum/maximum flat work group sizes. std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute( - F, "amdgpu-flat-work-group-size", Default); + F, "amdgpu-flat-work-group-size", Default); // Make sure requested minimum is less than requested maximum. if (Requested.first > Requested.second) @@ -186,23 +186,29 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU( // sizes limits the achievable maximum, and we aim to support enough waves per // EU so that we can concurrently execute all waves of a single workgroup of // maximum size on a CU. - std::pair<unsigned, unsigned> Default = { + std::pair<unsigned, unsigned> WavesPerEU = { getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second), getOccupancyWithWorkGroupSizes(LDSBytes, FlatWorkGroupSizes).second}; - Default.first = std::min(Default.first, Default.second); - - // Make sure requested minimum is within the default range and lower than the - // requested maximum. The latter must not violate target specification. - if (RequestedWavesPerEU.first < Default.first || - RequestedWavesPerEU.first > Default.second || - RequestedWavesPerEU.first > RequestedWavesPerEU.second || - RequestedWavesPerEU.second > getMaxWavesPerEU()) - return Default; - - // We cannot exceed maximum occupancy implied by flat workgroup size and LDS. - RequestedWavesPerEU.second = - std::min(RequestedWavesPerEU.second, Default.second); - return RequestedWavesPerEU; + WavesPerEU.first = std::min(WavesPerEU.first, WavesPerEU.second); + + // Requested minimum must not violate subtarget's specifications and be no + // greater than maximum. + if (RequestedWavesPerEU.first && + (RequestedWavesPerEU.first < getMinWavesPerEU() || + RequestedWavesPerEU.first > RequestedWavesPerEU.second)) + return WavesPerEU; + // Requested maximum must not violate subtarget's specifications. + if (RequestedWavesPerEU.second > getMaxWavesPerEU()) + return WavesPerEU; + + // A requested maximum may limit both the final minimum and maximum, but + // not increase them. A requested minimum can either decrease or increase the + // default minimum as long as it doesn't exceed the maximum. + WavesPerEU.second = std::min(WavesPerEU.second, RequestedWavesPerEU.second); + if (RequestedWavesPerEU.first) + WavesPerEU.first = RequestedWavesPerEU.first; + WavesPerEU.first = std::min(WavesPerEU.first, WavesPerEU.second); + return WavesPerEU; } std::pair<unsigned, unsigned> @@ -229,7 +235,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes, unsigned LDSBytes, const Function &F) const { // Default minimum/maximum number of waves per execution unit. - std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU()); + std::pair<unsigned, unsigned> Default(0, getMaxWavesPerEU()); // Requested minimum/maximum number of waves per execution unit. std::pair<unsigned, unsigned> Requested = diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 7c24f428d78e4..1c482aa1e3a46 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -127,7 +127,9 @@ class AMDGPUSubtarget { /// Returns the target minimum/maximum number of waves per EU. This is based /// on the minimum/maximum number of \p RequestedWavesPerEU and further /// limited by the maximum achievable occupancy derived from the range of \p - /// FlatWorkGroupSizes and number of \p LDSBytes per workgroup. + /// FlatWorkGroupSizes and number of \p LDSBytes per workgroup. A + /// minimum requested waves/EU value of 0 indicates an intent to not restrict + /// the minimum target occupancy. std::pair<unsigned, unsigned> getEffectiveWavesPerEU(std::pair<unsigned, unsigned> RequestedWavesPerEU, std::pair<unsigned, unsigned> FlatWorkGroupSizes, diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll index e9fe4f3c618c7..2ab38a9f12a52 100644 --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll @@ -225,3 +225,15 @@ entry: ret void } attributes #12 = {"amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2,10" "amdgpu-lds-size"="16384"} + +; At most 2 waves per execution unit. +; CHECK-LABEL: {{^}}empty_at_most_2: +; CHECK: SGPRBlocks: 12 +; CHECK: VGPRBlocks: 21 +; CHECK: NumSGPRsForWavesPerEU: 102 +; CHECK: NumVGPRsForWavesPerEU: 85 +define amdgpu_kernel void @empty_at_most_2() #13 { +entry: + ret void +} +attributes #13 = {"amdgpu-waves-per-eu"="0,2"} diff --git a/llvm/test/CodeGen/AMDGPU/default-flat-work-group-size-overrides-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/default-flat-work-group-size-overrides-waves-per-eu.ll index 67061bcb2a785..f8c7b593e6eba 100644 --- a/llvm/test/CodeGen/AMDGPU/default-flat-work-group-size-overrides-waves-per-eu.ll +++ b/llvm/test/CodeGen/AMDGPU/default-flat-work-group-size-overrides-waves-per-eu.ll @@ -57,5 +57,5 @@ entry: ret void } -attributes #0 = { "amdgpu-waves-per-eu"="1,1" } -attributes #1 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1024" } +attributes #0 = { "amdgpu-waves-per-eu"="1" } +attributes #1 = { "amdgpu-waves-per-eu"="1" "amdgpu-flat-work-group-size"="1,1024" } diff --git a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll index b87d266cc2514..5e1cae0760c36 100644 --- a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll +++ b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s -; Check propagation of amdgpu-flat-work-group-size attribute. +; Check propagation of amdgpu-waves-per-eu attribute. ; Called from a single kernel with 1,8 define internal void @default_to_1_8_a() { @@ -216,41 +216,30 @@ define internal i32 @bitcasted_function() { ret i32 0 } -define internal void @called_from_invalid_bounds_0() { -; CHECK-LABEL: define internal void @called_from_invalid_bounds_0 -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: ret void -; +define internal void @called_without_min_waves() { ret void } -define internal void @called_from_invalid_bounds_1() { -; CHECK-LABEL: define internal void @called_from_invalid_bounds_1 +define internal void @called_from_invalid_bounds() { +; CHECK-LABEL: define internal void @called_from_invalid_bounds ; CHECK-SAME: () #[[ATTR10:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void } -; Invalid range for amdgpu-waves-per-eu -define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 { -; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_0_8 -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: call void @called_from_invalid_bounds_0() -; CHECK-NEXT: ret void -; - call void @called_from_invalid_bounds_0() +define internal void @called_from_invalid_bounds_1() { + call void @called_without_min_waves() ret void } - ; Invalid range for amdgpu-waves-per-eu define amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_1_123 ; CHECK-SAME: () #[[ATTR11:[0-9]+]] { -; CHECK-NEXT: call void @called_from_invalid_bounds_1() +; CHECK-NEXT: call void @called_from_invalid_bounds() ; CHECK-NEXT: ret void ; - call void @called_from_invalid_bounds_1() + call void @called_from_invalid_bounds() ret void } @@ -279,7 +268,7 @@ define amdgpu_kernel void @kernel_3_6() #12 { ; 3,6 -> 6,9 define internal void @refine_upper_func_3_6() #13 { ; CHECK-LABEL: define internal void @refine_upper_func_3_6 -; CHECK-SAME: () #[[ATTR9]] { +; CHECK-SAME: () #[[ATTR14:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void @@ -288,7 +277,7 @@ define internal void @refine_upper_func_3_6() #13 { ; 4,8 -> 6,8 define internal void @refine_lower_func_4_8() #14 { ; CHECK-LABEL: define internal void @refine_lower_func_4_8 -; CHECK-SAME: () #[[ATTR14:[0-9]+]] { +; CHECK-SAME: () #[[ATTR15:[0-9]+]] { ; CHECK-NEXT: call void @refine_upper_func_3_6() ; CHECK-NEXT: ret void ; @@ -298,7 +287,7 @@ define internal void @refine_lower_func_4_8() #14 { define amdgpu_kernel void @kernel_foo_6_8() #15 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_foo_6_8 -; CHECK-SAME: () #[[ATTR15:[0-9]+]] { +; CHECK-SAME: () #[[ATTR16:[0-9]+]] { ; CHECK-NEXT: call void @refine_upper_func_3_6() ; CHECK-NEXT: call void @refine_lower_func_4_8() ; CHECK-NEXT: call void @func_9_10_a() @@ -313,7 +302,7 @@ define amdgpu_kernel void @kernel_foo_6_8() #15 { ; 5,5 -> 5,5 define internal void @func_5_5() #16 { ; CHECK-LABEL: define internal void @func_5_5 -; CHECK-SAME: () #[[ATTR16:[0-9]+]] { +; CHECK-SAME: () #[[ATTR17:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void @@ -322,7 +311,7 @@ define internal void @func_5_5() #16 { ; 5,8 -> 8,8 define internal void @func_5_8() #17 { ; CHECK-LABEL: define internal void @func_5_8 -; CHECK-SAME: () #[[ATTR17:[0-9]+]] { +; CHECK-SAME: () #[[ATTR18:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void @@ -331,7 +320,7 @@ define internal void @func_5_8() #17 { ; 9,10 -> 9,10 define internal void @func_9_10_a() #18 { ; CHECK-LABEL: define internal void @func_9_10_a -; CHECK-SAME: () #[[ATTR18:[0-9]+]] { +; CHECK-SAME: () #[[ATTR19:[0-9]+]] { ; CHECK-NEXT: ret void ; ret void @@ -340,7 +329,7 @@ define internal void @func_9_10_a() #18 { ; 9,10 -> 9,9 define internal void @func_9_10_b() #18 { ; CHECK-LABEL: define internal void @func_9_10_b -; CHECK-SAME: () #[[ATTR18]] { +; CHECK-SAME: () #[[ATTR19]] { ; CHECK-NEXT: ret void ; ret void @@ -348,7 +337,7 @@ define internal void @func_9_10_b() #18 { define amdgpu_kernel void @kernel_bar_8_9() #19 { ; CHECK-LABEL: define amdgpu_kernel void @kernel_bar_8_9 -; CHECK-SAME: () #[[ATTR19:[0-9]+]] { +; CHECK-SAME: () #[[ATTR20:[0-9]+]] { ; CHECK-NEXT: call void @refine_upper_func_3_6() ; CHECK-NEXT: call void @func_5_5() ; CHECK-NEXT: call void @func_9_10_b() @@ -413,10 +402,11 @@ attributes #19 = { "amdgpu-waves-per-eu"="8,9" } ; CHECK: attributes #[[ATTR11]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR12]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR13]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/Verifier/AMDGPU/amdgpu-waves-per-eu.ll b/llvm/test/Verifier/AMDGPU/amdgpu-waves-per-eu.ll new file mode 100644 index 0000000000000..8686a4278147e --- /dev/null +++ b/llvm/test/Verifier/AMDGPU/amdgpu-waves-per-eu.ll @@ -0,0 +1,40 @@ +; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s + +target triple = "amdgcn-amd-amdhsa" + +define void @valid_amdgpu_waves_per_eu_range() "amdgpu-waves-per-eu"="2,4" { + ret void +} + +define void @valid_amdgpu_waves_per_eu_min_only() "amdgpu-waves-per-eu"="2" { + ret void +} + +define void @valid_amdgpu_waves_per_eu_max_only() "amdgpu-waves-per-eu"="0,4" { + ret void +} + +; CHECK: minimum for 'amdgpu-waves-per-eu' must be integer: x +define void @invalid_amdgpu_waves_per_eu_min_nan() "amdgpu-waves-per-eu"="x" { + ret void +} + +; CHECK: maximum for 'amdgpu-waves-per-eu' must be integer: x +define void @invalid_amdgpu_waves_per_eu_max_nan() "amdgpu-waves-per-eu"="0,x" { + ret void +} + +; CHECK: minimum for 'amdgpu-waves-per-eu' must be non-zero when maximum is not provided +define void @invalid_amdgpu_waves_per_eu_min_zero() "amdgpu-waves-per-eu"="0" { + ret void +} + +; CHECK: maximum for 'amdgpu-waves-per-eu' must be non-zero +define void @invalid_amdgpu_waves_per_eu_max_zero() "amdgpu-waves-per-eu"="2,0" { + ret void +} + +; CHECK: minimum must be less than or equal to maximum for 'amdgpu-waves-per-eu': 2 > 1 +define void @invalid_amdgpu_waves_per_eu_max_lt_min() "amdgpu-waves-per-eu"="2,1" { + ret void +} >From 1bb023b51e7fa0e4617cfe4662c4ea71267310a9 Mon Sep 17 00:00:00 2001 From: Lucas Ramirez <lucas.r...@proton.me> Date: Thu, 22 May 2025 13:59:39 +0000 Subject: [PATCH 2/3] Remove spurious comment --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 2a78d6382ad64..fef22c81c9391 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1363,10 +1363,6 @@ static bool updateWavesPerEU(Module &M, TargetMachine &TM) { return Changed; } -// 14 ==> 15 -// 15 ==> 16 -// 16 ==> 17 - static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, AMDGPUAttributorOptions Options, ThinOrFullLTOPhase LTOPhase) { >From 1c1f1395105879d384abf19737c2cb41249fc0fe Mon Sep 17 00:00:00 2001 From: Lucas Ramirez <lucas.r...@proton.me> Date: Fri, 27 Jun 2025 18:24:25 +0000 Subject: [PATCH 3/3] Remove verifier code and add codegen tests --- clang/lib/CodeGen/Targets/AMDGPU.cpp | 2 +- clang/test/CodeGenHIP/amdgpu-waves-per-eu.hip | 53 +++++++++++++++++++ clang/test/SemaOpenCL/amdgpu-attrs.cl | 1 + llvm/lib/IR/Verifier.cpp | 23 -------- .../Verifier/AMDGPU/amdgpu-waves-per-eu.ll | 40 -------------- 5 files changed, 55 insertions(+), 64 deletions(-) create mode 100644 clang/test/CodeGenHIP/amdgpu-waves-per-eu.hip delete mode 100644 llvm/test/Verifier/AMDGPU/amdgpu-waves-per-eu.ll diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index d2c43f86b7b13..5dd65103fbbb9 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -751,7 +751,7 @@ void CodeGenModule::handleAMDGPUWavesPerEUAttr( "Min must be non-zero when Max is non-zero"); assert(Min <= Max && "Min must be less than or equal to Max"); // Do not add the attribute if min,max=0,0. - if (Min != 0) { + if (Max != 0) { std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); F->addFnAttr("amdgpu-waves-per-eu", AttrVal); } diff --git a/clang/test/CodeGenHIP/amdgpu-waves-per-eu.hip b/clang/test/CodeGenHIP/amdgpu-waves-per-eu.hip new file mode 100644 index 0000000000000..1fb7fd1501f0a --- /dev/null +++ b/clang/test/CodeGenHIP/amdgpu-waves-per-eu.hip @@ -0,0 +1,53 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device -emit-llvm -o - %s | FileCheck %s + +// COM: Most tests are in the OpenCL semastics, this is just a verification for HIP + +#define __global__ __attribute__((global)) + +//. +// CHECK: @__hip_cuid_ = addrspace(1) global i8 0 +// CHECK: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata" +//. +// CHECK: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@_Z21kernel_waves_per_eu_0v +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +__global__ __attribute__((amdgpu_waves_per_eu(0))) void kernel_waves_per_eu_0() {} + +// Equivalent to kernel_waves_per_eu_0. +// CHECK: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@_Z23kernel_waves_per_eu_0_0v +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +__global__ __attribute__((amdgpu_waves_per_eu(0, 0))) void kernel_waves_per_eu_0_0() {} + +// CHECK: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@_Z23kernel_waves_per_eu_0_4v +// CHECK-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +__global__ __attribute__((amdgpu_waves_per_eu(0, 4))) void kernel_waves_per_eu_0_4() {} + +// CHECK: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@_Z23kernel_waves_per_eu_1_4v +// CHECK-SAME: () #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +__global__ __attribute__((amdgpu_waves_per_eu(1, 4))) void kernel_waves_per_eu_1_4() {} +//. +// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// CHECK: attributes #[[ATTR1]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "amdgpu-waves-per-eu"="0,4" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// CHECK: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "amdgpu-waves-per-eu"="1,4" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} +// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} +// CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +//. diff --git a/clang/test/SemaOpenCL/amdgpu-attrs.cl b/clang/test/SemaOpenCL/amdgpu-attrs.cl index b9b44dff4d4a9..50497d68f5991 100644 --- a/clang/test/SemaOpenCL/amdgpu-attrs.cl +++ b/clang/test/SemaOpenCL/amdgpu-attrs.cl @@ -60,6 +60,7 @@ __attribute__((amdgpu_num_vgpr(0))) kernel void kernel_num_vgpr_0() {} kernel __attribute__((amdgpu_flat_work_group_size(32, 64))) void kernel_flat_work_group_size_32_64() {} kernel __attribute__((amdgpu_waves_per_eu(2))) void kernel_waves_per_eu_2() {} +kernel __attribute__((amdgpu_waves_per_eu(0, 4))) kernel void kernel_waves_per_eu_0_4() {} kernel __attribute__((amdgpu_waves_per_eu(2, 4))) void kernel_waves_per_eu_2_4() {} kernel __attribute__((amdgpu_num_sgpr(32))) void kernel_num_sgpr_32() {} kernel __attribute__((amdgpu_num_vgpr(64))) void kernel_num_vgpr_64() {} diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 13b62ad548b63..9cab88b09779a 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2519,29 +2519,6 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, CheckFailed("invalid value for 'denormal-fp-math-f32' attribute: " + S, V); } - - if (TT.isAMDGPU()) { - if (auto A = Attrs.getFnAttr("amdgpu-waves-per-eu"); A.isValid()) { - std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(','); - unsigned Min = 0; - StringRef MinStr = Strs.first.trim(); - Check(!MinStr.getAsInteger(0, Min), - "minimum for 'amdgpu-waves-per-eu' must be integer: " + MinStr); - if (!Strs.second.empty()) { - unsigned Max = 0; - StringRef MaxStr = Strs.second.trim(); - Check(!MaxStr.getAsInteger(0, Max), - "maximum for 'amdgpu-waves-per-eu' must be integer: " + MaxStr); - Check(Max, "maximum for 'amdgpu-waves-per-eu' must be non-zero"); - Check(Min <= Max, "minimum must be less than or equal to maximum for " - "'amdgpu-waves-per-eu': " + - MinStr + " > " + MaxStr); - } else { - Check(Min, "minimum for 'amdgpu-waves-per-eu' must be non-zero when " - "maximum is not provided"); - } - } - } } void Verifier::verifyFunctionMetadata( diff --git a/llvm/test/Verifier/AMDGPU/amdgpu-waves-per-eu.ll b/llvm/test/Verifier/AMDGPU/amdgpu-waves-per-eu.ll deleted file mode 100644 index 8686a4278147e..0000000000000 --- a/llvm/test/Verifier/AMDGPU/amdgpu-waves-per-eu.ll +++ /dev/null @@ -1,40 +0,0 @@ -; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s - -target triple = "amdgcn-amd-amdhsa" - -define void @valid_amdgpu_waves_per_eu_range() "amdgpu-waves-per-eu"="2,4" { - ret void -} - -define void @valid_amdgpu_waves_per_eu_min_only() "amdgpu-waves-per-eu"="2" { - ret void -} - -define void @valid_amdgpu_waves_per_eu_max_only() "amdgpu-waves-per-eu"="0,4" { - ret void -} - -; CHECK: minimum for 'amdgpu-waves-per-eu' must be integer: x -define void @invalid_amdgpu_waves_per_eu_min_nan() "amdgpu-waves-per-eu"="x" { - ret void -} - -; CHECK: maximum for 'amdgpu-waves-per-eu' must be integer: x -define void @invalid_amdgpu_waves_per_eu_max_nan() "amdgpu-waves-per-eu"="0,x" { - ret void -} - -; CHECK: minimum for 'amdgpu-waves-per-eu' must be non-zero when maximum is not provided -define void @invalid_amdgpu_waves_per_eu_min_zero() "amdgpu-waves-per-eu"="0" { - ret void -} - -; CHECK: maximum for 'amdgpu-waves-per-eu' must be non-zero -define void @invalid_amdgpu_waves_per_eu_max_zero() "amdgpu-waves-per-eu"="2,0" { - ret void -} - -; CHECK: minimum must be less than or equal to maximum for 'amdgpu-waves-per-eu': 2 > 1 -define void @invalid_amdgpu_waves_per_eu_max_lt_min() "amdgpu-waves-per-eu"="2,1" { - ret void -} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits