llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> Since the input is either known to be not-nan, or the code using the result explicitly checks whether the input is a nan, any of the 3 fmin variants (minnum, minimumnum, minimum) is valid to match. --- Patch is 42.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141987.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (+14-5) - (modified) llvm/test/CodeGen/AMDGPU/fract-match.ll (+216-273) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 52177a2523bcb..a3f668e6d65ff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -327,7 +327,7 @@ class AMDGPUCodeGenPrepareImpl bool visitIntrinsicInst(IntrinsicInst &I); bool visitBitreverseIntrinsicInst(IntrinsicInst &I); - bool visitMinNum(IntrinsicInst &I); + bool visitFMinLike(IntrinsicInst &I); bool visitSqrt(IntrinsicInst &I); bool run(); }; @@ -2197,7 +2197,9 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) { case Intrinsic::bitreverse: return visitBitreverseIntrinsicInst(I); case Intrinsic::minnum: - return visitMinNum(I); + case Intrinsic::minimumnum: + case Intrinsic::minimum: + return visitFMinLike(I); case Intrinsic::sqrt: return visitSqrt(I); default: @@ -2216,7 +2218,9 @@ bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) { } /// Match non-nan fract pattern. -/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0) +/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0)) +/// minimumnum(fsub(x, floor(x)), nextafter(1.0, -1.0)) +/// minimum(fsub(x, floor(x)), nextafter(1.0, -1.0)) /// /// If fract is a useful instruction for the subtarget. Does not account for the /// nan handling; the instruction has a nan check on the input value. 
@@ -2224,7 +2228,12 @@ Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) { if (ST.hasFractBug()) return nullptr; - if (I.getIntrinsicID() != Intrinsic::minnum) + Intrinsic::ID IID = I.getIntrinsicID(); + + // The value is only used in contexts where we know the input isn't a nan, so + // any of the fmin variants are fine. + if (IID != Intrinsic::minnum && + IID != Intrinsic::minimumnum && IID != Intrinsic::minimum) return nullptr; Type *Ty = I.getType(); @@ -2270,7 +2279,7 @@ Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder, return insertValues(Builder, FractArg->getType(), ResultVals); } -bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) { +bool AMDGPUCodeGenPrepareImpl::visitFMinLike(IntrinsicInst &I) { Value *FractArg = matchFractPat(I); if (!FractArg) return false; diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll index 9d98a8dab0501..4ee48716439bd 100644 --- a/llvm/test/CodeGen/AMDGPU/fract-match.ll +++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll @@ -2996,19 +2996,30 @@ entry: } define float @safe_math_fract_f32_minimum(float %x, ptr addrspace(1) writeonly captures(none) %ip) { -; IR-LABEL: define float @safe_math_fract_f32_minimum( -; IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { -; IR-NEXT: [[ENTRY:.*:]] -; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) -; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] -; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minimum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) -; IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00 -; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]] -; IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) -; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000 -; IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]] -; IR-NEXT: store float [[FLOOR]], ptr 
addrspace(1) [[IP]], align 4 -; IR-NEXT: ret float [[COND6]] +; GFX6-IR-LABEL: define float @safe_math_fract_f32_minimum( +; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { +; GFX6-IR-NEXT: [[ENTRY:.*:]] +; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) +; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minimum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) +; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00 +; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]] +; GFX6-IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) +; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000 +; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]] +; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 +; GFX6-IR-NEXT: ret float [[COND6]] +; +; IR-FRACT-LABEL: define float @safe_math_fract_f32_minimum( +; IR-FRACT-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { +; IR-FRACT-NEXT: [[ENTRY:.*:]] +; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) +; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]]) +; IR-FRACT-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) +; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000 +; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]] +; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 +; IR-FRACT-NEXT: ret float [[COND6]] ; ; GFX6-LABEL: safe_math_fract_f32_minimum: ; GFX6: ; %bb.0: ; %entry @@ -3035,20 +3046,14 @@ define float @safe_math_fract_f32_minimum(float %x, ptr addrspace(1) writeonly c ; GFX7-LABEL: safe_math_fract_f32_minimum: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX7-NEXT: v_floor_f32_e32 v3, v0 -; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX7-NEXT: v_min_f32_e32 v5, 0x3f7fffff, v4 -; GFX7-NEXT: v_mov_b32_e32 v6, 0x7fc00000 -; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v4 -; GFX7-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc -; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX7-NEXT: s_mov_b32 s8, 0x7f800000 ; GFX7-NEXT: s_mov_b32 s6, 0 -; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc +; GFX7-NEXT: v_fract_f32_e32 v4, v0 ; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 +; GFX7-NEXT: v_floor_f32_e32 v3, v0 ; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) @@ -3057,16 +3062,10 @@ define float @safe_math_fract_f32_minimum(float %x, ptr addrspace(1) writeonly c ; GFX8-LABEL: safe_math_fract_f32_minimum: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_floor_f32_e32 v3, v0 -; GFX8-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX8-NEXT: v_min_f32_e32 v5, 0x3f7fffff, v4 -; GFX8-NEXT: v_mov_b32_e32 v6, 0x7fc00000 -; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc -; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX8-NEXT: s_mov_b32 s4, 0x7f800000 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc +; GFX8-NEXT: v_fract_f32_e32 v4, v0 ; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4 +; GFX8-NEXT: v_floor_f32_e32 v3, v0 ; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX8-NEXT: global_store_dword v[1:2], v3, off ; GFX8-NEXT: s_waitcnt vmcnt(0) @@ -3075,18 +3074,12 @@ define float @safe_math_fract_f32_minimum(float %x, ptr addrspace(1) writeonly c ; GFX11-LABEL: safe_math_fract_f32_minimum: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_floor_f32_e32 v3, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; 
GFX11-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX11-NEXT: global_store_b32 v[1:2], v3, off -; GFX11-NEXT: v_min_f32_e32 v5, 0x3f7fffff, v4 -; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v4 -; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v5, vcc_lo -; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc_lo +; GFX11-NEXT: v_fract_f32_e32 v3, v0 ; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0| -; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX11-NEXT: v_floor_f32_e32 v4, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo +; GFX11-NEXT: global_store_b32 v[1:2], v4, off ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: safe_math_fract_f32_minimum: @@ -3096,17 +3089,12 @@ define float @safe_math_fract_f32_minimum(float %x, ptr addrspace(1) writeonly c ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_floor_f32_e32 v3, v0 -; GFX12-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX12-NEXT: global_store_b32 v[1:2], v3, off -; GFX12-NEXT: v_minimum_f32 v4, 0x3f7fffff, v4 -; GFX12-NEXT: s_wait_alu 0xfffd -; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc_lo +; GFX12-NEXT: v_fract_f32_e32 v3, v0 ; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX12-NEXT: v_floor_f32_e32 v4, v0 ; GFX12-NEXT: s_wait_alu 0xfffd -; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo +; GFX12-NEXT: global_store_b32 v[1:2], v4, off ; GFX12-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call float @llvm.floor.f32(float %x) @@ -3122,19 +3110,30 @@ entry: } define float @safe_math_fract_f32_minimum_swap(float %x, ptr addrspace(1) writeonly captures(none) %ip) { -; IR-LABEL: define 
float @safe_math_fract_f32_minimum_swap( -; IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { -; IR-NEXT: [[ENTRY:.*:]] -; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) -; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] -; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minimum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) -; IR-NEXT: [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00 -; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]] -; IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) -; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000 -; IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]] -; IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 -; IR-NEXT: ret float [[COND6]] +; GFX6-IR-LABEL: define float @safe_math_fract_f32_minimum_swap( +; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { +; GFX6-IR-NEXT: [[ENTRY:.*:]] +; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) +; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minimum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) +; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00 +; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]] +; GFX6-IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) +; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000 +; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]] +; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 +; GFX6-IR-NEXT: ret float [[COND6]] +; +; IR-FRACT-LABEL: define float @safe_math_fract_f32_minimum_swap( +; IR-FRACT-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { +; IR-FRACT-NEXT: [[ENTRY:.*:]] +; 
IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) +; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]]) +; IR-FRACT-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) +; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000 +; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]] +; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 +; IR-FRACT-NEXT: ret float [[COND6]] ; ; GFX6-LABEL: safe_math_fract_f32_minimum_swap: ; GFX6: ; %bb.0: ; %entry @@ -3161,20 +3160,14 @@ define float @safe_math_fract_f32_minimum_swap(float %x, ptr addrspace(1) writeo ; GFX7-LABEL: safe_math_fract_f32_minimum_swap: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_floor_f32_e32 v3, v0 -; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX7-NEXT: v_min_f32_e32 v5, 0x3f7fffff, v4 -; GFX7-NEXT: v_mov_b32_e32 v6, 0x7fc00000 -; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v4 -; GFX7-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc -; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 ; GFX7-NEXT: s_mov_b32 s8, 0x7f800000 ; GFX7-NEXT: s_mov_b32 s6, 0 -; GFX7-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc +; GFX7-NEXT: v_fract_f32_e32 v4, v0 ; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 +; GFX7-NEXT: v_floor_f32_e32 v3, v0 ; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) @@ -3183,16 +3176,10 @@ define float @safe_math_fract_f32_minimum_swap(float %x, ptr addrspace(1) writeo ; GFX8-LABEL: safe_math_fract_f32_minimum_swap: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_floor_f32_e32 v3, v0 -; GFX8-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX8-NEXT: v_min_f32_e32 v5, 0x3f7fffff, v4 -; GFX8-NEXT: v_mov_b32_e32 v6, 0x7fc00000 -; GFX8-NEXT: 
v_cmp_o_f32_e32 vcc, v4, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc -; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 ; GFX8-NEXT: s_mov_b32 s4, 0x7f800000 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc +; GFX8-NEXT: v_fract_f32_e32 v4, v0 ; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4 +; GFX8-NEXT: v_floor_f32_e32 v3, v0 ; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; GFX8-NEXT: global_store_dword v[1:2], v3, off ; GFX8-NEXT: s_waitcnt vmcnt(0) @@ -3201,18 +3188,12 @@ define float @safe_math_fract_f32_minimum_swap(float %x, ptr addrspace(1) writeo ; GFX11-LABEL: safe_math_fract_f32_minimum_swap: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_floor_f32_e32 v3, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX11-NEXT: global_store_b32 v[1:2], v3, off -; GFX11-NEXT: v_min_f32_e32 v5, 0x3f7fffff, v4 -; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v4 -; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v5, vcc_lo -; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc_lo +; GFX11-NEXT: v_fract_f32_e32 v3, v0 ; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0| -; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX11-NEXT: v_floor_f32_e32 v4, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo +; GFX11-NEXT: global_store_b32 v[1:2], v4, off ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: safe_math_fract_f32_minimum_swap: @@ -3222,17 +3203,12 @@ define float @safe_math_fract_f32_minimum_swap(float %x, ptr addrspace(1) writeo ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_floor_f32_e32 v3, v0 -; GFX12-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX12-NEXT: global_store_b32 v[1:2], v3, off -; GFX12-NEXT: v_minimum_f32 v4, 0x3f7fffff, v4 -; GFX12-NEXT: s_wait_alu 0xfffd -; GFX12-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc_lo +; GFX12-NEXT: v_fract_f32_e32 v3, v0 ; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX12-NEXT: v_floor_f32_e32 v4, v0 ; GFX12-NEXT: s_wait_alu 0xfffd -; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo +; GFX12-NEXT: global_store_b32 v[1:2], v4, off ; GFX12-NEXT: s_setpc_b64 s[30:31] entry: %floor = tail call float @llvm.floor.f32(float %x) @@ -3248,19 +3224,30 @@ entry: } define float @safe_math_fract_f32_minimumnum(float %x, ptr addrspace(1) writeonly captures(none) %ip) { -; IR-LABEL: define float @safe_math_fract_f32_minimumnum( -; IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { -; IR-NEXT: [[ENTRY:.*:]] -; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) -; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] -; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minimumnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) -; IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00 -; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]] -; IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) -; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000 -; IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]] -; IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 -; IR-NEXT: ret float [[COND6]] +; GFX6-IR-LABEL: define float @safe_math_fract_f32_minimumnum( +; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { +; GFX6-IR-NEXT: [[ENTRY:.*:]] +; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) +; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float 
[[X]], [[FLOOR]] +; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minimumnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) +; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00 +; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]] +; GFX6-IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) +; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000 +; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]] +; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 +; GFX6-IR-NEXT: ret float [[COND6]] +; +; IR-FRACT-LABEL: define float @safe_math_fract_f32_minimumnum( +; IR-FRACT-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { +; IR-FRACT-NEXT: [[ENTRY:.*:]] +; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) +; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]]) +; IR-FRACT-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) +; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000 +; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]] +; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4 +; IR-FRACT-NEXT: ret float [[COND6]] ; ; GFX6-LABEL: safe_math_fract_f32_minimumnum: ; GFX6: ; %bb.0: ; %entry @@ -3284,17 +3271,14 @@ define float @safe_math_fract_f32_minimumnum(float %x, ptr addrspace(1) writeonl ; GFX7-LABEL: safe_math_fract_f32_minimumnum: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_floor_f32_e32 v3, v0 -; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3 -; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4 -; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX7-NEXT: s_mov_b32 s8, 0x7f800000 ; GFX7-NEXT: s_mov_b32 s6, 0 -; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc +; GFX7-NEXT: v_fract_f32_e32 v4, v0 ; GFX7-NEXT: v_cmp_neq_f32_e64 
vcc, |v0|, s8 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s4, s6 ; GFX7-NEXT: s_mov_b32 s5, s6 +; GFX7-NE... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/141987 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits