llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-mc Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> Select to minimum3/maximum3. Leave f16/v2f16 for later since it's complicated by only having the vector version. --- Patch is 185.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117634.diff 6 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+4) - (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+17) - (modified) llvm/test/CodeGen/AMDGPU/fmaximum3.ll (+813-555) - (modified) llvm/test/CodeGen/AMDGPU/fminimum3.ll (+813-555) - (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll (+165-256) - (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll (+165-256) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d35bb15ac6566a..914b25245c95bf 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -855,6 +855,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM}, {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16}, Custom); + } else { + // FIXME: For nnan fmaximum, emit the fmaximum3 instead of fmaxnum + if (Subtarget->hasMinimum3Maximum3F32()) + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal); } setOperationAction(ISD::INTRINSIC_WO_CHAIN, diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 5d4d56e8b0ad22..2b207e008581b3 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1234,6 +1234,23 @@ def : IntClampPat<V_MQSAD_PK_U16_U8_e64, int_amdgcn_mqsad_pk_u16_u8>; def : IntClampPat<V_QSAD_PK_U16_U8_e64, int_amdgcn_qsad_pk_u16_u8>; def : IntClampPat<V_MQSAD_U32_U8_e64, int_amdgcn_mqsad_u32_u8>; +//===----------------------------------------------------------------------===// +// Floating-point operation Patterns +//===----------------------------------------------------------------------===// + +// Implement fminimum(x, y) by using minimum3(x, y, y) +class MinimumMaximumByMinimum3Maximum3<SDPatternOperator node, ValueType vt, + Instruction inst> : GCNPat< + (vt (node (VOP3Mods vt:$src0, i32:$src0_mods), (VOP3Mods vt:$src1, i32:$src1_mods))), + (inst $src0_mods, $src0, $src1_mods, $src1, $src1_mods, $src1) +>; + +// Prefer the real 2 operand form if legal +let SubtargetPredicate = HasMinimum3Maximum3F32, AddedComplexity = -1000 in { +def : MinimumMaximumByMinimum3Maximum3<fminimum, f32, V_MINIMUM3_F32_e64>; +def : MinimumMaximumByMinimum3Maximum3<fmaximum, f32, V_MAXIMUM3_F32_e64>; +} + //===----------------------------------------------------------------------===// // Target-specific instruction encodings. //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll index 08122cd0d89eab..209ae86b4dedce 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll @@ -14,19 +14,26 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) ret float %max1 @@ -43,19 +50,26 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v2, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v2, v0, v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %c, float %max0) ret float %max1 @@ -70,21 +84,30 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre ; GFX12-NEXT: v_readfirstlane_b32 s0, v0 ; GFX12-NEXT: ; return to shader part epilog ; -; GFX9-LABEL: s_fmaximum3_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: v_max_f32_e32 v1, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_max_f32_e32 v1, s2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: ; return to shader part epilog +; GFX940-LABEL: s_fmaximum3_f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: v_mov_b32_e32 v0, s1 +; GFX940-NEXT: v_max_f32_e32 v1, s0, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_max_f32_e32 v1, s2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_readfirstlane_b32 s0, v0 +; GFX940-NEXT: ; return to shader part epilog +; +; GFX950-LABEL: s_fmaximum3_f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s2, s2 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_readfirstlane_b32 s0, v0 +; GFX950-NEXT: ; return to shader part epilog %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) %cast = bitcast float %max1 to i32 @@ -103,19 +126,26 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, |v0|, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, |v0|, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, |v0|, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, |v0|, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %max0 = call float @llvm.maximum.f32(float %a.fabs, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) @@ -133,19 +163,26 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, |v1|, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, v0, |v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, v0, |v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v1|, |v1| +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %b.fabs = call float @llvm.fabs.f32(float %b) %max0 = call float @llvm.maximum.f32(float %a, float %b.fabs) %max1 = call float @llvm.maximum.f32(float %max0, float %c) @@ -163,19 +200,26 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %c.fabs = call float @llvm.fabs.f32(float %c) %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c.fabs) @@ -193,19 +237,26 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v1| +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %c.fabs = call float @llvm.fabs.f32(float %c) @@ -225,19 +276,26 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v1, -v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, -v2, -v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg float %a %b.fneg = fneg float %b %c.fneg = fneg float %c @@ -257,19 +315,26 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg_fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, -|v0|, -|v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, -|v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg_fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, -|v0|, -|v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, -|v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg_fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v1| +; GFX950-NEXT: v_maximum3_f32 v0, v0, -|v2|, -|v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %c.fabs = call float @llvm.fabs.f32(float %c) @@ -292,19 +357,26 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, -v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, -v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, -v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg float %a %max0 = call float @llvm.maximum.f32(float %a.fneg, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) @@ -322,19 +394,26 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, -v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, v0, -v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg1: +; GFX940: ; %bb.0: +; GFX940-NE... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/117634 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits