llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-selectiondag Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> Math library code has quite a few places with complex bit logic that are ultimately fed into a copysign. This helps avoid some regressions in a future patch. This assumes the position in the float type, which should at least be valid for IEEE types. Not sure if we need to guard against ppc_fp128 or anything else weird. There appears to be some value in simplifying the value operand as well, but I'll address that separately. --- Full diff: https://github.com/llvm/llvm-project/pull/97151.diff 6 Files Affected: - (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+6) - (modified) llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll (+7-15) - (modified) llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll (+25-30) - (modified) llvm/test/CodeGen/RISCV/double-arith.ll (+1-3) - (modified) llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll (+4-8) - (modified) llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll (+4-6) ``````````diff diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 254d63abdf805..eaab2b3421190 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17565,6 +17565,12 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); + // We only take the sign bit from the sign operand. + EVT SignVT = N1.getValueType(); + if (SimplifyDemandedBits(N1, + APInt::getSignMask(SignVT.getScalarSizeInBits()))) + return SDValue(N, 0); + return SDValue(); } diff --git a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll index 1eccb55202f02..af4f236c783c6 100644 --- a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll +++ b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll @@ -11,11 +11,10 @@ define half @test_pown_reduced_fast_f16_known_odd(half %x, i32 %y.arg) #0 { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_or_b32_e32 v1, 1, v1 ; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1 -; GFX9-NEXT: v_and_b32_e32 v2, 0xffff8000, v0 ; GFX9-NEXT: s_movk_i32 s4, 0x7fff ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX9-NEXT: v_mul_f16_e64 v0, |v0|, v1 -; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v2 +; GFX9-NEXT: v_mul_f16_e64 v1, |v0|, v1 +; GFX9-NEXT: v_bfi_b32 v0, s4, v1, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] %y = or i32 %y.arg, 1 %fabs = call half @llvm.fabs.f16(half %x) @@ -37,10 +36,9 @@ define <2 x half> @test_pown_reduced_fast_v2f16_known_odd(<2 x half> %x, <2 x i3 ; GFX9-NEXT: v_cvt_f32_i32_e32 v2, v2 ; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1 ; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 0x80008000, v0 +; GFX9-NEXT: s_movk_i32 s4, 0x7fff ; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff ; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2 ; GFX9-NEXT: v_pk_mul_f16 v1, v3, v1 ; GFX9-NEXT: v_bfi_b32 v2, s4, v1, v0 @@ -67,10 +65,9 @@ define float @test_pown_reduced_fast_f32_known_odd(float %x, i32 %y.arg) #0 { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_or_b32_e32 v1, 1, v1 ; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1 -; GFX9-NEXT: v_and_b32_e32 v2, 0x80000000, v0 ; GFX9-NEXT: s_brev_b32 s4, -2 -; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v2 +; GFX9-NEXT: v_mul_f32_e64 v1, |v0|, v1 +; GFX9-NEXT: v_bfi_b32 v0, s4, v1, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] %y = or i32 %y.arg, 1 %fabs = call float @llvm.fabs.f32(float %x) @@ -94,8 +91,6 @@ define <2 x float> @test_pown_reduced_fast_v2f32_known_odd(<2 x float> %x, <2 x ; GFX9-NEXT: s_brev_b32 s4, -2 ; GFX9-NEXT: v_mul_f32_e64 v3, |v1|, v3 ; GFX9-NEXT: v_mul_f32_e64 v2, |v0|, v2 -; GFX9-NEXT: v_and_b32_e32 v1, 0x80000000, v1 -; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v0 ; GFX9-NEXT: v_bfi_b32 v0, s4, v2, v0 ; GFX9-NEXT: v_bfi_b32 v1, s4, v3, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -118,8 +113,7 @@ define double @test_pown_reduced_fast_f64_known_odd(double %x, i32 %y.arg) #0 { ; GFX9-NEXT: v_cvt_f64_i32_e32 v[2:3], v2 ; GFX9-NEXT: s_brev_b32 s4, -2 ; GFX9-NEXT: v_mul_f64 v[2:3], |v[0:1]|, v[2:3] -; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v1 -; GFX9-NEXT: v_bfi_b32 v1, s4, v3, v0 +; GFX9-NEXT: v_bfi_b32 v1, s4, v3, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] %y = or i32 %y.arg, 1 @@ -144,10 +138,8 @@ define <2 x double> @test_pown_reduced_fast_v2f64_known_odd(<2 x double> %x, <2 ; GFX9-NEXT: s_brev_b32 s4, -2 ; GFX9-NEXT: v_mul_f64 v[4:5], |v[0:1]|, v[4:5] ; GFX9-NEXT: v_mul_f64 v[6:7], |v[2:3]|, v[6:7] -; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v3 -; GFX9-NEXT: v_and_b32_e32 v1, 0x80000000, v1 ; GFX9-NEXT: v_bfi_b32 v1, s4, v5, v1 -; GFX9-NEXT: v_bfi_b32 v3, s4, v7, v0 +; GFX9-NEXT: v_bfi_b32 v3, s4, v7, v3 ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v2, v6 ; GFX9-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll index aedbb3f6a985a..ebec8c1c4d654 100644 --- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll +++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll @@ -152,41 +152,36 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind { ; PPC32-LABEL: test_copysign: ; PPC32: # %bb.0: # %entry ; PPC32-NEXT: mflr 0 -; PPC32-NEXT: stwu 1, -96(1) -; PPC32-NEXT: stw 0, 100(1) -; PPC32-NEXT: stfd 1, 40(1) -; PPC32-NEXT: lwz 3, 44(1) -; PPC32-NEXT: stfd 2, 32(1) -; PPC32-NEXT: stw 3, 60(1) -; PPC32-NEXT: lwz 3, 40(1) -; PPC32-NEXT: stfd 3, 72(1) -; PPC32-NEXT: stw 3, 56(1) +; PPC32-NEXT: stwu 1, -80(1) +; PPC32-NEXT: stw 0, 84(1) +; PPC32-NEXT: stfd 1, 32(1) ; PPC32-NEXT: lwz 3, 36(1) -; PPC32-NEXT: stfd 4, 64(1) +; PPC32-NEXT: stfd 2, 24(1) ; PPC32-NEXT: stw 3, 52(1) ; PPC32-NEXT: lwz 3, 32(1) -; PPC32-NEXT: lfd 1, 56(1) +; PPC32-NEXT: stfd 3, 56(1) ; PPC32-NEXT: stw 3, 48(1) -; PPC32-NEXT: lwz 3, 76(1) -; PPC32-NEXT: lfd 2, 48(1) -; PPC32-NEXT: stw 3, 92(1) -; PPC32-NEXT: lwz 3, 72(1) -; PPC32-NEXT: stw 3, 88(1) -; PPC32-NEXT: lwz 3, 68(1) -; PPC32-NEXT: lfd 3, 88(1) -; PPC32-NEXT: stw 3, 84(1) -; PPC32-NEXT: lwz 3, 64(1) -; PPC32-NEXT: stw 3, 80(1) -; PPC32-NEXT: lfd 4, 80(1) +; PPC32-NEXT: lwz 3, 28(1) +; PPC32-NEXT: lfd 4, 64(1) +; PPC32-NEXT: stw 3, 44(1) +; PPC32-NEXT: lwz 3, 24(1) +; PPC32-NEXT: lfd 1, 48(1) +; PPC32-NEXT: stw 3, 40(1) +; PPC32-NEXT: lwz 3, 60(1) +; PPC32-NEXT: lfd 2, 40(1) +; PPC32-NEXT: stw 3, 76(1) +; PPC32-NEXT: lwz 3, 56(1) +; PPC32-NEXT: stw 3, 72(1) +; PPC32-NEXT: lfd 3, 72(1) ; PPC32-NEXT: bl copysignl -; PPC32-NEXT: stfd 1, 16(1) -; PPC32-NEXT: stfd 2, 24(1) -; PPC32-NEXT: lwz 3, 16(1) -; PPC32-NEXT: lwz 4, 20(1) -; PPC32-NEXT: lwz 5, 24(1) -; PPC32-NEXT: lwz 6, 28(1) -; PPC32-NEXT: lwz 0, 100(1) -; PPC32-NEXT: addi 1, 1, 96 +; PPC32-NEXT: stfd 1, 8(1) +; PPC32-NEXT: stfd 2, 16(1) +; PPC32-NEXT: lwz 3, 8(1) +; PPC32-NEXT: lwz 4, 12(1) +; PPC32-NEXT: lwz 5, 16(1) +; PPC32-NEXT: lwz 6, 20(1) +; PPC32-NEXT: lwz 0, 84(1) +; PPC32-NEXT: addi 1, 1, 80 ; PPC32-NEXT: mtlr 0 ; PPC32-NEXT: blr entry: diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll index a2093f5b5e43a..ced6ff66ef678 100644 --- a/llvm/test/CodeGen/RISCV/double-arith.ll +++ b/llvm/test/CodeGen/RISCV/double-arith.ll @@ -320,9 +320,7 @@ define double @fsgnjn_d(double %a, double %b) nounwind { ; ; RV64IZFINXZDINX-LABEL: fsgnjn_d: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: li a2, -1 -; RV64IZFINXZDINX-NEXT: slli a2, a2, 63 -; RV64IZFINXZDINX-NEXT: xor a1, a1, a2 +; RV64IZFINXZDINX-NEXT: not a1, a1 ; RV64IZFINXZDINX-NEXT: fsgnj.d a0, a0, a1 ; RV64IZFINXZDINX-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll index 99835ff59493f..70d3291d3a840 100644 --- a/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll +++ b/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll @@ -156,20 +156,16 @@ define double @fcopysign_fneg(double %a, double %b) nounwind { ; ; RV64IFD-LABEL: fcopysign_fneg: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: li a2, -1 -; RV64IFD-NEXT: slli a2, a2, 63 -; RV64IFD-NEXT: xor a1, a1, a2 -; RV64IFD-NEXT: fmv.d.x fa5, a1 +; RV64IFD-NEXT: fmv.d.x fa5, a0 +; RV64IFD-NEXT: not a0, a1 ; RV64IFD-NEXT: fmv.d.x fa4, a0 -; RV64IFD-NEXT: fsgnj.d fa5, fa4, fa5 +; RV64IFD-NEXT: fsgnj.d fa5, fa5, fa4 ; RV64IFD-NEXT: fmv.x.d a0, fa5 ; RV64IFD-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcopysign_fneg: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: li a2, -1 -; RV64IZFINXZDINX-NEXT: slli a2, a2, 63 -; RV64IZFINXZDINX-NEXT: xor a1, a1, a2 +; RV64IZFINXZDINX-NEXT: not a1, a1 ; RV64IZFINXZDINX-NEXT: fsgnj.d a0, a0, a1 ; RV64IZFINXZDINX-NEXT: ret %1 = fneg double %b diff --git a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll index 2495b1ad300c6..53df588bf1c37 100644 --- a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll +++ b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll @@ -119,18 +119,16 @@ define float @fcopysign_fneg(float %a, float %b) nounwind { ; ; RV32IF-LABEL: fcopysign_fneg: ; RV32IF: # %bb.0: -; RV32IF-NEXT: lui a2, 524288 -; RV32IF-NEXT: xor a1, a1, a2 -; RV32IF-NEXT: fmv.w.x fa5, a1 +; RV32IF-NEXT: fmv.w.x fa5, a0 +; RV32IF-NEXT: not a0, a1 ; RV32IF-NEXT: fmv.w.x fa4, a0 -; RV32IF-NEXT: fsgnj.s fa5, fa4, fa5 +; RV32IF-NEXT: fsgnj.s fa5, fa5, fa4 ; RV32IF-NEXT: fmv.x.w a0, fa5 ; RV32IF-NEXT: ret ; ; RV32IZFINX-LABEL: fcopysign_fneg: ; RV32IZFINX: # %bb.0: -; RV32IZFINX-NEXT: lui a2, 524288 -; RV32IZFINX-NEXT: xor a1, a1, a2 +; RV32IZFINX-NEXT: not a1, a1 ; RV32IZFINX-NEXT: fsgnj.s a0, a0, a1 ; RV32IZFINX-NEXT: ret ; `````````` </details> https://github.com/llvm/llvm-project/pull/97151 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits