The patch adds integer nabs "(NEG (ABS (...)))" instructions, adds STV conversion for them, and adjusts STV cost calculations accordingly. When a CMOV instruction is used to implement abs, the sign is determined from the preceding negation of the operand, and CMOVS is used to select between the negated and non-negated values.
To implement nabs, just reverse the condition and emit CMOVNS instead. The STV costs are adjusted for the inherent NEG of the nabs insn. V2DI NEG is a somewhat costly operation, since it is implemented as a load of zero, followed by a SUB insn. OTOH, integer nabs with its inherent NEG is relatively cheap, so some STV chains become less profitable for conversion. The patch rewrites the operand scanner in compute_convert_gain as a switch statement and reorders the case labels in general_scalar_to_vector_candidate_p to benefit from fallthroughs, and to remove the special processing of andnot in the latter function. gcc/ 2021-07-01 Uroš Bizjak <ubiz...@gmail.com> PR target/101044 * config/i386/i386.md (*nabs<dwi>2_doubleword): New insn_and_split pattern. (*nabs<mode>2_1): Ditto. * config/i386/i386-features.c (general_scalar_chain::compute_convert_gain): Handle (NEG (ABS (...))) RTX. Rewrite src code scanner as switch statement. (general_scalar_chain::convert_insn): Handle (NEG (ABS (...))) RTX. (general_scalar_to_vector_candidate_p): Detect (NEG (ABS (...))) RTX. Reorder case statements for (AND (NOT (...) ...)) fallthrough. gcc/testsuite/ 2021-07-01 Uroš Bizjak <ubiz...@gmail.com> PR target/101044 * gcc.target/i386/pr101044.c: New test. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Pushed to master. Uros.
diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index a25769ae478..cbd430a2ecf 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -544,71 +544,83 @@ general_scalar_chain::compute_convert_gain () += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx]; else if (MEM_P (src) && REG_P (dst)) igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx]; - else if (GET_CODE (src) == ASHIFT - || GET_CODE (src) == ASHIFTRT - || GET_CODE (src) == LSHIFTRT) - { - if (m == 2) - { - if (INTVAL (XEXP (src, 1)) >= 32) - igain += ix86_cost->add; - else - igain += ix86_cost->shift_const; - } + else + switch (GET_CODE (src)) + { + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (m == 2) + { + if (INTVAL (XEXP (src, 1)) >= 32) + igain += ix86_cost->add; + else + igain += ix86_cost->shift_const; + } - igain += ix86_cost->shift_const - ix86_cost->sse_op; + igain += ix86_cost->shift_const - ix86_cost->sse_op; - if (CONST_INT_P (XEXP (src, 0))) - igain -= vector_const_cost (XEXP (src, 0)); - } - else if (GET_CODE (src) == PLUS - || GET_CODE (src) == MINUS - || GET_CODE (src) == IOR - || GET_CODE (src) == XOR - || GET_CODE (src) == AND) - { - igain += m * ix86_cost->add - ix86_cost->sse_op; - /* Additional gain for andnot for targets without BMI. */ - if (GET_CODE (XEXP (src, 0)) == NOT - && !TARGET_BMI) - igain += m * ix86_cost->add; - - if (CONST_INT_P (XEXP (src, 0))) - igain -= vector_const_cost (XEXP (src, 0)); - if (CONST_INT_P (XEXP (src, 1))) - igain -= vector_const_cost (XEXP (src, 1)); - } - else if (GET_CODE (src) == NEG - || GET_CODE (src) == NOT) - igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1); - else if (GET_CODE (src) == ABS - || GET_CODE (src) == SMAX - || GET_CODE (src) == SMIN - || GET_CODE (src) == UMAX - || GET_CODE (src) == UMIN) - { - /* We do not have any conditional move cost, estimate it as a - reg-reg move. Comparisons are costed as adds. 
*/ - igain += m * (COSTS_N_INSNS (2) + ix86_cost->add); - /* Integer SSE ops are all costed the same. */ - igain -= ix86_cost->sse_op; - } - else if (GET_CODE (src) == COMPARE) - { - /* Assume comparison cost is the same. */ - } - else if (CONST_INT_P (src)) - { - if (REG_P (dst)) - /* DImode can be immediate for TARGET_64BIT and SImode always. */ - igain += m * COSTS_N_INSNS (1); - else if (MEM_P (dst)) - igain += (m * ix86_cost->int_store[2] - - ix86_cost->sse_store[sse_cost_idx]); - igain -= vector_const_cost (src); - } - else - gcc_unreachable (); + if (CONST_INT_P (XEXP (src, 0))) + igain -= vector_const_cost (XEXP (src, 0)); + break; + + case AND: + case IOR: + case XOR: + case PLUS: + case MINUS: + igain += m * ix86_cost->add - ix86_cost->sse_op; + /* Additional gain for andnot for targets without BMI. */ + if (GET_CODE (XEXP (src, 0)) == NOT + && !TARGET_BMI) + igain += m * ix86_cost->add; + + if (CONST_INT_P (XEXP (src, 0))) + igain -= vector_const_cost (XEXP (src, 0)); + if (CONST_INT_P (XEXP (src, 1))) + igain -= vector_const_cost (XEXP (src, 1)); + break; + + case NEG: + case NOT: + igain -= ix86_cost->sse_op + COSTS_N_INSNS (1); + + if (GET_CODE (XEXP (src, 0)) != ABS) + { + igain += m * ix86_cost->add; + break; + } + /* FALLTHRU */ + + case ABS: + case SMAX: + case SMIN: + case UMAX: + case UMIN: + /* We do not have any conditional move cost, estimate it as a + reg-reg move. Comparisons are costed as adds. */ + igain += m * (COSTS_N_INSNS (2) + ix86_cost->add); + /* Integer SSE ops are all costed the same. */ + igain -= ix86_cost->sse_op; + break; + + case COMPARE: + /* Assume comparison cost is the same. */ + break; + + case CONST_INT: + if (REG_P (dst)) + /* DImode can be immediate for TARGET_64BIT and SImode always. 
*/ + igain += m * COSTS_N_INSNS (1); + else if (MEM_P (dst)) + igain += (m * ix86_cost->int_store[2] + - ix86_cost->sse_store[sse_cost_idx]); + igain -= vector_const_cost (src); + break; + + default: + gcc_unreachable (); + } if (igain != 0 && dump_file) { @@ -1009,7 +1021,19 @@ general_scalar_chain::convert_insn (rtx_insn *insn) case NEG: src = XEXP (src, 0); - convert_op (&src, insn); + + if (GET_CODE (src) == ABS) + { + src = XEXP (src, 0); + convert_op (&src, insn); + subreg = gen_reg_rtx (vmode); + emit_insn_before (gen_rtx_SET (subreg, + gen_rtx_ABS (vmode, src)), insn); + src = subreg; + } + else + convert_op (&src, insn); + subreg = gen_reg_rtx (vmode); emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn); src = gen_rtx_MINUS (vmode, subreg, src); @@ -1042,9 +1066,10 @@ general_scalar_chain::convert_insn (rtx_insn *insn) gcc_assert (REG_P (src) && GET_MODE (src) == DImode); subreg = gen_rtx_SUBREG (V2DImode, src, 0); - emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg), - copy_rtx_if_shared (subreg), - copy_rtx_if_shared (subreg)), + emit_insn_before (gen_vec_interleave_lowv2di + (copy_rtx_if_shared (subreg), + copy_rtx_if_shared (subreg), + copy_rtx_if_shared (subreg)), insn); dst = gen_rtx_REG (CCmode, FLAGS_REG); src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg), @@ -1400,11 +1425,11 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode) return false; /* Fallthru. */ - case PLUS: - case MINUS: + case AND: case IOR: case XOR: - case AND: + case PLUS: + case MINUS: if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)) && !CONST_INT_P (XEXP (src, 1))) @@ -1413,18 +1438,32 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode) if (GET_MODE (XEXP (src, 1)) != mode && !CONST_INT_P (XEXP (src, 1))) return false; + + /* Check for andnot case. 
*/ + if (GET_CODE (src) != AND + || GET_CODE (XEXP (src, 0)) != NOT) + break; + + src = XEXP (src, 0); + /* FALLTHRU */ + + case NOT: break; + case NEG: + /* Check for nabs case. */ + if (GET_CODE (XEXP (src, 0)) != ABS) + break; + + src = XEXP (src, 0); + /* FALLTHRU */ + case ABS: if ((mode == DImode && !TARGET_AVX512VL) || (mode == SImode && !TARGET_SSSE3)) return false; break; - case NEG: - case NOT: - break; - case REG: return true; @@ -1438,12 +1477,8 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode) if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)) - && !CONST_INT_P (XEXP (src, 0)) - /* Check for andnot case. */ - && (GET_CODE (src) != AND - || GET_CODE (XEXP (src, 0)) != NOT - || !REG_P (XEXP (XEXP (src, 0), 0)))) - return false; + && !CONST_INT_P (XEXP (src, 0))) + return false; if (GET_MODE (XEXP (src, 0)) != mode && !CONST_INT_P (XEXP (src, 0))) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9b619e2f78f..156c6a94989 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -10305,6 +10305,50 @@ (define_insn_and_split "*abs<dwi>2_doubleword" split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); }) +(define_insn_and_split "*nabs<dwi>2_doubleword" + [(set (match_operand:<DWI> 0 "register_operand") + (neg:<DWI> + (abs:<DWI> + (match_operand:<DWI> 1 "general_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_CMOVE + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel + [(set (reg:CCC FLAGS_REG) + (ne:CCC (match_dup 1) (const_int 0))) + (set (match_dup 2) (neg:DWIH (match_dup 1)))]) + (parallel + [(set (match_dup 5) + (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (reg:CCGOC FLAGS_REG) + (compare:CCGOC + (neg:DWIH (match_dup 5)) + (const_int 0))) + (set (match_dup 5) + (neg:DWIH (match_dup 5)))]) + (set (match_dup 0) + (if_then_else:DWIH + (lt (reg:CCGOC 
FLAGS_REG) (const_int 0)) + (match_dup 2) + (match_dup 1))) + (set (match_dup 3) + (if_then_else:DWIH + (lt (reg:CCGOC FLAGS_REG) (const_int 0)) + (match_dup 5) + (match_dup 4)))] +{ + operands[1] = force_reg (<DWI>mode, operands[1]); + operands[2] = gen_reg_rtx (<DWI>mode); + + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); +}) + (define_insn_and_split "*abs<mode>2_1" [(set (match_operand:SWI 0 "register_operand") (abs:SWI @@ -10332,6 +10376,34 @@ (define_insn_and_split "*abs<mode>2_1" operands[2] = gen_reg_rtx (<MODE>mode); }) +(define_insn_and_split "*nabs<mode>2_1" + [(set (match_operand:SWI 0 "register_operand") + (neg:SWI + (abs:SWI + (match_operand:SWI 1 "general_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_CMOVE + && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel + [(set (reg:CCGOC FLAGS_REG) + (compare:CCGOC + (neg:SWI (match_dup 1)) + (const_int 0))) + (set (match_dup 2) + (neg:SWI (match_dup 1)))]) + (set (match_dup 0) + (if_then_else:SWI + (lt (reg:CCGOC FLAGS_REG) (const_int 0)) + (match_dup 2) + (match_dup 1)))] +{ + operands[1] = force_reg (<MODE>mode, operands[1]); + operands[2] = gen_reg_rtx (<MODE>mode); +}) + (define_expand "<code>tf2" [(set (match_operand:TF 0 "register_operand") (absneg:TF (match_operand:TF 1 "register_operand")))] diff --git a/gcc/testsuite/gcc.target/i386/pr101044.c b/gcc/testsuite/gcc.target/i386/pr101044.c new file mode 100644 index 00000000000..03df86debb8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101044.c @@ -0,0 +1,9 @@ +/* PR target/101044 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=generic" } */ +/* { dg-final { scan-assembler-times "neg" 1 } } */ + +int foo (int x) +{ + return (x < 0) ? x : -x; +}