On Fri, Oct 11, 2024 at 9:16 AM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > The adjusted and new spaceship expanders ICE with -mtune=i486 or > -mtune=i586. > The problem is that in that case TARGET_ZERO_EXTEND_WITH_AND is true > and zero_extendqisi2 isn't allowed in that case, and we can't use > the replacement AND, because that clobbers flags and we want to use them > again. > > The following patch fixes that by using in those cases roughly what > we want to expand it to after peephole2 optimizations, i.e. xor > before the comparison, *setcc_qi_slp and sbbl $0 (or for signed > int case xoring of 2 regs, two *setcc_qi_slp, subl). > For *setcc_qi_slp, it uses the setcc_si_slp hacks with UNSPEC that > were in use for the floating point jp case (so such code is IMHO > undesirable for the !TARGET_ZERO_EXTEND_WITH_AND case as we want to > give combiner more liberty in that case). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2024-10-11 Jakub Jelinek <ja...@redhat.com> > > PR target/117053 > * config/i386/i386-expand.cc (ix86_expand_fp_spaceship): Handle > TARGET_ZERO_EXTEND_WITH_AND differently. > (ix86_expand_int_spaceship): Likewise. > > * g++.target/i386/pr116896-3.C: New test.
OK. Thanks, Uros. > > --- gcc/config/i386/i386-expand.cc.jj 2024-10-08 10:44:30.144935903 +0200 > +++ gcc/config/i386/i386-expand.cc 2024-10-10 20:16:05.192669243 +0200 > @@ -3150,7 +3150,9 @@ ix86_expand_fp_spaceship (rtx dest, rtx > { > gcc_checking_assert (ix86_fp_comparison_strategy (GT) != IX86_FPCMP_ARITH); > rtx zero = NULL_RTX; > - if (op2 != const0_rtx && TARGET_IEEE_FP && GET_MODE (dest) == SImode) > + if (op2 != const0_rtx > + && (TARGET_IEEE_FP || TARGET_ZERO_EXTEND_WITH_AND) > + && GET_MODE (dest) == SImode) > zero = force_reg (SImode, const0_rtx); > rtx gt = ix86_expand_fp_compare (GT, op0, op1); > rtx l0 = op2 == const0_rtx ? gen_label_rtx () : NULL_RTX; > @@ -3190,15 +3192,20 @@ ix86_expand_fp_spaceship (rtx dest, rtx > } > else > { > - rtx lt_tmp = gen_reg_rtx (QImode); > - ix86_expand_setcc (lt_tmp, UNLT, gen_rtx_REG (CCFPmode, FLAGS_REG), > - const0_rtx); > - if (GET_MODE (dest) != QImode) > + rtx lt_tmp = NULL_RTX; > + if (GET_MODE (dest) != SImode || !TARGET_ZERO_EXTEND_WITH_AND) > { > - tmp = gen_reg_rtx (GET_MODE (dest)); > - emit_insn (gen_rtx_SET (tmp, gen_rtx_ZERO_EXTEND (GET_MODE (dest), > - lt_tmp))); > - lt_tmp = tmp; > + lt_tmp = gen_reg_rtx (QImode); > + ix86_expand_setcc (lt_tmp, UNLT, gen_rtx_REG (CCFPmode, FLAGS_REG), > + const0_rtx); > + if (GET_MODE (dest) != QImode) > + { > + tmp = gen_reg_rtx (GET_MODE (dest)); > + emit_insn (gen_rtx_SET (tmp, > + gen_rtx_ZERO_EXTEND (GET_MODE (dest), > + lt_tmp))); > + lt_tmp = tmp; > + } > } > rtx gt_tmp; > if (zero) > @@ -3206,7 +3213,9 @@ ix86_expand_fp_spaceship (rtx dest, rtx > /* If TARGET_IEEE_FP and dest has SImode, emit SImode clear > before the floating point comparison and use setcc_si_slp > pattern to hide it from the combiner, so that it doesn't > - undo it. */ > + undo it. Similarly for TARGET_ZERO_EXTEND_WITH_AND, where > + the ZERO_EXTEND normally emitted would need to be AND > + with flags clobber. */ > tmp = ix86_expand_compare (GT, XEXP (gt, 0), const0_rtx); > PUT_MODE (tmp, QImode); > emit_insn (gen_setcc_si_slp (zero, tmp, zero)); > @@ -3225,10 +3234,23 @@ ix86_expand_fp_spaceship (rtx dest, rtx > gt_tmp = tmp; > } > } > - tmp = expand_simple_binop (GET_MODE (dest), MINUS, gt_tmp, lt_tmp, > dest, > - 0, OPTAB_DIRECT); > - if (!rtx_equal_p (tmp, dest)) > - emit_move_insn (dest, tmp); > + if (lt_tmp) > + { > + tmp = expand_simple_binop (GET_MODE (dest), MINUS, gt_tmp, lt_tmp, > + dest, 0, OPTAB_DIRECT); > + if (!rtx_equal_p (tmp, dest)) > + emit_move_insn (dest, tmp); > + } > + else > + { > + /* For TARGET_ZERO_EXTEND_WITH_AND emit sbb directly, as we can't > + do ZERO_EXTEND without clobbering flags. */ > + tmp = ix86_expand_compare (UNLT, XEXP (gt, 0), const0_rtx); > + PUT_MODE (tmp, SImode); > + emit_insn (gen_subsi3_carry (dest, gt_tmp, > + force_reg (GET_MODE (dest), > const0_rtx), > + XEXP (gt, 0), tmp)); > + } > } > emit_jump (lend); > if (l2) > @@ -3246,6 +3268,14 @@ void > ix86_expand_int_spaceship (rtx dest, rtx op0, rtx op1, rtx op2) > { > gcc_assert (INTVAL (op2)); > + rtx zero1 = NULL_RTX, zero2 = NULL_RTX; > + if (TARGET_ZERO_EXTEND_WITH_AND && GET_MODE (dest) == SImode) > + { > + zero1 = force_reg (SImode, const0_rtx); > + if (INTVAL (op2) != 1) > + zero2 = force_reg (SImode, const0_rtx); > + } > + > /* Not using ix86_expand_int_compare here, so that it doesn't swap > operands nor optimize CC mode - we need a mode usable for both > LT and GT resp. LTU and GTU comparisons with the same unswapped > @@ -3253,30 +3283,70 @@ ix86_expand_int_spaceship (rtx dest, rtx > rtx flags = gen_rtx_REG (INTVAL (op2) != 1 ? CCGCmode : CCmode, FLAGS_REG); > rtx tmp = gen_rtx_COMPARE (GET_MODE (flags), op0, op1); > emit_insn (gen_rtx_SET (flags, tmp)); > - rtx lt_tmp = gen_reg_rtx (QImode); > - ix86_expand_setcc (lt_tmp, INTVAL (op2) != 1 ? LT : LTU, flags, > - const0_rtx); > - if (GET_MODE (dest) != QImode) > - { > - tmp = gen_reg_rtx (GET_MODE (dest)); > - emit_insn (gen_rtx_SET (tmp, gen_rtx_ZERO_EXTEND (GET_MODE (dest), > - lt_tmp))); > - lt_tmp = tmp; > - } > - rtx gt_tmp = gen_reg_rtx (QImode); > - ix86_expand_setcc (gt_tmp, INTVAL (op2) != 1 ? GT : GTU, flags, > - const0_rtx); > - if (GET_MODE (dest) != QImode) > - { > - tmp = gen_reg_rtx (GET_MODE (dest)); > - emit_insn (gen_rtx_SET (tmp, gen_rtx_ZERO_EXTEND (GET_MODE (dest), > - gt_tmp))); > - gt_tmp = tmp; > - } > - tmp = expand_simple_binop (GET_MODE (dest), MINUS, gt_tmp, lt_tmp, dest, > - 0, OPTAB_DIRECT); > - if (!rtx_equal_p (tmp, dest)) > - emit_move_insn (dest, tmp); > + rtx lt_tmp = NULL_RTX; > + if (zero2) > + { > + /* For TARGET_ZERO_EXTEND_WITH_AND, emit setcc_si_slp to avoid > + ZERO_EXTEND. */ > + tmp = ix86_expand_compare (LT, flags, const0_rtx); > + PUT_MODE (tmp, QImode); > + emit_insn (gen_setcc_si_slp (zero2, tmp, zero2)); > + lt_tmp = zero2; > + } > + else if (!zero1) > + { > + lt_tmp = gen_reg_rtx (QImode); > + ix86_expand_setcc (lt_tmp, INTVAL (op2) != 1 ? LT : LTU, flags, > + const0_rtx); > + if (GET_MODE (dest) != QImode) > + { > + tmp = gen_reg_rtx (GET_MODE (dest)); > + emit_insn (gen_rtx_SET (tmp, gen_rtx_ZERO_EXTEND (GET_MODE (dest), > + lt_tmp))); > + lt_tmp = tmp; > + } > + } > + rtx gt_tmp; > + if (zero1) > + { > + /* For TARGET_ZERO_EXTEND_WITH_AND, emit setcc_si_slp to avoid > + ZERO_EXTEND. */ > + tmp = ix86_expand_compare (INTVAL (op2) != 1 ? GT : GTU, flags, > + const0_rtx); > + PUT_MODE (tmp, QImode); > + emit_insn (gen_setcc_si_slp (zero1, tmp, zero1)); > + gt_tmp = zero1; > + } > + else > + { > + gt_tmp = gen_reg_rtx (QImode); > + ix86_expand_setcc (gt_tmp, INTVAL (op2) != 1 ? GT : GTU, flags, > + const0_rtx); > + if (GET_MODE (dest) != QImode) > + { > + tmp = gen_reg_rtx (GET_MODE (dest)); > + emit_insn (gen_rtx_SET (tmp, gen_rtx_ZERO_EXTEND (GET_MODE (dest), > + gt_tmp))); > + gt_tmp = tmp; > + } > + } > + if (lt_tmp) > + { > + tmp = expand_simple_binop (GET_MODE (dest), MINUS, gt_tmp, lt_tmp, > dest, > + 0, OPTAB_DIRECT); > + if (!rtx_equal_p (tmp, dest)) > + emit_move_insn (dest, tmp); > + } > + else > + { > + /* For TARGET_ZERO_EXTEND_WITH_AND emit sbb directly, as we can't > + do ZERO_EXTEND without clobbering flags. */ > + tmp = ix86_expand_compare (LTU, flags, const0_rtx); > + PUT_MODE (tmp, SImode); > + emit_insn (gen_subsi3_carry (dest, gt_tmp, > + force_reg (GET_MODE (dest), const0_rtx), > + flags, tmp)); > + } > } > > /* Expand comparison setting or clearing carry flag. Return true when > --- gcc/testsuite/g++.target/i386/pr116896-3.C.jj 2024-10-10 > 20:10:50.769968080 +0200 > +++ gcc/testsuite/g++.target/i386/pr116896-3.C 2024-10-10 20:12:46.941380162 > +0200 > @@ -0,0 +1,5 @@ > +// PR middle-end/116896 > +// { dg-do run { target { c++20 && ia32 } } } > +// { dg-options "-O2 -march=i686 -mtune=i486" } > + > +#include "pr116896-2.C" > > Jakub >