On Tue, 8 Oct 2024, Jakub Jelinek wrote: > On Mon, Oct 07, 2024 at 10:32:57AM +0200, Richard Biener wrote: > > > They are implementation defined, -1, 0, 1, 2 is defined by libstdc++: > > > using type = signed char; > > > enum class _Ord : type { equivalent = 0, less = -1, greater = 1 }; > > > enum class _Ncmp : type { _Unordered = 2 }; > > > https://eel.is/c++draft/cmp#categories.pre-1 documents them as > > > enum class ord { equal = 0, equivalent = equal, less = -1, greater = 1 }; > > > // exposition only > > > enum class ncmp { unordered = -127 }; > > > // exposition only > > > and now looking at it, LLVM's libc++ takes that literally and uses > > > -1, 0, 1, -127. One can't use <=> operator without including <compare> > > > which provides the enums, so I think if all we care about is libstdc++, > > > then just hardcoding -1, 0, 1, 2 is fine, if we want to also optimize > > > libc++ when used with gcc, we could support -1, 0, 1, -127 as another > > > option. > > > Supporting arbitrary 4 values doesn't make sense, at least on x86 the > > > only reason to do the conversion to int in an optab is a good sequence > > > to turn the flag comparisons to -1, 0, 1. So, either we do nothing > > > more than the patch, or add handle both 2 and -127 for unordered, > > > or add support for arbitrary value for the unordered case except > > > -1, 0, 1 (then -1 could mean signed int, 1 unsigned int, 0 do the jumps > > > and any other value what should be returned for unordered. > > Here is an incremental patch which adds support for (almost) arbitrary > unordered constant value. It changes the .SPACESHIP and spaceship<mode>4 > optab conventions, so 0 means use branches, floating point, -1, 0, 1, 2 > results consumed by tree-ssa-math-opts.cc emitted comparisons, -1 > means signed int comparisons, -1, 0, 1 results, 1 means unsigned int > comparisons, -1, 0, 1 results, and for constant other than -1, 0, 1 > which fit into [-128, 127] converted to the PHI type are otherwise > specified as the last argument (then it is -1, 0, 1, C results). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
LGTM, please give others some time to comment. Thanks, Richard. > 2024-10-08 Jakub Jelinek <ja...@redhat.com> > > PR middle-end/116896 > * tree-ssa-math-opts.cc (optimize_spaceship): Handle unordered values > other than 2, but they still need to be signed char range possibly > converted to the PHI result and can't be in [-1, 1] range. Use > last .SPACESHIP argument of 1 for unsigned int comparisons, -1 for > signed int, 0 for floating point branches and any other for floating > point with that value as unordered. > * config/i386/i386-expand.cc (ix86_expand_fp_spaceship): Use op2 rather > const2_rtx if op2 is not const0_rtx for unordered result. > (ix86_expand_int_spaceship): Change INTVAL (op2) == 1 tests to > INTVAL (op2) != -1. > * doc/md.texi (spaceship@var{m}4): Document the above changes. > > * gcc.target/i386/pr116896.c: New test. > > --- gcc/tree-ssa-math-opts.cc.jj 2024-10-07 11:37:46.576977487 +0200 > +++ gcc/tree-ssa-math-opts.cc 2024-10-07 14:23:12.918608708 +0200 > @@ -6023,11 +6023,12 @@ optimize_spaceship (gcond *stmt) > > /* Check if there is a single bb into which all failed conditions > jump to (perhaps through an empty block) and if it results in > - a single integral PHI which just sets it to -1, 0, 1, 2 > + a single integral PHI which just sets it to -1, 0, 1, X > (or -1, 0, 1 when NaNs can't happen). In that case use 1 rather > than 0 as last .SPACESHIP argument to tell backends it might > consider different code generation and just cast the result > - of .SPACESHIP to the PHI result. */ > + of .SPACESHIP to the PHI result. X above is some value > + other than -1, 0, 1, for libstdc++ 2, for libc++ -127. */ > tree arg3 = integer_zero_node; > edge e = EDGE_SUCC (bb0, 0); > if (e->dest == bb1) > @@ -6054,6 +6055,7 @@ optimize_spaceship (gcond *stmt) > && integer_zerop (gimple_phi_arg_def_from_edge (phi, e)) > && EDGE_COUNT (bbp->preds) == (HONOR_NANS (TREE_TYPE (arg1)) ? 4 : 3)) > { > + HOST_WIDE_INT argval = SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) ? 2 : -1; > for (unsigned i = 0; phi && i < EDGE_COUNT (bbp->preds) - 1; ++i) > { > edge e3 = i == 0 ? e1 : i == 1 ? em1 : e2; > @@ -6071,16 +6073,29 @@ optimize_spaceship (gcond *stmt) > tree a = gimple_phi_arg_def_from_edge (phi, e3); > if (TREE_CODE (a) != INTEGER_CST > || (i == 0 && !integer_onep (a)) > - || (i == 1 && !integer_all_onesp (a)) > - || (i == 2 && wi::to_widest (a) != 2)) > + || (i == 1 && !integer_all_onesp (a))) > { > phi = NULL; > break; > } > + if (i == 2) > + { > + tree minv = TYPE_MIN_VALUE (signed_char_type_node); > + tree maxv = TYPE_MAX_VALUE (signed_char_type_node); > + widest_int w = widest_int::from (wi::to_wide (a), SIGNED); > + if ((w >= -1 && w <= 1) > + || w < wi::to_widest (minv) > + || w > wi::to_widest (maxv)) > + { > + phi = NULL; > + break; > + } > + argval = w.to_shwi (); > + } > } > if (phi) > arg3 = build_int_cst (integer_type_node, > - TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 2 : 1); > + TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 1 : argval); > } > > /* For integral <=> comparisons only use .SPACESHIP if it is turned > @@ -6094,11 +6109,18 @@ optimize_spaceship (gcond *stmt) > gimple_stmt_iterator gsi = gsi_for_stmt (stmt); > gsi_insert_before (&gsi, gc, GSI_SAME_STMT); > > - wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node)); > - wide_int w2 = (HONOR_NANS (TREE_TYPE (arg1)) > - ? wi::two (TYPE_PRECISION (integer_type_node)) > - : wi::one (TYPE_PRECISION (integer_type_node))); > - int_range<1> vr (TREE_TYPE (lhs), wm1, w2); > + wide_int wmin = wi::minus_one (TYPE_PRECISION (integer_type_node)); > + wide_int wmax = wi::one (TYPE_PRECISION (integer_type_node)); > + if (HONOR_NANS (TREE_TYPE (arg1))) > + { > + if (arg3 == integer_zero_node) > + wmax = wi::two (TYPE_PRECISION (integer_type_node)); > + else if (tree_int_cst_sgn (arg3) < 0) > + wmin = wi::to_wide (arg3); > + else > + wmax = wi::to_wide (arg3); > + } > + int_range<1> vr (TREE_TYPE (lhs), wmin, wmax); > set_range_info (lhs, vr); > > if (arg3 != integer_zero_node) > --- gcc/config/i386/i386-expand.cc.jj 2024-10-07 11:37:46.286981563 +0200 > +++ gcc/config/i386/i386-expand.cc 2024-10-07 14:02:05.798392986 +0200 > @@ -3234,7 +3234,7 @@ ix86_expand_fp_spaceship (rtx dest, rtx > if (l2) > { > emit_label (l2); > - emit_move_insn (dest, const2_rtx); > + emit_move_insn (dest, op2 == const0_rtx ? const2_rtx : op2); > } > emit_label (lend); > } > @@ -3250,11 +3250,11 @@ ix86_expand_int_spaceship (rtx dest, rtx > operands nor optimize CC mode - we need a mode usable for both > LT and GT resp. LTU and GTU comparisons with the same unswapped > operands. */ > - rtx flags = gen_rtx_REG (INTVAL (op2) == 1 ? CCGCmode : CCmode, FLAGS_REG); > + rtx flags = gen_rtx_REG (INTVAL (op2) != 1 ? CCGCmode : CCmode, FLAGS_REG); > rtx tmp = gen_rtx_COMPARE (GET_MODE (flags), op0, op1); > emit_insn (gen_rtx_SET (flags, tmp)); > rtx lt_tmp = gen_reg_rtx (QImode); > - ix86_expand_setcc (lt_tmp, INTVAL (op2) == 1 ? LT : LTU, flags, > + ix86_expand_setcc (lt_tmp, INTVAL (op2) != 1 ? LT : LTU, flags, > const0_rtx); > if (GET_MODE (dest) != QImode) > { > @@ -3264,7 +3264,7 @@ ix86_expand_int_spaceship (rtx dest, rtx > lt_tmp = tmp; > } > rtx gt_tmp = gen_reg_rtx (QImode); > - ix86_expand_setcc (gt_tmp, INTVAL (op2) == 1 ? GT : GTU, flags, > + ix86_expand_setcc (gt_tmp, INTVAL (op2) != 1 ? GT : GTU, flags, > const0_rtx); > if (GET_MODE (dest) != QImode) > { > --- gcc/doc/md.texi.jj 2024-10-07 11:37:46.380980242 +0200 > +++ gcc/doc/md.texi 2024-10-07 14:10:39.037195535 +0200 > @@ -8575,8 +8575,11 @@ if operand 1 with mode @var{m} compares > operand 2, greater than operand 2 or is unordered with operand 2. > Operand 3 should be @code{const0_rtx} if the result is used in comparisons, > @code{const1_rtx} if the result is used as integer value and the comparison > -is signed, @code{const2_rtx} if the result is used as integer value and > -the comparison is unsigned. > +is integral unsigned, @code{constm1_rtx} if the result is used as integer > +value and the comparison is integral signed and some other @code{CONST_INT} > +if the result is used as integer value and the comparison is floating point. > +In the last case, instead of setting output operand 0 to 2 for unordered, > +set it to operand 3. > @var{m} should be a scalar floating point mode. > > This pattern is not allowed to @code{FAIL}. > --- gcc/testsuite/gcc.target/i386/pr116896.c.jj 2024-10-07 > 14:17:38.024311384 +0200 > +++ gcc/testsuite/gcc.target/i386/pr116896.c 2024-10-07 14:17:33.409376197 > +0200 > @@ -0,0 +1,59 @@ > +/* PR middle-end/116896 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -masm=att -fno-stack-protector" } */ > +/* { dg-final { scan-assembler-times "\tjp\t" 2 } } */ > +/* { dg-final { scan-assembler-not "\tj\[^mp\]\[a-z\]*\t" } } */ > +/* { dg-final { scan-assembler-times "\tsbb\[bl\]\t\\\$0, " 4 } } */ > +/* { dg-final { scan-assembler-times "\tseta\t" 4 } } */ > + > +signed char > +foo (float x, float y) > +{ > + if (x == y) > + return 0; > + else if (x < y) > + return -1; > + else if (x > y) > + return 1; > + else > + return 2; > +} > + > +__attribute__((optimize ("fast-math"))) signed char > +bar (float x, float y) > +{ > + if (x == y) > + return 0; > + else if (x < y) > + return -1; > + else if (x > y) > + return 1; > + else > + return 2; > +} > + > +signed char > +baz (float x, float y) > +{ > + if (x == y) > + return 0; > + else if (x < y) > + return -1; > + else if (x > y) > + return 1; > + else > + return -127; > +} > + > +__attribute__((optimize ("fast-math"))) signed char > +qux (float x, float y) > +{ > + if (x == y) > + return 0; > + else if (x < y) > + return -1; > + else if (x > y) > + return 1; > + else > + return -127; > +} > > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)