1] AArch64: Define the spaceship optab [PR117013]

Richard Sandiford Fri, 24 Jan 2025 08:27:08 -0800

Spencer Abson <spencer.ab...@arm.com> writes:
> This expansion ensures that exactly one comparison is emitted for
> spacesip-like sequences on floating-point operands, including when
> the result of such sequences are compared against members of
> std::<some_ordering>::<some_value>.
>
> For both integer and floating-point types, we optimize for the case
> in which the result of a spaceship-like operation is written to a GPR.
> The PR highlights this issue for floating-point operands, but we also
> make an improvement for integers, preferring:
>
> cmp     w0, w1
> cset  w1, gt
> csinv w0, w1, wzr, ge
>
> over:
>
> cmp   w0, w1
> mov     w0, 1
> csinv   w0, w0, wzr, ge
> csel    w0, w0, wzr, ne
>
> to compute:
>
> auto test(int a, int b) { return a <=> b;}
>
> gcc/ChangeLog:
>       PR target/117013
>       * config/aarch64/aarch64-protos.h (aarch64_expand_fp_spaceship):
>       Declare optab expander function for floating-point types.
>       * config/aarch64/aarch64.cc (aarch64_expand_fp_spaceship):
>       Define optab expansion for floating-point types (new function).
>       * config/aarch64/aarch64.md (spaceship<mode>4):
>       Add define_expands for spaceship<mode>4 on integer and
>       floating-point types.
>
> gcc/testsuite/ChangeLog:
>       PR target/117013
>       * g++.target/aarch64/spaceship_1.C: New test.
>       * g++.target/aarch64/spaceship_2.C: New test.
>       * g++.target/aarch64/spaceship_3.C: New test.


Thanks for the patch!  This looks great to me.

OK for stage 1.  I'll push the patch when trunk is open for general commits,
if you don't already have write access by then.

Richard

> ---
>  gcc/config/aarch64/aarch64-protos.h           |   1 +
>  gcc/config/aarch64/aarch64.cc                 |  73 +++++++
>  gcc/config/aarch64/aarch64.md                 |  43 ++++
>  .../g++.target/aarch64/spaceship_1.C          | 192 ++++++++++++++++++
>  .../g++.target/aarch64/spaceship_2.C          |  72 +++++++
>  .../g++.target/aarch64/spaceship_3.C          |   9 +
>  6 files changed, 390 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/aarch64/spaceship_1.C
>  create mode 100644 gcc/testsuite/g++.target/aarch64/spaceship_2.C
>  create mode 100644 gcc/testsuite/g++.target/aarch64/spaceship_3.C
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index fa7bc8029be..39a1dae4e8b 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -1240,6 +1240,7 @@ void aarch64_restore_za (rtx);
>  void aarch64_expand_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
>  void aarch64_expand_reversed_crc_using_pmull (scalar_mode, scalar_mode, rtx 
> *);
>  
> +void aarch64_expand_fp_spaceship (rtx, rtx, rtx, rtx);
>  
>  extern bool aarch64_gcs_enabled ();
>  
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index dba779a8e51..ea5dd0d5047 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -31427,6 +31427,79 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode 
> crc_mode,
>      }
>  }
>  
> +/* Expand the spaceship optab for floating-point operands.
> +
> +   If the result is compared against (-1, 0, 1 , 2), expand into
> +   fcmpe + conditional branch insns.
> +
> +   Otherwise (the result is just stored as an integer), expand into
> +   fcmpe + a sequence of conditional select/increment/invert insns.  */
> +void
> +aarch64_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx hint)
> +{
> +  rtx cc_reg = gen_rtx_REG (CCFPEmode, CC_REGNUM);
> +  emit_set_insn (cc_reg, gen_rtx_COMPARE (CCFPEmode, op0, op1));
> +
> +  rtx cc_gt = gen_rtx_GT (VOIDmode, cc_reg, const0_rtx);
> +  rtx cc_lt = gen_rtx_LT (VOIDmode, cc_reg, const0_rtx);
> +  rtx cc_un = gen_rtx_UNORDERED (VOIDmode, cc_reg, const0_rtx);
> +
> +  if (hint == const0_rtx)
> +    {
> +      rtx un_label = gen_label_rtx ();
> +      rtx lt_label = gen_label_rtx ();
> +      rtx gt_label = gen_label_rtx ();
> +      rtx end_label = gen_label_rtx ();
> +
> +      rtx temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_un,
> +                     gen_rtx_LABEL_REF (Pmode, un_label), pc_rtx);
> +      aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, temp));
> +
> +      temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_lt,
> +                     gen_rtx_LABEL_REF (Pmode, lt_label), pc_rtx);
> +      emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
> +
> +      temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_gt,
> +                     gen_rtx_LABEL_REF (Pmode, gt_label), pc_rtx);
> +      emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
> +
> +      /* Equality.  */
> +      emit_move_insn (dest, const0_rtx);
> +      emit_jump (end_label);
> +
> +      emit_label (un_label);
> +      emit_move_insn (dest, const2_rtx);
> +      emit_jump (end_label);
> +
> +      emit_label (gt_label);
> +      emit_move_insn (dest, const1_rtx);
> +      emit_jump (end_label);
> +
> +      emit_label (lt_label);
> +      emit_move_insn (dest, constm1_rtx);
> +
> +      emit_label (end_label);
> +    }
> +  else
> +    {
> +      rtx temp0 = gen_reg_rtx (SImode);
> +      rtx temp1 = gen_reg_rtx (SImode);
> +      rtx cc_ungt = gen_rtx_UNGT (VOIDmode, cc_reg, const0_rtx);
> +
> +      /* The value of hint is stored if the operands are unordered.  */
> +      rtx temp_un = gen_int_mode (UINTVAL (hint) - 1, SImode);
> +      if (!aarch64_reg_zero_or_m1_or_1 (temp_un, SImode))
> +     temp_un = force_reg (SImode, temp_un);
> +
> +      emit_set_insn (temp0, gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
> +                     constm1_rtx, const0_rtx));
> +      emit_set_insn (temp1, gen_rtx_IF_THEN_ELSE (SImode, cc_un,
> +                     temp_un, const0_rtx));
> +      emit_set_insn (dest, gen_rtx_IF_THEN_ELSE (SImode, cc_ungt,
> +                     gen_rtx_PLUS (SImode, temp1, const1_rtx), temp0));
> +    }
> +}
> +
>  /* Target-specific selftests.  */
>  
>  #if CHECKING_P
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 39655ea5e39..b7dae8ad6e7 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -4354,6 +4354,49 @@
>    [(set_attr "type" "alus_ext")]
>  )
>  
> +;; <=> operator pattern (integer)
> +;; (a == b) ? 0 : (a < b) ? -1 : 1.
> +(define_expand "spaceship<mode>4"
> +  [(match_operand:SI  0 "register_operand")
> +   (match_operand:GPI 1 "register_operand")
> +   (match_operand:GPI 2 "register_operand")
> +   (match_operand:SI  3 "const_int_operand")]
> +  ""
> +  {
> +    // 1 indicates unsigned comparison, -1 indicates signed.
> +    gcc_assert (operands[3] == constm1_rtx || operands[3] == const1_rtx);
> +
> +    rtx cc_reg = aarch64_gen_compare_reg (EQ, operands[1], operands[2]);
> +    RTX_CODE code_gt = operands[3] == const1_rtx ? GTU : GT;
> +    RTX_CODE code_lt = operands[3] == const1_rtx ? LTU : LT;
> +
> +    rtx cc_gt = gen_rtx_fmt_ee (code_gt, VOIDmode, cc_reg, const0_rtx);
> +    rtx cc_lt = gen_rtx_fmt_ee (code_lt, VOIDmode, cc_reg, const0_rtx);
> +
> +    rtx temp = gen_reg_rtx (SImode);
> +    emit_insn (gen_rtx_SET (temp, gen_rtx_IF_THEN_ELSE (SImode, cc_gt,
> +                                             const1_rtx, const0_rtx)));
> +    emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
> +                                             constm1_rtx, temp)));
> +    DONE;
> +  }
> +)
> +
> +;; <=> operator pattern (floating-point)
> +;; (a == b) ? 0 : (a < b) ? -1 : (a > b) ? 1 : UNORDERED.
> +(define_expand "spaceship<mode>4"
> +  [(match_operand:SI  0 "register_operand")
> +   (match_operand:GPF 1 "register_operand")
> +   (match_operand:GPF 2 "register_operand")
> +   (match_operand:SI  3 "const_int_operand")]
> +  "TARGET_FLOAT"
> +  {
> +    aarch64_expand_fp_spaceship (operands[0], operands[1], operands[2],
> +                             operands[3]);
> +    DONE;
> +  }
> +)
> +
>  ;; -------------------------------------------------------------------
>  ;; Store-flag and conditional select insns
>  ;; -------------------------------------------------------------------
> diff --git a/gcc/testsuite/g++.target/aarch64/spaceship_1.C 
> b/gcc/testsuite/g++.target/aarch64/spaceship_1.C
> new file mode 100644
> index 00000000000..e6daf6218ca
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/aarch64/spaceship_1.C
> @@ -0,0 +1,192 @@
> +// PR117013
> +/* { dg-do run } */
> +/* { dg-options "-O2 -std=c++20 -save-temps -fno-schedule-insns2" } */
> +/* { dg-final { check-function-bodies "**" "" ""} } */
> +
> +#include <compare>
> +#include <stdint.h>
> +
> +/*  Some implementation-defined value (other than 2) to represent
> +    partial_ordering::unordered (that for libc++ in this case). */
> +#define IMP_UN  -127
> +
> +#define SPACESHIP_FN(TYPE)              \
> +    [[gnu::noipa]]                      \
> +    auto ss_##TYPE (TYPE a, TYPE b)     \
> +    { return a <=> b; }                 \
> +
> +#define SPACESHIP_FN_NN(TYPE)                                \
> +    [[gnu::noipa, gnu::optimize ("-ffinite-math-only")]]     \
> +    auto ss_##TYPE##_no_nans (TYPE a, TYPE b)                \
> +    { return a <=> b; }                                      \
> +
> +/*  <=> implementation for floating-point operands. */
> +#define SPACESHIP_FP_IDIOM(TYPE)                                             
>  \
> +    [[gnu::noipa]]                                                           
>  \
> +    int ss_##TYPE##_idiom (TYPE a, TYPE b)                                   
>  \
> +    { return ((a) == (b) ? 0 : (a) < (b) ? -1 : (a) > (b) ? 1 : IMP_UN); }   
>  \
> +
> +#define RUN_TEST(TYPE, ARGA, ARGB, EXPECT, SUFF)            \
> +    if (ss_##TYPE##SUFF ((ARGA), (ARGB)) != (EXPECT))       \
> +        __builtin_abort();                                  \
> +
> +/*
> +** _Z8ss_floatff:
> +**   fcmpe   s0, s1
> +**   csinv   (w[0-9]+), wzr, wzr, pl
> +**   cset    (w[0-9]+), vs
> +**   csinc   w0, \1, \2, ls
> +**   ret
> +*/
> +SPACESHIP_FN(float);
> +
> +/*
> +** _Z16ss_float_no_nansff:
> +**   fcmpe   s0, s1
> +**   csinv   (w[0-9]+), wzr, wzr, pl
> +**   csinc   w0, \1, wzr, ls
> +**   ret
> +*/
> +SPACESHIP_FN_NN(float);
> +
> +/*
> +** _Z9ss_doubledd:
> +**   fcmpe   d0, d1
> +**   csinv   (w[0-9]+), wzr, wzr, pl
> +**   cset    (w[0-9]+), vs
> +**   csinc   w0, \1, \2, ls
> +**   ret
> +*/
> +SPACESHIP_FN(double);
> +
> +/*
> +** _Z17ss_double_no_nansdd:
> +**   fcmpe   d0, d1
> +**   csinv   (w[0-9]+), wzr, wzr, pl
> +**   csinc   w0, \1, wzr, ls
> +**   ret
> +*/
> +SPACESHIP_FN_NN(double);
> +
> +/*
> +** _Z14ss_float_idiomff:
> +**   fcmpe   s0, s1
> +**   csinv   (w[0-9]+), wzr, wzr, pl
> +**   mov     (w[0-9]+), -128
> +**   csel    (w[0-9]+), \2, wzr, vs
> +**   csinc   w0, \1, \3, ls
> +**   ret
> +*/
> +SPACESHIP_FP_IDIOM(float);
> +
> +/*
> +** _Z15ss_double_idiomdd:
> +**   fcmpe   d0, d1
> +**   csinv   (w[0-9]+), wzr, wzr, pl
> +**   mov     (w[0-9]+), -128
> +**   csel    (w[0-9]+), \2, wzr, vs
> +**   csinc   w0, \1, \3, ls
> +**   ret
> +*/
> +SPACESHIP_FP_IDIOM(double);
> +
> +/*
> +** _Z10ss_int32_tii:
> +**   cmp     w0, w1
> +**   cset    (w[0-9]+), gt
> +**   csinv   w0, \1, wzr, ge
> +**   ret
> +*/
> +SPACESHIP_FN(int32_t);
> +
> +/*
> +** _Z10ss_int64_tll:
> +**   cmp     x0, x1
> +**   cset    (w[0-9]+), gt
> +**   csinv   w0, \1, wzr, ge
> +**   ret
> +*/
> +SPACESHIP_FN(int64_t);
> +
> +/*
> +** _Z11ss_uint32_tjj:
> +**   cmp     w0, w1
> +**   cset    (w[0-9]+), hi
> +**   csinv   w0, \1, wzr, cs
> +**   ret
> +*/
> +SPACESHIP_FN(uint32_t);
> +
> +/*
> +** _Z11ss_uint64_tmm:
> +**   cmp     x0, x1
> +**   cset    (w[0-9]+), hi
> +**   csinv   w0, \1, wzr, cs
> +**   ret
> +*/
> +SPACESHIP_FN(uint64_t);
> +
> +
> +int
> +main()
> +{
> +    /* Single precision floating point.  */
> +    RUN_TEST (float, -1.0f, 1.0f, std::partial_ordering::less,);
> +    RUN_TEST (float, -1.0f, 1.0f, -1, _idiom);
> +
> +    RUN_TEST (float, 1.0f, -1.0f, std::partial_ordering::greater,);
> +    RUN_TEST (float, 1.0f, -1.0f, 1, _idiom);
> +
> +    RUN_TEST (float, -1.0f, -1.0f, std::partial_ordering::equivalent,);
> +    RUN_TEST (float, -1.0f, -1.0f, 0, _idiom);
> +
> +    RUN_TEST (float, __builtin_nanf(""), 1.0f, 
> std::partial_ordering::unordered,);
> +    RUN_TEST (float, __builtin_nanf(""), 1.0f, IMP_UN, _idiom);
> +    RUN_TEST (float, 1.0f ,__builtin_nanf(""), 
> std::partial_ordering::unordered,);
> +    RUN_TEST (float, 1.0f, __builtin_nanf(""), IMP_UN, _idiom);
> +
> +    /* No-NaNs.  */
> +    RUN_TEST (float, -1.0f, 1.0f, std::partial_ordering::less, _no_nans);
> +    RUN_TEST (float, 1.0f, -1.0f, std::partial_ordering::greater, _no_nans);
> +    RUN_TEST (float, -1.0f, -1.0f, std::partial_ordering::equivalent, 
> _no_nans);
> +
> +    /* Double precision floating point.  */
> +    RUN_TEST (double, -1.0f, 1.0f, std::partial_ordering::less,);
> +    RUN_TEST (double, -1.0f, 1.0f, -1, _idiom);
> +
> +    RUN_TEST (double, 1.0f, -1.0f, std::partial_ordering::greater,);
> +    RUN_TEST (double, 1.0f, -1.0f, 1, _idiom);
> +
> +    RUN_TEST (double, -1.0f, -1.0f, std::partial_ordering::equivalent,);
> +    RUN_TEST (double, -1.0f, -1.0f, 0, _idiom);
> +
> +    RUN_TEST (double, __builtin_nanf(""), 1.0f, 
> std::partial_ordering::unordered,);
> +    RUN_TEST (double, __builtin_nanf(""), 1.0f, IMP_UN, _idiom);
> +    RUN_TEST (double, 1.0f, __builtin_nanf(""), 
> std::partial_ordering::unordered,);
> +    RUN_TEST (double, 1.0f, __builtin_nanf(""), IMP_UN, _idiom);
> +
> +    /* No-NaNs.  */
> +    RUN_TEST (double, -1.0f, 1.0f, std::partial_ordering::less, _no_nans);
> +    RUN_TEST (double, 1.0f, -1.0f, std::partial_ordering::greater, _no_nans);
> +    RUN_TEST (double, -1.0f, -1.0f, std::partial_ordering::equivalent, 
> _no_nans);
> +
> +    /* Single integer.  */
> +    RUN_TEST (int32_t, -42, 0, std::strong_ordering::less,);
> +    RUN_TEST (int32_t, 0, -42, std::strong_ordering::greater,);
> +    RUN_TEST (int32_t, 42, 42, std::strong_ordering::equal,);
> +
> +    RUN_TEST (uint32_t, 0, 42, std::strong_ordering::less,);
> +    RUN_TEST (uint32_t, 42, 0, std::strong_ordering::greater,);
> +    RUN_TEST (uint32_t, 42, 42, std::strong_ordering::equal,);
> +
> +    /* Double integer.  */
> +    RUN_TEST (int64_t, -42, 0, std::strong_ordering::less,);
> +    RUN_TEST (int64_t, 42, 0, std::strong_ordering::greater,);
> +    RUN_TEST (int64_t, 42, 42, std::strong_ordering::equal,);
> +
> +    RUN_TEST (uint64_t, 0, 42, std::strong_ordering::less,);
> +    RUN_TEST (uint64_t, 42, 0, std::strong_ordering::greater,);
> +    RUN_TEST (uint64_t, 42, 42, std::strong_ordering::equal,);
> +
> +    return 0;
> +}
> \ No newline at end of file
> diff --git a/gcc/testsuite/g++.target/aarch64/spaceship_2.C 
> b/gcc/testsuite/g++.target/aarch64/spaceship_2.C
> new file mode 100644
> index 00000000000..c1d39002438
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/aarch64/spaceship_2.C
> @@ -0,0 +1,72 @@
> +// PR117013
> +/* { dg-do run } */
> +// { dg-options "-O2 -std=c++20 -save-temps" }
> +
> +#include <compare>
> +
> +#ifndef fp_type
> +#define fp_type float
> +#endif
> +
> +#define TEST_SS_IDIOM(ARGA, ARGB, EXPECT)                   \
> +    if (spaceship_idiom ((ARGA), (ARGB)) != (EXPECT))       \
> +        __builtin_abort();                                  \
> +
> +#define TEST_BR_ON_SS(ARGA, ARGB, EXPECT)                   \
> +    if(branch_on_spaceship ((ARGA), (ARGB)) != (EXPECT))    \
> +        __builtin_abort();                                  \
> +
> +
> +#define RUN_TEST(ARGA, ARGB, EXPECT)                        \
> +    TEST_SS_IDIOM(ARGA, ARGB, EXPECT)                       \
> +    TEST_BR_ON_SS(ARGA, ARGB, EXPECT)                       \
> +
> +/*  Test when .SPACESHIP prompts the back end to implement <=> with
> +    conditional branches (only applies to floating-point operands).  */
> +
> +[[gnu::noipa]] auto
> +equiv() { return std::partial_ordering::equivalent; }
> +[[gnu::noipa]] auto
> +less() { return std::partial_ordering::less; }
> +[[gnu::noipa]] auto
> +greater() { return std::partial_ordering::greater; }
> +[[gnu::noipa]] auto
> +unordered() { return std::partial_ordering::unordered; }
> +
> +auto
> +spaceship_idiom(fp_type a, fp_type b)
> +{
> +    if (a == b)
> +        return equiv();
> +    if (a < b)
> +        return less();
> +    if (a > b)
> +        return greater();
> +    return unordered();
> +}
> +
> +auto
> +branch_on_spaceship(fp_type a, fp_type b)
> +{
> +    auto res = a <=> b;
> +    if (res == 0)
> +        return equiv();
> +    else if (res < 0)
> +        return less();
> +    else if (res > 0)
> +        return greater();
> +    return unordered();
> +}
> +
> +int
> +main()
> +{
> +    RUN_TEST (-1.0f, 1.0f, std::partial_ordering::less);
> +    RUN_TEST (1.0f, -1.0f, std::partial_ordering::greater);
> +    RUN_TEST (1.0f, 1.0f, std::partial_ordering::equivalent);
> +    RUN_TEST (1.0f, __builtin_nanf(""), std::partial_ordering::unordered);
> +    RUN_TEST (__builtin_nanf(""), 1.0f, std::partial_ordering::unordered);
> +}
> +
> +/* { dg-final { scan-assembler-not "\tfcmp\t" } } */
> +/* { dg-final { scan-assembler-times "\tfcmpe\t" 2 } } */
> \ No newline at end of file
> diff --git a/gcc/testsuite/g++.target/aarch64/spaceship_3.C 
> b/gcc/testsuite/g++.target/aarch64/spaceship_3.C
> new file mode 100644
> index 00000000000..f58b084d08c
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/aarch64/spaceship_3.C
> @@ -0,0 +1,9 @@
> +// PR117013
> +/* { dg-do run } */
> +// { dg-options "-O2 -std=c++20 -save-temps" }
> +
> +#define fp_type double
> +#include "spaceship_2.C"
> +
> +/* { dg-final { scan-assembler-not "\tfcmp\t" } } */
> +/* { dg-final { scan-assembler-times "\tfcmpe\t" 2 } } */
> \ No newline at end of file

Re: [PATCH/GCC16 1/1] AArch64: Define the spaceship optab [PR117013]

Reply via email to