[PATCH 2/2] Add IEEE 128-bit fp conditional move on PowerPC. This patch adds the support for power10 IEEE 128-bit floating point conditional move and for automatically generating min/max.
In this patch, I simplified things. Instead of allowing any four of the modes to be used for the conditional move comparison and the move itself could use different modes, I restricted the conditional move to just the same mode. I.e. you can do: _Float128 a, b, c, d, e, r; r = (a == b) ? c : d; But you can't do: _Float128 c, d, r; double a, b; r = (a == b) ? c : d; or: _Float128 a, b; double c, d, r; r = (a == b) ? c : d; This eliminates a lot of the complexity of the code, because you don't have to worry about the sizes being different, and the IEEE 128-bit types being restricted to Altivec registers, while the SF/DF modes can use any VSX register. I did not modify the existing support that allowed conditional moves where SFmode operands are compared and DFmode operands are moved (and vice versa). This simplification also eliminates having to insert a XXPERMDI instruction if you are comparing 64-bit values and doing a conditional move with 128-bit values. I modified the test cases that I added to reflect this change. I have also fixed the test for not equal to use '!=' instead of '=='. I have built bootstrap compilers on both a little endian power9 Linux system and a big endian power8 Linux system. There were no regressions in either build in the test suites. Can I check these changes into the trunk for gcc 11? gcc/ 2021-04-14 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/rs6000.c (have_compare_and_set_mask): Add IEEE 128-bit floating point types. * config/rs6000/rs6000.md (mov<mode>cc, IEEE128 iterator): New insn. (mov<mode>cc_p10, IEEE128 iterator): New insn. (mov<mode>cc_invert_p10, IEEE128 iterator): New insn. (fpmask<mode>, IEEE128 iterator): New insn. (xxsel<mode>, IEEE128 iterator): New insn. gcc/testsuite/ 2021-04-14 Michael Meissner <meiss...@linux.ibm.com> * gcc.target/powerpc/float128-cmove.c: New test. * gcc.target/powerpc/float128-minmax-3.c: New test. --- gcc/config/rs6000/rs6000.c | 8 +- gcc/config/rs6000/rs6000.md | 106 ++++++++++++++++++ .../gcc.target/powerpc/float128-cmove.c | 58 ++++++++++ .../gcc.target/powerpc/float128-minmax-3.c | 15 +++ 4 files changed, 185 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-cmove.c create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 8d00f99e9fd..f979d7320ad 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -15706,8 +15706,8 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false, return 1; } -/* Possibly emit the xsmaxcdp and xsmincdp instructions to emit a maximum or - minimum with "C" semantics. +/* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a + maximum or minimum with "C" semantics. Unless you use -ffast-math, you can't use these instructions to replace conditions that implicitly reverse the condition because the comparison @@ -15843,6 +15843,10 @@ have_compare_and_set_mask (machine_mode mode) case E_DFmode: return TARGET_P9_MINMAX; + case E_KFmode: + case E_TFmode: + return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode); + default: break; } diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 17b2fdc1cdd..c74fc8ce6fc 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5429,6 +5429,112 @@ (define_insn "*xxsel<mode>" "xxsel %x0,%x4,%x3,%x1" [(set_attr "type" "vecmove")]) +;; Support for ISA 3.1 IEEE 128-bit conditional move. The mode used in the +;; comparison must be the same as used in the conditional move. +(define_expand "mov<mode>cc" + [(set (match_operand:IEEE128 0 "gpc_reg_operand") + (if_then_else:IEEE128 (match_operand 1 "comparison_operator") + (match_operand:IEEE128 2 "gpc_reg_operand") + (match_operand:IEEE128 3 "gpc_reg_operand")))] + "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" +{ + if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_insn_and_split "*mov<mode>cc_p10" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v") + (if_then_else:IEEE128 + (match_operator:CCFP 1 "fpmask_comparison_operator" + [(match_operand:IEEE128 2 "altivec_register_operand" "v,v") + (match_operand:IEEE128 3 "altivec_register_operand" "v,v")]) + (match_operand:IEEE128 4 "altivec_register_operand" "v,v") + (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) + (clobber (match_scratch:V2DI 6 "=0,&v"))] + "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(set (match_dup 6) + (if_then_else:V2DI (match_dup 1) + (match_dup 7) + (match_dup 8))) + (set (match_dup 0) + (if_then_else:IEEE128 (ne (match_dup 6) + (match_dup 8)) + (match_dup 4) + (match_dup 5)))] +{ + if (GET_CODE (operands[6]) == SCRATCH) + operands[6] = gen_reg_rtx (V2DImode); + + operands[7] = CONSTM1_RTX (V2DImode); + operands[8] = CONST0_RTX (V2DImode); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +;; Handle inverting the fpmask comparisons. +(define_insn_and_split "*mov<mode>cc_invert_p10" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v") + (if_then_else:IEEE128 + (match_operator:CCFP 1 "invert_fpmask_comparison_operator" + [(match_operand:IEEE128 2 "altivec_register_operand" "v,v") + (match_operand:IEEE128 3 "altivec_register_operand" "v,v")]) + (match_operand:IEEE128 4 "altivec_register_operand" "v,v") + (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) + (clobber (match_scratch:V2DI 6 "=0,&v"))] + "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(set (match_dup 6) + (if_then_else:V2DI (match_dup 9) + (match_dup 7) + (match_dup 8))) + (set (match_dup 0) + (if_then_else:IEEE128 (ne (match_dup 6) + (match_dup 8)) + (match_dup 5) + (match_dup 4)))] +{ + rtx op1 = operands[1]; + enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); + + if (GET_CODE (operands[6]) == SCRATCH) + operands[6] = gen_reg_rtx (V2DImode); + + operands[7] = CONSTM1_RTX (V2DImode); + operands[8] = CONST0_RTX (V2DImode); + + operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +(define_insn "*fpmask<mode>" + [(set (match_operand:V2DI 0 "altivec_register_operand" "=v") + (if_then_else:V2DI + (match_operator:CCFP 1 "fpmask_comparison_operator" + [(match_operand:IEEE128 2 "altivec_register_operand" "v") + (match_operand:IEEE128 3 "altivec_register_operand" "v")]) + (match_operand:V2DI 4 "all_ones_constant" "") + (match_operand:V2DI 5 "zero_constant" "")))] + "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xscmp%V1qp %0,%2,%3" + [(set_attr "type" "fpcompare")]) + +(define_insn "*xxsel<mode>" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (if_then_else:IEEE128 + (ne (match_operand:V2DI 1 "altivec_register_operand" "v") + (match_operand:V2DI 2 "zero_constant" "")) + (match_operand:IEEE128 3 "altivec_register_operand" "v") + (match_operand:IEEE128 4 "altivec_register_operand" "v")))] + "TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)" + "xxsel %x0,%x4,%x3,%x1" + [(set_attr "type" "vecmove")]) + ;; Conversions to and from floating-point. diff --git a/gcc/testsuite/gcc.target/powerpc/float128-cmove.c b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c new file mode 100644 index 00000000000..2fae8dc23bc --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ppc_float128_hw } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#ifndef TYPE +#ifdef __LONG_DOUBLE_IEEE128__ +#define TYPE long double + +#else +#define TYPE _Float128 +#endif +#endif + +/* Verify that the ISA 3.1 (power10) IEEE 128-bit conditional move instructions + are generated. */ + +TYPE +eq (TYPE a, TYPE b, TYPE c, TYPE d) +{ + return (a == b) ? c : d; +} + +TYPE +ne (TYPE a, TYPE b, TYPE c, TYPE d) +{ + return (a != b) ? c : d; +} + +TYPE +lt (TYPE a, TYPE b, TYPE c, TYPE d) +{ + return (a < b) ? c : d; +} + +TYPE +le (TYPE a, TYPE b, TYPE c, TYPE d) +{ + return (a <= b) ? c : d; +} + +TYPE +gt (TYPE a, TYPE b, TYPE c, TYPE d) +{ + return (a > b) ? c : d; +} + +TYPE +ge (TYPE a, TYPE b, TYPE c, TYPE d) +{ + return (a >= b) ? c : d; +} + +/* { dg-final { scan-assembler-times {\mxscmpeqqp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxscmpgeqp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxscmpgtqp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxsel\M} 6 } } */ +/* { dg-final { scan-assembler-not {\mxscmpuqp\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c new file mode 100644 index 00000000000..6f7627c0f2a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c @@ -0,0 +1,15 @@ +/* { dg-require-effective-target ppc_float128_hw } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +#ifndef TYPE +#define TYPE _Float128 +#endif + +/* Test that the fminf128/fmaxf128 functions generate if/then/else and not a + call. */ +TYPE f128_min (TYPE a, TYPE b) { return (a < b) ? a : b; } +TYPE f128_max (TYPE a, TYPE b) { return (b > a) ? b : a; } + +/* { dg-final { scan-assembler {\mxsmaxcqp\M} } } */ +/* { dg-final { scan-assembler {\mxsmincqp\M} } } */ -- 2.22.0 -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797