This patch adds combine patterns that fold vfsgnj.vv + vcond_mask into a masked vfsgnj.vv. vfsgnjx.vv cannot currently be produced by the midend, so it is not covered here; we will send another patch to address that issue.
gcc/ChangeLog: * config/riscv/autovec-opt.md (*copysign<mode>_neg): Move. (*cond_copysign<mode>): New combine pattern. * config/riscv/riscv-v.cc (needs_fp_rounding): Extend. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c: New test. * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: New test. * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c: New test. * gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h: New test. * gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c: New test. --- gcc/config/riscv/autovec-opt.md | 68 +++++++++---- gcc/config/riscv/riscv-v.cc | 4 +- .../rvv/autovec/cond/cond_copysign-run.c | 99 +++++++++++++++++++ .../rvv/autovec/cond/cond_copysign-rv32gcv.c | 12 +++ .../rvv/autovec/cond/cond_copysign-rv64gcv.c | 12 +++ .../rvv/autovec/cond/cond_copysign-template.h | 81 +++++++++++++++ .../rvv/autovec/cond/cond_copysign-zvfh-run.c | 93 +++++++++++++++++ 7 files changed, 349 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 58e80044f1e..f759525f96b 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -609,6 +609,10 @@ (set_attr "mode" "<V_DOUBLE_TRUNC>") (set (attr "frm_mode") (symbol_ref "riscv_vector::FRM_DYN"))]) +;; ============================================================================= +;; Combine op + vmerge to cond_op +;; ============================================================================= + ;; Combine <op> and 
vcond_mask generated by midend into cond_len_<op> ;; Currently supported operations: ;; abs(FP) @@ -651,25 +655,6 @@ DONE; }) -;; Combine vlmax neg and UNSPEC_VCOPYSIGN -(define_insn_and_split "*copysign<mode>_neg" - [(set (match_operand:VF 0 "register_operand") - (neg:VF - (unspec:VF [ - (match_operand:VF 1 "register_operand") - (match_operand:VF 2 "register_operand") - ] UNSPEC_VCOPYSIGN)))] - "TARGET_VECTOR && can_create_pseudo_p ()" - "#" - "&& 1" - [(const_int 0)] -{ - riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode), - riscv_vector::BINARY_OP, operands); - DONE; -} -[(set_attr "type" "vector")]) - ;; Combine sign_extend/zero_extend(vf2) and vcond_mask (define_insn_and_split "*cond_<optab><v_double_trunc><mode>" [(set (match_operand:VWEXTI 0 "register_operand") @@ -918,6 +903,27 @@ } [(set_attr "type" "vector")]) +;; Combine vfsgnj.vv + vcond_mask +(define_insn_and_split "*cond_copysign<mode>" + [(set (match_operand:VF 0 "register_operand") + (if_then_else:VF + (match_operand:<VM> 1 "register_operand") + (unspec:VF + [(match_operand:VF 2 "register_operand") + (match_operand:VF 3 "register_operand")] UNSPEC_VCOPYSIGN) + (match_operand:VF 4 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, <MODE>mode); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4], + gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)}; + riscv_vector::expand_cond_len_binop (icode, ops); + DONE; +}) + ;; ============================================================================= ;; Combine extend + binop to widen_binop ;; ============================================================================= @@ -1119,3 +1125,27 @@ DONE; } [(set_attr "type" "vfwmul")]) + + +;; ============================================================================= +;; Misc combine patterns +;; 
============================================================================= + +;; Combine vlmax neg and UNSPEC_VCOPYSIGN +(define_insn_and_split "*copysign<mode>_neg" + [(set (match_operand:VF 0 "register_operand") + (neg:VF + (unspec:VF [ + (match_operand:VF 1 "register_operand") + (match_operand:VF 2 "register_operand") + ] UNSPEC_VCOPYSIGN)))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode), + riscv_vector::BINARY_OP, operands); + DONE; +} +[(set_attr "type" "vector")]) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 4d95bd773a2..76e6094f45b 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2970,7 +2970,9 @@ needs_fp_rounding (unsigned icode, machine_mode mode) && icode != maybe_code_for_pred_extend (mode) /* narrower-INT -> FP */ && icode != maybe_code_for_pred_widen (FLOAT, mode) - && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode); + && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode) + /* vfsgnj */ + && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode); } /* Subroutine to expand COND_LEN_* patterns. 
*/ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c new file mode 100644 index 00000000000..be37854c135 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c @@ -0,0 +1,99 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */ + +#include "cond_copysign-template.h" + +#include <assert.h> + +#define SZ 512 + +#define EPS 1e-6 + +#define INIT_PRED() \ + int pred[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + pred[i] = i % 3; \ + } + +#define RUN(TYPE, VAL) \ + TYPE a##TYPE[SZ]; \ + TYPE b##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a##TYPE[i] = i; \ + b##TYPE[i] = (i & 1) ? VAL : -VAL; \ + } \ + copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS); + +#define RUN2(TYPE, VAL) \ + TYPE a2##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a2##TYPE[i] = i; \ + copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS); + +#define RUN3(TYPE, VAL) \ + TYPE a3##TYPE[SZ]; \ + TYPE b3##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a3##TYPE[i] = (i & 1) ? -i : i; \ + b3##TYPE[i] = (i & 1) ? 
VAL : -VAL; \ + } \ + xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS); + +#define RUN4(TYPE, VAL) \ + TYPE a4##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a4##TYPE[i] = -i; \ + xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS); + +#define RUN5(TYPE, VAL) \ + TYPE a5##TYPE[SZ]; \ + TYPE b5##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a5##TYPE[i] = i; \ + b5##TYPE[i] = (i & 1) ? VAL : -VAL; \ + } \ + ncopysign_##TYPE (a5##TYPE, a5##TYPE, b5##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] \ + || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS); + +#define RUN6(TYPE, VAL) \ + TYPE a6##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a6##TYPE[i] = i; \ + ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS); + +#define RUN_ALL() \ + RUN (float, 5) \ + RUN (double, 6) \ + RUN2 (float, 11) \ + RUN2 (double, 12) \ + RUN3 (float, 16) \ + RUN3 (double, 18) \ + RUN4 (float, 17) \ + RUN4 (double, 19) \ + RUN5 (float, 123) \ + RUN5 (double, 523) \ + RUN6 (float, 777) \ + RUN6 (double, 877) + +int +main () +{ + INIT_PRED () + RUN_ALL () +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c new file mode 100644 index 00000000000..cef531b9700 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */ + +#include "cond_copysign-template.h" + +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
+/* 1. The vectorizer wraps scalar variants of copysign into vector constants which + expand cannot handle currently. + 2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. */ +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */ +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */ +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c new file mode 100644 index 00000000000..cc2aa4de757 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */ + +#include "cond_copysign-template.h" + +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */ +/* 1. The vectorizer wraps scalar variants of copysign into vector constants which + expand cannot handle currently. + 2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. */ +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */ +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */ +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h new file mode 100644 index 00000000000..4191500fd83 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h @@ -0,0 +1,81 @@ +#include <stdint-gcc.h> + +#define TEST_TYPE(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void copysign_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, \ + TYPE *restrict b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = pred[i] ? 
__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; \ + } + +#define TEST_TYPE2(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void copysigns_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, TYPE b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b) : dst[i]; \ + } + +#define TEST_TYPE3(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void xorsign_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, \ + TYPE *restrict b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] \ + = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b[i]) : dst[i]; \ + } + +#define TEST_TYPE4(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void xorsigns_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, TYPE b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b) : dst[i]; \ + } + +#define TEST_TYPE5(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void ncopysign_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, \ + TYPE *restrict b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; \ + } + +#define TEST_TYPE6(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void ncopysigns_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, TYPE b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = pred[i] ? 
-__builtin_copysign##SUFFIX (a[i], b) : dst[i]; \ + } + +#define TEST_ALL() \ + TEST_TYPE (_Float16, f16) \ + TEST_TYPE (float, f) \ + TEST_TYPE (double, ) \ + TEST_TYPE2 (_Float16, f16) \ + TEST_TYPE2 (float, f) \ + TEST_TYPE2 (double, ) \ + TEST_TYPE3 (_Float16, f16) \ + TEST_TYPE3 (float, f) \ + TEST_TYPE3 (double, ) \ + TEST_TYPE4 (_Float16, f16) \ + TEST_TYPE4 (float, f) \ + TEST_TYPE4 (double, ) \ + TEST_TYPE5 (_Float16, f16) \ + TEST_TYPE5 (float, f) \ + TEST_TYPE5 (double, ) \ + TEST_TYPE6 (_Float16, f16) \ + TEST_TYPE6 (float, f) \ + TEST_TYPE6 (double, ) + +TEST_ALL () diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c new file mode 100644 index 00000000000..6e337f9e74c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c @@ -0,0 +1,93 @@ +/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */ + +#include "cond_copysign-template.h" + +#include <assert.h> + +#define SZ 512 + +#define EPS 1e-6 + +#define INIT_PRED() \ + int pred[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + pred[i] = i % 3; \ + } + +#define RUN(TYPE, VAL) \ + TYPE a##TYPE[SZ]; \ + TYPE b##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a##TYPE[i] = i; \ + b##TYPE[i] = (i & 1) ? VAL : -VAL; \ + } \ + copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? 
i : -i)) < EPS); + +#define RUN2(TYPE, VAL) \ + TYPE a2##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a2##TYPE[i] = i; \ + copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS); + +#define RUN3(TYPE, VAL) \ + TYPE a3##TYPE[SZ]; \ + TYPE b3##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a3##TYPE[i] = (i & 1) ? -i : i; \ + b3##TYPE[i] = (i & 1) ? VAL : -VAL; \ + } \ + xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS); + +#define RUN4(TYPE, VAL) \ + TYPE a4##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a4##TYPE[i] = -i; \ + xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS); + +#define RUN5(TYPE, VAL) \ + TYPE a5##TYPE[SZ]; \ + TYPE b5##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a5##TYPE[i] = i; \ + b5##TYPE[i] = (i & 1) ? VAL : -VAL; \ + } \ + ncopysign_##TYPE (a5##TYPE, a5##TYPE, b5##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] \ + || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS); + +#define RUN6(TYPE, VAL) \ + TYPE a6##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a6##TYPE[i] = i; \ + ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS); + +#define RUN_ALL() \ + RUN (_Float16, 5) \ + RUN2 (_Float16, 11) \ + RUN3 (_Float16, 16) \ + RUN4 (_Float16, 17) \ + RUN5 (_Float16, 123) \ + RUN6 (_Float16, 777) + +int +main () +{ + INIT_PRED () + RUN_ALL () +} -- 2.36.3