Hi, In order to fix PR92098, we need to define vec_cmp_* and vcond_mask_*. In fact, PR92132 has already fixed the issue on trunk, so we need to backport the integer part of PR92132 to gcc-9-branch. This patch backports vector_{ungt,unge,unlt,unle}<mode> for the vec_{cmp,cmpu}<mode><mode> interface and the related expanders to gcc-9-branch.
The regression testing for the patch was done on the GCC 9 branch on powerpc64le-unknown-linux-gnu (Power 9 LE) with no regressions. Is it OK for the GCC 9 branch? Thanks, Lijia He gcc/ChangeLog 2019-11-27 Li Jia He <heli...@linux.ibm.com> PR target/92098 * config/rs6000/predicates.md (signed_or_equality_comparison_operator): New predicate. (unsigned_or_equality_comparison_operator): Likewise. * config/rs6000/rs6000.md (one_cmpl<mode>2): Remove expand. (one_cmpl<mode>3_internal): Rename to one_cmpl<mode>2. * config/rs6000/vector.md (vcond_mask_<mode><mode> for VEC_I and VEC_I): New expand. (vec_cmp<mode><mode> for VEC_I and VEC_I): Likewise. (vec_cmpu<mode><mode> for VEC_I and VEC_I): Likewise. gcc/testsuite/ChangeLog 2019-11-27 Li Jia He <heli...@linux.ibm.com> PR target/92098 * gcc.target/powerpc/pr92098-int-1.c: New test. * gcc.target/powerpc/pr92098-int-2.c: New test. --- gcc/config/rs6000/predicates.md | 10 ++ gcc/config/rs6000/rs6000.md | 8 +- gcc/config/rs6000/vector.md | 95 +++++++++++++ .../gcc.target/powerpc/pr92098-int-1.c | 126 ++++++++++++++++++ .../gcc.target/powerpc/pr92098-int-2.c | 126 ++++++++++++++++++ 5 files changed, 358 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr92098-int-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr92098-int-2.c diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 5cc80dea66c..e6e81471a02 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1154,6 +1154,16 @@ (define_predicate "signed_comparison_operator" (match_code "lt,gt,le,ge")) +;; Return 1 if OP is a signed comparison or an equality operator. +(define_predicate "signed_or_equality_comparison_operator" + (ior (match_operand 0 "equality_operator") + (match_operand 0 "signed_comparison_operator"))) + +;; Return 1 if OP is an unsigned comparison or an equality operator. 
+(define_predicate "unsigned_or_equality_comparison_operator" + (ior (match_operand 0 "equality_operator") + (match_operand 0 "unsigned_comparison_operator"))) + ;; Return 1 if OP is a comparison operation that is valid for an SCC insn -- ;; it must be a positive comparison. (define_predicate "scc_comparison_operator" diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 7bfa5eea2ee..32da805a32d 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6430,12 +6430,6 @@ "" "") -(define_expand "one_cmpl<mode>2" - [(set (match_operand:BOOL_128 0 "vlogical_operand") - (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand")))] - "" - "") - (define_expand "nor<mode>3" [(set (match_operand:BOOL_128 0 "vlogical_operand") (and:BOOL_128 @@ -6730,7 +6724,7 @@ (const_string "16")))]) ;; 128-bit one's complement -(define_insn_and_split "*one_cmpl<mode>3_internal" +(define_insn_and_split "one_cmpl<mode>2" [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 70bcfe02e22..0b62dd04bde 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -493,6 +493,101 @@ FAIL; }) +;; To support vector condition vectorization, define vcond_mask and vec_cmp. + +;; Same mode for condition true/false values and predicate operand. +(define_expand "vcond_mask_<mode><mode>" + [(match_operand:VEC_I 0 "vint_operand") + (match_operand:VEC_I 1 "vint_operand") + (match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + emit_insn (gen_vector_select_<mode> (operands[0], operands[2], operands[1], + operands[3])); + DONE; +}) + +;; For signed integer vectors comparison. 
+(define_expand "vec_cmp<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (match_operator 1 "signed_or_equality_comparison_operator" + [(match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx tmp = gen_reg_rtx (<MODE>mode); + switch (code) + { + case NE: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + break; + case EQ: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + break; + case GE: + emit_insn (gen_vector_nlt<mode> (operands[0],operands[2], operands[3], + tmp)); + break; + case GT: + emit_insn (gen_vector_gt<mode> (operands[0], operands[2], operands[3])); + break; + case LE: + emit_insn (gen_vector_ngt<mode> (operands[0], operands[2], operands[3], + tmp)); + break; + case LT: + emit_insn (gen_vector_gt<mode> (operands[0], operands[3], operands[2])); + break; + default: + gcc_unreachable (); + break; + } + DONE; +}) + +;; For unsigned integer vectors comparison. 
+(define_expand "vec_cmpu<mode><mode>" + [(set (match_operand:VEC_I 0 "vint_operand") + (match_operator 1 "unsigned_or_equality_comparison_operator" + [(match_operand:VEC_I 2 "vint_operand") + (match_operand:VEC_I 3 "vint_operand")]))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx tmp = gen_reg_rtx (<MODE>mode); + switch (code) + { + case NE: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[0])); + break; + case EQ: + emit_insn (gen_vector_eq<mode> (operands[0], operands[2], operands[3])); + break; + case GEU: + emit_insn (gen_vector_nltu<mode> (operands[0], operands[2], operands[3], + tmp)); + break; + case GTU: + emit_insn (gen_vector_gtu<mode> (operands[0], operands[2], operands[3])); + break; + case LEU: + emit_insn (gen_vector_ngtu<mode> (operands[0], operands[2], operands[3], + tmp)); + break; + case LTU: + emit_insn (gen_vector_gtu<mode> (operands[0], operands[3], operands[2])); + break; + default: + gcc_unreachable (); + break; + } + DONE; +}) + (define_expand "vector_eq<mode>" [(set (match_operand:VEC_C 0 "vlogical_operand") (eq:VEC_C (match_operand:VEC_C 1 "vlogical_operand") diff --git a/gcc/testsuite/gcc.target/powerpc/pr92098-int-1.c b/gcc/testsuite/gcc.target/powerpc/pr92098-int-1.c new file mode 100644 index 00000000000..57fba774885 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92098-int-1.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mdejagnu-cpu=power8 -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + signed int type and condition true/false values are integer type. 
*/ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define INT_TYPE signed int + +__attribute__ ((noinline)) int +test_eq (INT_TYPE *a, INT_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ne (INT_TYPE *a, INT_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_gt (INT_TYPE *a, INT_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_ge (INT_TYPE *a, INT_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_lt (INT_TYPE *a, INT_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) int +test_le (INT_TYPE *a, INT_TYPE min_v) +{ + int last = 0; + + for (int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + INT_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + INT_TYPE a2[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a2, 10); + if (ret != 19) + abort (); + + ret = test_ge (a2, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr92098-int-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr92098-int-2.c new file mode 100644 index 00000000000..372c7af14a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92098-int-2.c @@ -0,0 +1,126 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -ftree-vectorize -mdejagnu-cpu=power8 -fno-vect-cost-model -fdump-tree-vect-details" } */ + +/* To test condition reduction vectorization, where comparison operands are of + unsigned int type and condition true/false values are integer type. */ + +#include <math.h> + +extern void +abort (void) __attribute__ ((noreturn)); + +#define N 27 +#define INT_TYPE unsigned int + +__attribute__ ((noinline)) unsigned int +test_eq (INT_TYPE *a, INT_TYPE min_v) +{ + unsigned int last = 0; + + for (unsigned int i = 0; i < N; i++) + if (a[i] == min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) unsigned int +test_ne (INT_TYPE *a, INT_TYPE min_v) +{ + unsigned int last = 0; + + for (unsigned int i = 0; i < N; i++) + if (a[i] != min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) unsigned int +test_gt (INT_TYPE *a, INT_TYPE min_v) +{ + unsigned int last = 0; + + for (unsigned int i = 0; i < N; i++) + if (a[i] > min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) unsigned int +test_ge (INT_TYPE *a, INT_TYPE min_v) +{ + unsigned int last = 0; + + for (unsigned int i = 0; i < N; i++) + if (a[i] >= min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) unsigned int +test_lt (INT_TYPE *a, INT_TYPE min_v) +{ + unsigned int last = 0; + + for (unsigned int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +__attribute__ ((noinline)) unsigned int +test_le (INT_TYPE *a, INT_TYPE min_v) +{ + unsigned int last = 0; + + for (unsigned int i = 0; i < N; i++) + if (a[i] <= min_v) + last = i; + + return last; +} + +int +main (void) +{ + int ret = 0; + + INT_TYPE a1[N] = {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, + 5, 6, 7, 
8, 9, 10, 21, 22, 23, 24, 25, 26, 27}; + + INT_TYPE a2[N] = {21, 22, 23, 24, 25, 26, 27, 28, 29, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 1, 2, 3, 4, 5, 6, 7}; + + ret = test_eq (a1, 10); + if (ret != 19) + abort (); + + ret = test_ne (a1, 10); + if (ret != 26) + abort (); + + ret = test_gt (a2, 10); + if (ret != 19) + abort (); + + ret = test_ge (a2, 10); + if (ret != 19) + abort (); + + ret = test_lt (a1, 10); + if (ret != 18) + abort (); + + ret = test_le (a1, 10); + if (ret != 19) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 6 "vect" } } */ -- 2.17.1