https://gcc.gnu.org/g:7e9545480f33141bedcbc3e5bf86fdb3db0d2a03
commit 7e9545480f33141bedcbc3e5bf86fdb3db0d2a03 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Wed Aug 14 13:12:32 2024 -0400 Revert changes Diff: --- gcc/ChangeLog.bugs | 31 ++++++++++++++++++++++++++++-- gcc/config/rs6000/vsx.md | 18 ----------------- gcc/testsuite/gcc.target/powerpc/pr99293.c | 22 --------------------- 3 files changed, 29 insertions(+), 42 deletions(-) diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs index 5e3009425c27..4a1989456fc6 100644 --- a/gcc/ChangeLog.bugs +++ b/gcc/ChangeLog.bugs @@ -2,6 +2,33 @@ Optimize splat of a V2DF/V2DI extract with constant element +We had optimizations for splat of a vector extract for the other vector +types, but we missed having one for V2DI and V2DF. This patch adds a +combiner insn to do this optimization. + +In looking at the source, we had similar optimizations for V4SI and V4SF +extract and splats, but we missed doing V2DI/V2DF. + +Without the patch for the code: + + vector long long splat_dup_l_0 (vector long long v) + { + return __builtin_vec_splats (__builtin_vec_extract (v, 0)); + } + +the compiler generates (on a little endian power9): + + splat_dup_l_0: + mfvsrld 9,34 + mtvsrdd 34,9,9 + blr + +Now it generates: + + splat_dup_l_0: + xxpermdi 34,34,34,3 + blr + 2024-08-14 Michael Meissner <meiss...@linux.ibm.com> gcc/ @@ -33,7 +60,7 @@ With this patch, GCC now realizes that the vector shift instructions will look at the bottom 6 bits for the shift count, and it can use either a VSPLTISW or XXSPLTIB instruction to load the shift count. -[gcc] +gcc/ 2024-08-12 Michael Meissner <meiss...@linux.ibm.com> PR target/89213 @@ -48,7 +75,7 @@ XXSPLTIB instruction to load the shift count. * config/rs6000/predicates.md (vector_shift_constant): New predicate. -[gcc/testsuite] +gcc/testsuite/ 2024-08-12 Michael Meissner <meiss...@linux.ibm.com> PR target/89213 diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 6e8cd041f3b8..7892477fa922 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -4797,24 +4797,6 @@ "lxvdsx %x0,%y1" [(set_attr "type" "vecload")]) -;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element -(define_insn "*vsx_splat_extract_<mode>" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") - (vec_duplicate:VSX_D - (vec_select:<VEC_base> - (match_operand:VSX_D 1 "vsx_register_operand" "wa") - (parallel [(match_operand 2 "const_0_to_1_operand" "n")]))))] - "VECTOR_MEM_VSX_P (<MODE>mode)" -{ - int which_word = INTVAL (operands[2]); - if (!BYTES_BIG_ENDIAN) - which_word = 1 - which_word; - - operands[3] = GEN_INT (which_word ? 3 : 0); - return "xxpermdi %x0,%x1,%x1,%3"; -} - [(set_attr "type" "vecperm")]) - ;; V4SI splat support (define_insn "vsx_splat_v4si" [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa") diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c deleted file mode 100644 index 20adc1f27f65..000000000000 --- a/gcc/testsuite/gcc.target/powerpc/pr99293.c +++ /dev/null @@ -1,22 +0,0 @@ -/* { dg-do compile { target powerpc*-*-* } } */ -/* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O2 -mvsx" } */ - -/* Test for PR 99263, which wants to do: - __builtin_vec_splats (__builtin_vec_extract (v, n)) - - where v is a V2DF or V2DI vector and n is either 0 or 1. Previously the - compiler would do a direct move to the GPR registers to select the item and a - direct move from the GPR registers to do the splat. */ - -vector long long splat_dup_l_0 (vector long long v) -{ - return __builtin_vec_splats (__builtin_vec_extract (v, 0)); -} - -vector long long splat_dup_l_1 (vector long long v) -{ - return __builtin_vec_splats (__builtin_vec_extract (v, 1)); -} - -/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */