Hi Haochen,
on 2024/1/26 09:17, HAO CHEN GUI wrote:
> Hi,
> This patch creates an insn_and_split pattern which helps the duplicated
> constant vector replace the source pseudo of store insn in fwprop pass.
> Thus the store can be implemented by a single stxvd2x and it eliminates the
> unnecessary byte swap insn on P8 LE. The test case shows the optimization.
>
> The patch depends on the first generic patch which uses insn cost in fwprop.
>
> Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
> regressions.
>
> Thanks
> Gui Haochen
>
>
> ChangeLog
> rs6000: Eliminate unnecessary byte swaps for duplicated constant vector store
>
> gcc/
> PR target/113325
> * config/rs6000/predicates.md (duplicate_easy_altivec_constant): New.
> * config/rs6000/vsx.md (vsx_stxvd2x4_le_const_<mode>): New.
>
> gcc/testsuite/
> PR target/113325
> * gcc.target/powerpc/pr113325.c: New.
>
>
> patch.diff
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index ef7d3f214c4..8ab6db630b7 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -759,6 +759,14 @@ (define_predicate "easy_vector_constant"
> return false;
> })
>
> +;; Return 1 if it's a duplicated easy_altivec_constant.
> +(define_predicate "duplicate_easy_altivec_constant"
> + (and (match_code "const_vector")
> + (match_test "easy_altivec_constant (op, mode)"))
> +{
> + return const_vec_duplicate_p (op);
> +})
This predicate can be moved to its only use (define_insn part condition).
The const_vector match_code check is redundant as const_vec_duplicate_p
already checks that, I wonder if we really need easy_altivec_constant?
Even if one vector constant doesn't meet easy_altivec_constant, but if
it matches the desired duplicated pattern, it doesn't need the swapping
either, no?
> +
> ;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF.
> (define_predicate "easy_vector_constant_add_self"
> (and (match_code "const_vector")
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 26fa32829af..98e4be26f64 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -3362,6 +3362,29 @@ (define_insn "*vsx_stxvd2x4_le_<mode>"
> "stxvd2x %x1,%y0"
> [(set_attr "type" "vecstore")])
>
> +(define_insn_and_split "vsx_stxvd2x4_le_const_<mode>"
> + [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
> + (match_operand:VSX_W 1 "duplicate_easy_altivec_constant" "W"))]
> + "!BYTES_BIG_ENDIAN
> + && VECTOR_MEM_VSX_P (<MODE>mode)
> + && !TARGET_P9_VECTOR"
> + "#"
> + "&& 1"
> + [(set (match_dup 2)
> + (match_dup 1))
> + (set (match_dup 0)
> + (vec_select:VSX_W
> + (match_dup 2)
> + (parallel [(const_int 2) (const_int 3)
> + (const_int 0) (const_int 1)])))]
> +{
> + operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
> + : operands[1];
> +
> +}
> + [(set_attr "type" "vecstore")
> + (set_attr "length" "8")])
> +
> (define_insn "*vsx_stxvd2x8_le_V8HI"
> [(set (match_operand:V8HI 0 "memory_operand" "=Z")
> (vec_select:V8HI
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr113325.c
> b/gcc/testsuite/gcc.target/powerpc/pr113325.c
> new file mode 100644
> index 00000000000..dff68ac0a51
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr113325.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -mvsx" } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
Nit: s/powerpc_vsx_ok/powerpc_vsx/
BR,
Kewen
> +/* { dg-final { scan-assembler-not {\mxxpermdi\M} } } */
> +
> +void* foo (void* s1)
> +{
> + return __builtin_memset (s1, 0, 32);
> +}