On Fri, Aug 27, 2021 at 10:03 AM Kong, Lingling via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> For avx512f_scattersi<VI48F:mode>, the mask operand only affects the set
> src, so we need to refine the pattern to let GCC know that the mask
> register also affects the dest.  So we put the mask operand into
> UNSPEC_VSIBADDR.
>
> Bootstrapped and regression tested on x86_64-linux-gnu{-m32,-m64}.
> Ok for master?
Ok.
>
> gcc/ChangeLog:
>
>         PR target/101472
>         * config/i386/sse.md (<avx512>scattersi<mode>): Add mask operand to
>         UNSPEC_VSIBADDR.
>         (<avx512>scatterdi<mode>): Likewise.
>         (*avx512f_scattersi<VI48F:mode>): Merge mask operand to set_dest.
>         (*avx512f_scatterdi<VI48F:mode>): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/101472
>         * gcc.target/i386/avx512f-pr101472.c: New test.
>         * gcc.target/i386/avx512vl-pr101472.c: New test.
> ---
>  gcc/config/i386/sse.md                        | 20 +++--
>  .../gcc.target/i386/avx512f-pr101472.c        | 49 ++++++++++++
>  .../gcc.target/i386/avx512vl-pr101472.c       | 79 +++++++++++++++++++
>  3 files changed, 140 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 03fc2df1fb0..a3055dbd316 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -24205,8 +24205,9 @@
>    "TARGET_AVX512F"
>  {
>    operands[5]
> -    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
> -                                       operands[4]), UNSPEC_VSIBADDR);
> +    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
> +                                       operands[4], operands[1]),
> +                                       UNSPEC_VSIBADDR);
>  })
>
>  (define_insn "*avx512f_scattersi<VI48F:mode>"
> @@ -24214,10 +24215,11 @@
>           [(unspec:P
>              [(match_operand:P 0 "vsib_address_operand" "Tv")
>               (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
> -             (match_operand:SI 4 "const1248_operand" "n")]
> +             (match_operand:SI 4 "const1248_operand" "n")
> +             (match_operand:<avx512fmaskmode> 6 "register_operand" "1")]
>              UNSPEC_VSIBADDR)])
>         (unspec:VI48F
> -         [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
> +         [(match_dup 6)
>            (match_operand:VI48F 3 "register_operand" "v")]
>           UNSPEC_SCATTER))
>     (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
> @@ -24243,8 +24245,9 @@
>    "TARGET_AVX512F"
>  {
>    operands[5]
> -    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
> -                                       operands[4]), UNSPEC_VSIBADDR);
> +    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
> +                                       operands[4], operands[1]),
> +                                       UNSPEC_VSIBADDR);
>  })
>
>  (define_insn "*avx512f_scatterdi<VI48F:mode>"
> @@ -24252,10 +24255,11 @@
>           [(unspec:P
>              [(match_operand:P 0 "vsib_address_operand" "Tv")
>               (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
> -             (match_operand:SI 4 "const1248_operand" "n")]
> +             (match_operand:SI 4 "const1248_operand" "n")
> +             (match_operand:QI 6 "register_operand" "1")]
>              UNSPEC_VSIBADDR)])
>         (unspec:VI48F
> -         [(match_operand:QI 6 "register_operand" "1")
> +         [(match_dup 6)
>            (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
>           UNSPEC_SCATTER))
>     (clobber (match_scratch:QI 1 "=&Yk"))]
> diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c b/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
> new file mode 100644
> index 00000000000..89c6603c2ff
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
> @@ -0,0 +1,49 @@
> +/* PR target/101472 */
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -O2" } */
> +/* { dg-final { scan-assembler-times "vpscatterqd\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdd\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterqq\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdq\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqps\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdps\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqpd\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdpd\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +
> +#include <immintrin.h>
> +
> +void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
> +                        __m256i a, __m512i b)
> +{
> +    _mm512_mask_i64scatter_epi32(addr, k1, vindex, a, 1);
> +    _mm512_mask_i64scatter_epi32(addr, k2, vindex, a, 1);
> +    _mm512_mask_i32scatter_epi32(addr, k1, vindex, b, 1);
> +    _mm512_mask_i32scatter_epi32(addr, k2, vindex, b, 1); }
> +
> +void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
> +                        __m256i idx, __m512i a)
> +{
> +    _mm512_mask_i64scatter_epi64(addr, k1, vindex, a, 1);
> +    _mm512_mask_i64scatter_epi64(addr, k2, vindex, a, 1);
> +    _mm512_mask_i32scatter_epi64(addr, k1, idx, a, 1);
> +    _mm512_mask_i32scatter_epi64(addr, k2, idx, a, 1); }
> +
> +void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
> +                     __m256 a, __m512 b)
> +{
> +    _mm512_mask_i64scatter_ps(addr, k1, vindex, a, 1);
> +    _mm512_mask_i64scatter_ps(addr, k2, vindex, a, 1);
> +    _mm512_mask_i32scatter_ps(addr, k1, vindex, b, 1);
> +    _mm512_mask_i32scatter_ps(addr, k2, vindex, b, 1); }
> +
> +void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
> +                     __m256i idx, __m512d a)
> +{
> +    _mm512_mask_i64scatter_pd(addr, k1, vindex, a, 1);
> +    _mm512_mask_i64scatter_pd(addr, k2, vindex, a, 1);
> +    _mm512_mask_i32scatter_pd(addr, k1, idx, a, 1);
> +    _mm512_mask_i32scatter_pd(addr, k2, idx, a, 1); }
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
> new file mode 100644
> index 00000000000..6df59a2eb7f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
> @@ -0,0 +1,79 @@
> +/* PR target/101472 */
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512vl -O2" } */
> +/* { dg-final { scan-assembler-times "vpscatterqd\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterqd\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdd\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdd\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterqq\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterqq\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdq\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdq\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqps\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqps\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdps\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdps\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqpd\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqpd\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdpd\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdpd\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +
> +
> +#include <immintrin.h>
> +
> +void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
> +                        __m256i vindex2, __m128i src_epi32,
> +                        __m256i src_i32_epi32)
> +{
> +    _mm_mask_i64scatter_epi32(addr, k1, vindex1, src_epi32, 1);
> +    _mm_mask_i64scatter_epi32(addr, k2, vindex1, src_epi32, 1);
> +    _mm256_mask_i64scatter_epi32(addr, k1, vindex2, src_epi32, 1);
> +    _mm256_mask_i64scatter_epi32(addr, k2, vindex2, src_epi32, 1);
> +
> +    _mm_mask_i32scatter_epi32(addr, k1, vindex1, src_epi32, 1);
> +    _mm_mask_i32scatter_epi32(addr, k2, vindex1, src_epi32, 1);
> +    _mm256_mask_i32scatter_epi32(addr, k1, vindex2, src_i32_epi32, 1);
> +    _mm256_mask_i32scatter_epi32(addr, k2, vindex2, src_i32_epi32, 1);
> +}
> +
> +void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
> +                        __m256i vindex2, __m128i src_epi64_mm,
> +                        __m256i src_epi64)
> +{
> +    _mm_mask_i64scatter_epi64(addr, k1, vindex1, src_epi64_mm, 1);
> +    _mm_mask_i64scatter_epi64(addr, k2, vindex1, src_epi64_mm, 1);
> +    _mm256_mask_i64scatter_epi64(addr, k1, vindex2, src_epi64, 1);
> +    _mm256_mask_i64scatter_epi64(addr, k2, vindex2, src_epi64, 1);
> +
> +    _mm_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64_mm, 8);
> +    _mm_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64_mm, 8);
> +    _mm256_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64, 1);
> +    _mm256_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64, 1); }
> +void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
> +                     __m256i vindex2, __m128 src_ps, __m256 src_i32_ps) {
> +    _mm_mask_i64scatter_ps(addr, k1, vindex1, src_ps, 1);
> +    _mm_mask_i64scatter_ps(addr, k2, vindex1, src_ps, 1);
> +    _mm256_mask_i64scatter_ps(addr, k1, vindex2, src_ps, 1);
> +    _mm256_mask_i64scatter_ps(addr, k2, vindex2, src_ps, 1);
> +
> +    _mm_mask_i32scatter_ps(addr, k1, vindex1, src_ps, 8);
> +    _mm_mask_i32scatter_ps(addr, k2, vindex1, src_ps, 8);
> +    _mm256_mask_i32scatter_ps(addr, k1, vindex2, src_i32_ps, 1);
> +    _mm256_mask_i32scatter_ps(addr, k2, vindex2, src_i32_ps, 1); }
> +
> +void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2,  __m128i vindex1,
> +                     __m256i vindex2, __m128d src_pd_mm, __m256d src_pd) {
> +    _mm_mask_i64scatter_pd(addr, k1, vindex1, src_pd_mm, 1);
> +    _mm_mask_i64scatter_pd(addr, k2, vindex1, src_pd_mm, 1);
> +    _mm256_mask_i64scatter_pd(addr, k1, vindex2, src_pd, 1);
> +    _mm256_mask_i64scatter_pd(addr, k2, vindex2, src_pd, 1);
> +
> +    _mm_mask_i32scatter_pd(addr, k1, vindex1, src_pd_mm, 8);
> +    _mm_mask_i32scatter_pd(addr, k2, vindex1, src_pd_mm, 8);
> +    _mm256_mask_i32scatter_pd(addr, k1, vindex1, src_pd, 1);
> +    _mm256_mask_i32scatter_pd(addr, k2, vindex1, src_pd, 1); }
> --
> 2.18.1
>


-- 
BR,
Hongtao

Reply via email to