On Mon, Sep 2, 2024 at 4:42 PM Levy Hsu <ad...@levyhsu.com> wrote:
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
Ok.
>
> This patch adds smax/smin support for partially vectorized V2BF/V4BF.
>
> gcc/ChangeLog:
>
>         * config/i386/mmx.md (<code><mode>3): New define_expand for
>         V2BF/V4BF smaxmin.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c: New test.
> ---
>  gcc/config/i386/mmx.md                        | 19 ++++++++++
>  .../avx10_2-partial-bf-vector-smaxmin-1.c     | 36 +++++++++++++++++++
>  2 files changed, 55 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 9116ddb5321..3f12a1349ab 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -2098,6 +2098,25 @@
>    DONE;
>  })
>
> +(define_expand "<code><mode>3"
> +  [(set (match_operand:VBF_32_64 0 "register_operand")
> +    (smaxmin:VBF_32_64
> +      (match_operand:VBF_32_64 1 "nonimmediate_operand")
> +      (match_operand:VBF_32_64 2 "nonimmediate_operand")))]
> +  "TARGET_AVX10_2_256"
> +{
> +  rtx op0 = gen_reg_rtx (V8BFmode);
> +  rtx op1 = lowpart_subreg (V8BFmode,
> +                           force_reg (<MODE>mode, operands[1]), <MODE>mode);
> +  rtx op2 = lowpart_subreg (V8BFmode,
> +                           force_reg (<MODE>mode, operands[2]), <MODE>mode);
> +
> +  emit_insn (gen_<code>v8bf3 (op0, op1, op2));
> +
> +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
> +  DONE;
> +})
> +
>  (define_expand "sqrt<mode>2"
>    [(set (match_operand:VHF_32_64 0 "register_operand")
>         (sqrt:VHF_32_64
> diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
> new file mode 100644
> index 00000000000..0a7cc58e29d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mavx10.2 -Ofast" } */
> +/* /* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
> +/* /* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
> +
> +void
> +maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
> +{
> +  int i;
> +  for (i = 0; i < 4; i++)
> +    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
> +}
> +
> +void
> +maxpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
> +{
> +  int i;
> +  for (i = 0; i < 2; i++)
> +    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
> +}
> +
> +void
> +minpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
> +{
> +  int i;
> +  for (i = 0; i < 4; i++)
> +    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
> +}
> +
> +void
> +minpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
> +{
> +  int i;
> +  for (i = 0; i < 2; i++)
> +    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
> +}
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to