On 7/12/24 03:23, Jiang, Haochen wrote:
-----Original Message-----
From: Hongtao Liu <crazy...@gmail.com>
Sent: Thursday, July 11, 2024 9:45 AM
To: Victor Do Nascimento <victor.donascime...@arm.com>
Cc: gcc-patches@gcc.gnu.org; richard.sandif...@arm.com;
richard.earns...@arm.com
Subject: Re: [PATCH 05/10] i386: Fix dot_prod backend patterns for mmx and
sse targets

On Wed, Jul 10, 2024 at 10:10 PM Victor Do Nascimento
<victor.donascime...@arm.com> wrote:

Following the migration of the dot_prod optab from a direct to a
conversion-type optab, ensure all back-end patterns incorporate the
second machine mode into pattern names.
The patch LGTM. BTW you can use the existing <ssedvecmodelower> instead of
the new <fourwayacc>, and <sseunpackmodelower> instead of <twowayacc>.

gcc/ChangeLog:

         * config/i386/mmx.md (usdot_prodv8qi): Deleted.
         (usdot_prodv2siv8qi): New.

Hi Victor,

I suppose all the patterns are renamed rather than deleted and newly added, right?
If that is the case, I suppose the log might be better and easier to understand
if changed to something like:

(old pattern): Renamed to ...
(new pattern): this.

Thx,
Haochen

You're right, it's a straightforward renaming. I will amend the changelogs as per your suggestion.

Thanks for the tip!
Victor

         (sdot_prodv8qi): Deleted.
         (sdot_prodv2siv8qi): New.
         (udot_prodv8qi): Deleted.
         (udot_prodv2siv8qi): New.
         (usdot_prodv4hi): Deleted.
         (usdot_prodv2siv4hi): New.
         (udot_prodv4hi): Deleted.
         (udot_prodv2siv4hi): New.
         (sdot_prodv4hi): Deleted.
         (sdot_prodv2siv4hi): New.
         * config/i386/sse.md (fourwayacc): New.
         (twowayacc): New.
         (sdot_prod<mode>): Deleted.
         (sdot_prod<twowayacc><mode>): New.
         (sdot_prodv4si): Deleted.
         (sdot_prodv2div4si): New.
         (usdot_prod<mode>): Deleted.
         (usdot_prod<fourwayacc><mode>): New.
         (sdot_prod<mode>): Deleted.
         (sdot_prod<fourwayacc><mode>): New.
         (sdot_prodv64qi): Deleted.
         (sdot_prodv16siv64qi): New.
         (udot_prod<mode>): Deleted.
         (udot_prod<fourwayacc><mode>): New.
         (udot_prodv64qi): Deleted.
         (udot_prodv16siv64qi): New.
         (usdot_prod<mode>): Deleted.
         (usdot_prod<twowayacc><mode>): New.
         (udot_prod<mode>): Deleted.
         (udot_prod<twowayacc><mode>): New.
---
  gcc/config/i386/mmx.md | 30 +++++++++++++--------------
  gcc/config/i386/sse.md | 47 +++++++++++++++++++++++++-----------------
  2 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 94d3a6e5692..d78739b033d 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -6344,7 +6344,7 @@ (define_expand "usadv8qi"
    DONE;
  })

-(define_expand "usdot_prodv8qi"
+(define_expand "usdot_prodv2siv8qi"
    [(match_operand:V2SI 0 "register_operand")
     (match_operand:V8QI 1 "register_operand")
     (match_operand:V8QI 2 "register_operand") @@ -6363,7 +6363,7 @@
(define_expand "usdot_prodv8qi"
        rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
        rtx op0 = gen_reg_rtx (V4SImode);

-      emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3));
+      emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3));
        emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
V4SImode));
       }
     else
@@ -6377,7 +6377,7 @@ (define_expand "usdot_prodv8qi"
        emit_move_insn (op3, CONST0_RTX (V4SImode));
        emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
        emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
-      emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+      emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));

        /* vec_perm (op0, 2, 3, 0, 1);  */
        emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@
-6388,7 +6388,7 @@ (define_expand "usdot_prodv8qi"
      DONE;
  })

-(define_expand "sdot_prodv8qi"
+(define_expand "sdot_prodv2siv8qi"
    [(match_operand:V2SI 0 "register_operand")
     (match_operand:V8QI 1 "register_operand")
     (match_operand:V8QI 2 "register_operand") @@ -6406,7 +6406,7 @@
(define_expand "sdot_prodv8qi"
        rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
        rtx op0 = gen_reg_rtx (V4SImode);

-      emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3));
+      emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3));
        emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
V4SImode));
      }
    else
@@ -6420,7 +6420,7 @@ (define_expand "sdot_prodv8qi"
        emit_move_insn (op3, CONST0_RTX (V4SImode));
        emit_insn (gen_extendv8qiv8hi2 (op1, operands[1]));
        emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
-      emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+      emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));

        /* vec_perm (op0, 2, 3, 0, 1);  */
        emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@
-6432,7 +6432,7 @@ (define_expand "sdot_prodv8qi"

  })

-(define_expand "udot_prodv8qi"
+(define_expand "udot_prodv2siv8qi"
    [(match_operand:V2SI 0 "register_operand")
     (match_operand:V8QI 1 "register_operand")
     (match_operand:V8QI 2 "register_operand") @@ -6450,7 +6450,7 @@
(define_expand "udot_prodv8qi"
        rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
        rtx op0 = gen_reg_rtx (V4SImode);

-      emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3));
+      emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3));
        emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
V4SImode));
      }
    else
@@ -6464,7 +6464,7 @@ (define_expand "udot_prodv8qi"
        emit_move_insn (op3, CONST0_RTX (V4SImode));
        emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
        emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2]));
-      emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+      emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));

        /* vec_perm (op0, 2, 3, 0, 1);  */
        emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@
-6476,7 +6476,7 @@ (define_expand "udot_prodv8qi"

  })

-(define_expand "usdot_prodv4hi"
+(define_expand "usdot_prodv2siv4hi"
    [(match_operand:V2SI 0 "register_operand")
     (match_operand:V4HI 1 "register_operand")
     (match_operand:V4HI 2 "register_operand") @@ -6492,12 +6492,12
@@
(define_expand "usdot_prodv4hi"
    rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
    rtx op0 = gen_reg_rtx (V4SImode);

-  emit_insn (gen_usdot_prodv8hi (op0, op1, op2, op3));
+  emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3));
    emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
V4SImode));
    DONE;
  })

-(define_expand "udot_prodv4hi"
+(define_expand "udot_prodv2siv4hi"
    [(match_operand:V2SI 0 "register_operand")
     (match_operand:V4HI 1 "register_operand")
     (match_operand:V4HI 2 "register_operand") @@ -6513,12 +6513,12
@@
(define_expand "udot_prodv4hi"
    rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
    rtx op0 = gen_reg_rtx (V4SImode);

-  emit_insn (gen_udot_prodv8hi (op0, op1, op2, op3));
+  emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3));
    emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
V4SImode));
    DONE;
  })

-(define_expand "sdot_prodv4hi"
+(define_expand "sdot_prodv2siv4hi"
    [(match_operand:V2SI 0 "register_operand")
     (match_operand:V4HI 1 "register_operand")
     (match_operand:V4HI 2 "register_operand") @@ -6534,7 +6534,7 @@
(define_expand "sdot_prodv4hi"
    rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
    rtx op0 = gen_reg_rtx (V4SImode);

-  emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+  emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
    emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
V4SImode));
    DONE;
  })
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index bda66d5e121..861b87bb50f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1195,6 +1195,15 @@ (define_mode_attr ssexmmmode
     (V16SF "V4SF")  (V8SF "V4SF")  (V4SF "V4SF")
     (V8DF "V2DF")   (V4DF "V2DF")  (V2DF "V2DF")])

+;; Mapping of input type to 4-way accumulated type
+(define_mode_attr fourwayacc
+  [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")])
+
+;; Mapping of input type to 2-way accumulated type
+(define_mode_attr twowayacc
+  [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")
+   (V32QI "v16hi") (V16QI "v8hi")])
+
  ;; Pointer size override for scalar modes (Intel asm dialect)
(define_mode_attr iptr
    [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q") @@ -16712,7
+16721,7 @@ (define_mode_attr SDOT_PMADD_SUF  (define_mode_attr
SDOT_VPDP_SUF
    [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")])

-(define_expand "sdot_prod<mode>"
+(define_expand "sdot_prod<twowayacc><mode>"
    [(match_operand:<sseunpackmode> 0 "register_operand")
     (match_operand:VI2_AVX512VNNIBW 1 "register_operand")
     (match_operand:VI2_AVX512VNNIBW 2 "register_operand") @@ -
16747,7
+16756,7 @@ (define_expand "sdot_prod<mode>"

  ;; Normally we use widen_mul_even/odd, but combine can't quite get it
all  ;; back together when madd is available.
-(define_expand "sdot_prodv4si"
+(define_expand "sdot_prodv2div4si"
    [(match_operand:V2DI 0 "register_operand")
     (match_operand:V4SI 1 "register_operand")
     (match_operand:V4SI 2 "register_operand") @@ -30290,7 +30299,7 @@
(define_insn "vpshldv_<mode>_maskz_1"
     [(set_attr ("prefix") ("evex"))
     (set_attr "mode" "<sseinsnmode>")])

-(define_expand "usdot_prod<mode>"
+(define_expand "usdot_prod<fourwayacc><mode>"
    [(match_operand:<ssedvecmode> 0 "register_operand")
     (match_operand:VI1_AVX512 1 "register_operand")
     (match_operand:VI1_AVX512 2 "register_operand") @@ -30328,9
+30337,9 @@ (define_expand "usdot_prod<mode>"
        rtx sum = gen_reg_rtx (<ssedvecmode>mode);

        emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
-      emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+      emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1,
+ op1_lo,
                                                     op2_lo, sum));
-      emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+      emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2,
+ op1_hi,
                                                     op2_hi, operands[3]));
        emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
      }
@@ -31149,7 +31158,7 @@ (define_int_attr vpdotprodtype
     (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
     (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])

-(define_expand "sdot_prod<mode>"
+(define_expand "sdot_prod<fourwayacc><mode>"
    [(match_operand:<ssedvecmode> 0 "register_operand")
     (match_operand:VI1_AVX2 1 "register_operand")
     (match_operand:VI1_AVX2 2 "register_operand") @@ -31185,9
+31194,9
@@ (define_expand "sdot_prod<mode>"
        rtx sum = gen_reg_rtx (<ssedvecmode>mode);

        emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
-      emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+      emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1,
+ op1_lo,
                                                     op2_lo, sum));
-      emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+      emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2,
+ op1_hi,
                                                     op2_hi, operands[3]));
        emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
      }
@@ -31195,7 +31204,7 @@ (define_expand "sdot_prod<mode>"
    DONE;
  })

-(define_expand "sdot_prodv64qi"
+(define_expand "sdot_prodv16siv64qi"
    [(match_operand:V16SI 0 "register_operand")
     (match_operand:V64QI 1 "register_operand")
     (match_operand:V64QI 2 "register_operand") @@ -31218,14 +31227,14
@@ (define_expand "sdot_prodv64qi"
    rtx sum = gen_reg_rtx (V16SImode);

    emit_move_insn (sum, CONST0_RTX (V16SImode));
-  emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
-  emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
+  emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
+ emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi,
+ operands[3]));

    emit_insn (gen_addv16si3 (operands[0], res1, res2));
    DONE;
  })

-(define_expand "udot_prod<mode>"
+(define_expand "udot_prod<fourwayacc><mode>"
    [(match_operand:<ssedvecmode> 0 "register_operand")
     (match_operand:VI1_AVX2 1 "register_operand")
     (match_operand:VI1_AVX2 2 "register_operand") @@ -31261,9
+31270,9
@@ (define_expand "udot_prod<mode>"
       rtx sum = gen_reg_rtx (<ssedvecmode>mode);

       emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
-     emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+     emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1,
+ op1_lo,
                                                     op2_lo, sum));
-     emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+     emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2,
+ op1_hi,
                                                     op2_hi, operands[3]));
       emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
     }
@@ -31271,7 +31280,7 @@ (define_expand "udot_prod<mode>"
    DONE;
  })

-(define_expand "udot_prodv64qi"
+(define_expand "udot_prodv16siv64qi"
    [(match_operand:V16SI 0 "register_operand")
     (match_operand:V64QI 1 "register_operand")
     (match_operand:V64QI 2 "register_operand") @@ -31294,8 +31303,8
@@
(define_expand "udot_prodv64qi"
    rtx sum = gen_reg_rtx (V16SImode);

    emit_move_insn (sum, CONST0_RTX (V16SImode));
-  emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
-  emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
+  emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
+ emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi,
+ operands[3]));

    emit_insn (gen_addv16si3 (operands[0], res1, res2));
    DONE;
@@ -31401,7 +31410,7 @@ (define_int_attr vpdpwprodtype
     (UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds")
     (UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")])

-(define_expand "usdot_prod<mode>"
+(define_expand "usdot_prod<twowayacc><mode>"
    [(match_operand:<sseunpackmode> 0 "register_operand")
     (match_operand:VI2_AVX2 1 "register_operand")
     (match_operand:VI2_AVX2 2 "register_operand") @@ -31419,7
+31428,7
@@ (define_expand "usdot_prod<mode>"
    DONE;
  })

-(define_expand "udot_prod<mode>"
+(define_expand "udot_prod<twowayacc><mode>"
    [(match_operand:<sseunpackmode> 0 "register_operand")
     (match_operand:VI2_AVX2 1 "register_operand")
     (match_operand:VI2_AVX2 2 "register_operand")
--
2.34.1



--
BR,
Hongtao

Reply via email to