On Thu, 17 Jun 2021, Uros Bizjak wrote:
> On Thu, Jun 17, 2021 at 11:44 AM Richard Biener <[email protected]> wrote:
> >
> > This adds SLP pattern recognition for the SSE3/AVX [v]addsubp{ds} v0, v1
> > instructions which compute { v0[0] - v1[0], v0[1] + v1[1], ... },
> > thus subtract, add alternating on lanes, starting with subtract.
> >
> > It adds a corresponding optab and direct internal function,
> > vec_subadd$a3 and at the moment to make the i386 backend changes
> > "obvious", duplicates the existing avx_addsubv4df3 pattern with
> > the new canonical name (CODE_FOR_* and gen_* are used throughout the
> > intrinsic code, so the actual change to rename all existing patterns
> > will be quite a bit bigger). I expect some bike-shedding on
> > subadd vs. addsub so I delay that change ;)
> >
> > ARC seems to have both addsub{V2HI,V2SI,V4HI} and
> > subadd{V2HI,V2SI,V4HI}. ARM iwmmxt has wsubaddx on V4HI but
> > I didn't dare to decipher whether it's -, + or +, -. bfin
> > has both variants as well plus a saturating variant of both (on v2hi).
> > But none of the major vector ISAs besides x86 seem to have it.
> > Still some targets having both and opting for the naming I choose
> > would make that a better fit IMHO.
> >
> > Uros/Hontao - would mass-renaming of the x86 addsub patterns
> > to subadd be OK? (not touching the FMA ones which have both)
>
> We can add define_expand (this is how we approached the problem in the
> past for x86). If there are not too many of them, then this is the
> preferred approach (see below) ...
OK.
> ... until someone comes along and mass-renames everything to a generic name.
Though now that I tried it isn't so bad for the addsub case (see
patch below). Would that be preferred, or would it be an incomplete
renaming and thus the define_expand preferred?
Thanks,
Richard.
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 80c2a2c0294..3222eaedbd0 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -855,8 +855,8 @@ BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, 0,
CODE_FOR_mmx_subv1di3, "__
BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_movshdup,
"__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int)
V4SF_FTYPE_V4SF)
BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_movsldup,
"__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int)
V4SF_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_addsubv4sf3,
"__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int)
V4SF_FTYPE_V4SF_V4SF)
-BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_addsubv2df3,
"__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int)
V2DF_FTYPE_V2DF_V2DF)
+BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_vec_subaddv4sf3,
"__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int)
V4SF_FTYPE_V4SF_V4SF)
+BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_vec_subaddv2df3,
"__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int)
V2DF_FTYPE_V2DF_V2DF)
BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_haddv4sf3,
"__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int)
V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_haddv2df3,
"__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int)
V2DF_FTYPE_V2DF_V2DF)
BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv4sf3,
"__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int)
V4SF_FTYPE_V4SF_V4SF)
@@ -996,8 +996,8 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_pclmulqdq, 0,
IX86_BUILTIN_PCLMULQDQ128
/* AVX */
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_addv4df3, "__builtin_ia32_addpd256",
IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_addv8sf3, "__builtin_ia32_addps256",
IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_addsubv4df3,
"__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int)
V4DF_FTYPE_V4DF_V4DF)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_addsubv8sf3,
"__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int)
V8SF_FTYPE_V8SF_V8SF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_subaddv4df3,
"__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int)
V4DF_FTYPE_V4DF_V4DF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_subaddv8sf3,
"__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int)
V8SF_FTYPE_V8SF_V8SF)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_andv4df3, "__builtin_ia32_andpd256",
IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_andv8sf3, "__builtin_ia32_andps256",
IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_andnotv4df3,
"__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int)
V4DF_FTYPE_V4DF_V4DF)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 73238c9874a..48818dcf8cb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2396,8 +2396,6 @@
(set_attr "prefix" "<round_saeonly_scalar_prefix>")
(set_attr "mode" "<ssescalarmode>")])
-/* ??? Rename avx_addsubv4df3, but its name is used throughout the backed,
- so avoid this churn for the moment. */
(define_insn "vec_subaddv4df3"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_merge:V4DF
@@ -2412,21 +2410,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
-(define_insn "avx_addsubv4df3"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
- (vec_merge:V4DF
- (minus:V4DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
- (plus:V4DF (match_dup 1) (match_dup 2))
- (const_int 5)))]
- "TARGET_AVX"
- "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V4DF")])
-
-(define_insn "sse3_addsubv2df3"
+(define_insn "vec_subaddv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF
(minus:V2DF
@@ -2444,7 +2428,7 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V2DF")])
-(define_insn "avx_addsubv8sf3"
+(define_insn "vec_subaddv8sf3"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_merge:V8SF
(minus:V8SF
@@ -2458,7 +2442,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "sse3_addsubv4sf3"
+(define_insn "vec_subaddv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
(minus:V4SF