Re: [PATCH 00/46] Implement MMX intrinsics with SSE

Uros Bizjak Mon, 04 Feb 2019 05:01:42 -0800

On Mon, Feb 4, 2019 at 12:23 PM Uros Bizjak <ubiz...@gmail.com> wrote:
>
> On Mon, Feb 4, 2019 at 12:04 PM Richard Biener
> <richard.guent...@gmail.com> wrote:
> >
> > On Mon, Feb 4, 2019 at 10:10 AM Uros Bizjak <ubiz...@gmail.com> wrote:
> > >
> > > On Fri, Feb 1, 2019 at 10:18 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> > > >
> > > > On x86-64, since __m64 is returned and passed in XMM registers, we can
> > > > implement MMX intrinsics with SSE instructions. To support it, we 
> > > > disable
> > > > MMX by default in 64-bit mode so that MMX registers won't be available
> > > > with x86-64.  Most of MMX instructions have equivalent SSE versions and
> > > > results of some SSE versions need to be reshuffled to the right order
> > > > for MMX.  Thee are couple tricky cases:
> > >
> > > I don't think we have to disable MMX registers, but we have to tune
> > > register allocation preferences to not allocate MMX register unless
> > > really necessary. In practice, this means to change y constraints to
> > > *y when TARGET_MMX_WITH_SSE is active (probably using enable
> > > attribute). This would solve problem with assembler clobbers that Andi
> > > exposed.
> >
> > But is "unless really necessary" good enough to not have it wrongly
> > under any circumstance?  I actually like HJs patch (not looked at the
> > details though).  I'd have gone a more aggressive way of simply defaulting
> > to -mno-mmx without any emulation or whatnot though.
>
> Please see attached *prototype* patch that enables vectorization for
>
> void
> foo (char *restrict r, char *restrict a, char *restrict b)
> {
>   int i;
>
>   for (i = 0; i < 8; i++)
>     r[i] = a[i] + b[i];
> }
>
> with and without -mmmx. The pattern is defined as:
>
> (define_insn "*mmx_<plusminus_insn><mode>3"
>   [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,v")
>         (plusminus:MMXMODEI8
>       (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,v")
>       (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,v")))]
>   "(TARGET_MMX || TARGET_MMX_WITH_SSE)
>    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
>   "@
>    p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
>    p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
>    vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
>   [(set_attr "type" "mmxadd")
>    (set_attr "mode" "DI")
>    (set (attr "enabled")
>      (cond [(eq_attr "alternative" "1")
>           (symbol_ref "TARGET_MMX_WITH_SSE")
>         (eq_attr "alternative" "2")
>             (symbol_ref "TARGET_AVX && TARGET_MMX_WITH_SSE")
>          ]
>          (symbol_ref ("!TARGET_MMX_WITH_SSE"))))])
>
> so, there is no way mmx register gets allocated with
> TARGET_MMX_WITH_SSE. We have had MMX registers enabled in move insns
> for years, and there were no problems with current register
> preferences. So, I'm pretty confident that the above is enough to
> prevent unwanted MMX moves, while still allowing MMX registers.
>
> With the above approach, we can enable TARGET_MMX_WITH_SSE
> unconditionally for 64bit SSE2 targets, since we will still allow MMX
> regs. Please note that there is no requirement to use MMX instructions
> for MMX intrinsics, so we can emit _all_ MMX intrinsics using HJ's
> conversion unconditionally.


Attached is the patch that enables alternatives in a much more convenient way:

(define_insn "*mmx_<plusminus_insn><mode>3"
  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,v")
        (plusminus:MMXMODEI8
      (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,v")
      (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,v")))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
   (set_attr "type" "mmxadd")
   (set_attr "mode" "DI")])

Uros.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4e67abe87646..62ad919d2596 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -50050,6 +50050,9 @@ ix86_autovectorize_vector_sizes (vector_sizes *sizes)
       sizes->safe_push (32);
       sizes->safe_push (16);
     }
+
+  if (TARGET_MMX_WITH_SSE)
+    sizes->safe_push (8);
 }
 
 /* Implemenation of targetm.vectorize.get_mask_mode.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5d..30e05e9703d4 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -585,6 +585,8 @@ extern unsigned char ix86_arch_features[X86_ARCH_LAST];
 
 #define TARGET_FISTTP          (TARGET_SSE3 && TARGET_80387)
 
+#define TARGET_MMX_WITH_SSE    (TARGET_64BIT && TARGET_SSE2)
+
 extern unsigned char x86_prefetch_sse;
 #define TARGET_PREFETCH_SSE    x86_prefetch_sse
 
@@ -1145,7 +1147,8 @@ extern const char *host_detect_local_cpu (int argc, const 
char **argv);
 
 #define VALID_SSE2_REG_MODE(MODE)                                      \
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode     \
-   || (MODE) == V2DImode || (MODE) == DFmode)
+   || (MODE) == V2DImode || (MODE) == DFmode                           \
+   || (TARGET_MMX_WITH_SSE && VALID_MMX_REG_MODE (mode)))
 
 #define VALID_SSE_REG_MODE(MODE)                                       \
   ((MODE) == V1TImode || (MODE) == TImode                              \
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 744f155fca6f..dd6c04db5e63 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -792,6 +792,9 @@
                    avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
 
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64_noavx,x64_avx" (const_string "base"))
+
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
         (eq_attr "isa" "x64_sse2")
@@ -830,6 +833,13 @@
         (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
         (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
         (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
+
+        (eq_attr "mmx_isa" "native")
+          (symbol_ref "!TARGET_MMX_WITH_SSE")
+        (eq_attr "mmx_isa" "x64_avx")
+          (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
+        (eq_attr "mmx_isa" "x64_noavx")
+          (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
        ]
        (const_int 1)))
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c1e0f2c411e6..125b2bf5b373 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -45,7 +45,7 @@
 
 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
-(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
 
 ;; All 8-byte vector modes handled by MMX
 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
@@ -70,7 +70,7 @@
 (define_expand "mov<mode>"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
        (match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
   DONE;
@@ -81,7 +81,7 @@
     "=r ,o ,r,r ,m ,?!y,!y,?!y,m  ,r  ,?!y,v,v,v,m,r,v,!y,*x")
        (match_operand:MMXMODE 1 "nonimm_or_0_operand"
     "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!y,r  ,C,v,m,v,v,r,*x,!y"))]
-  "TARGET_MMX
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -690,18 +690,30 @@
        (plusminus:MMXMODEI8
          (match_operand:MMXMODEI8 1 "nonimmediate_operand")
          (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
-  "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
+  "TARGET_MMX"
+  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_expand "<plusminus_insn><mode>3"
+  [(set (match_operand:MMXMODEI8 0 "register_operand")
+       (plusminus:MMXMODEI8
+         (match_operand:MMXMODEI8 1 "nonimmediate_operand")
+         (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*mmx_<plusminus_insn><mode>3"
-  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,v")
         (plusminus:MMXMODEI8
-         (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0")
-         (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+         (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,v")
+         (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,v")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "@
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd")
    (set_attr "mode" "DI")])
 
 (define_expand "mmx_<plusminus_insn><mode>3"

Re: [PATCH 00/46] Implement MMX intrinsics with SSE

Reply via email to