On Mon, Feb 4, 2019 at 12:23 PM Uros Bizjak <[email protected]> wrote:
>
> On Mon, Feb 4, 2019 at 12:04 PM Richard Biener
> <[email protected]> wrote:
> >
> > On Mon, Feb 4, 2019 at 10:10 AM Uros Bizjak <[email protected]> wrote:
> > >
> > > On Fri, Feb 1, 2019 at 10:18 PM H.J. Lu <[email protected]> wrote:
> > > >
> > > > On x86-64, since __m64 is returned and passed in XMM registers, we can
> > > > implement MMX intrinsics with SSE instructions. To support it, we
> > > > disable
> > > > MMX by default in 64-bit mode so that MMX registers won't be available
> > > > with x86-64. Most of MMX instructions have equivalent SSE versions and
> > > > results of some SSE versions need to be reshuffled to the right order
> > > > for MMX. Thee are couple tricky cases:
> > >
> > > I don't think we have to disable MMX registers, but we have to tune
> > > register allocation preferences to not allocate MMX register unless
> > > really necessary. In practice, this means to change y constraints to
> > > *y when TARGET_MMX_WITH_SSE is active (probably using enable
> > > attribute). This would solve problem with assembler clobbers that Andi
> > > exposed.
> >
> > But is "unless really necessary" good enough to not have it wrongly
> > under any circumstance? I actually like HJs patch (not looked at the
> > details though). I'd have gone a more aggressive way of simply defaulting
> > to -mno-mmx without any emulation or whatnot though.
>
> Please see attached *prototype* patch that enables vectorization for
>
> void
> foo (char *restrict r, char *restrict a, char *restrict b)
> {
> int i;
>
> for (i = 0; i < 8; i++)
> r[i] = a[i] + b[i];
> }
>
> with and without -mmmx. The pattern is defined as:
>
> (define_insn "*mmx_<plusminus_insn><mode>3"
> [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,v")
> (plusminus:MMXMODEI8
> (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,v")
> (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,v")))]
> "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
> "@
> p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
> p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
> vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
> [(set_attr "type" "mmxadd")
> (set_attr "mode" "DI")
> (set (attr "enabled")
> (cond [(eq_attr "alternative" "1")
> (symbol_ref "TARGET_MMX_WITH_SSE")
> (eq_attr "alternative" "2")
> (symbol_ref "TARGET_AVX && TARGET_MMX_WITH_SSE")
> ]
> (symbol_ref ("!TARGET_MMX_WITH_SSE"))))])
>
> so, there is no way mmx register gets allocated with
> TARGET_MMX_WITH_SSE. We have had MMX registers enabled in move insns
> for years, and there were no problems with current register
> preferences. So, I'm pretty confident that the above is enough to
> prevent unwanted MMX moves, while still allowing MMX registers.
>
> With the above approach, we can enable TARGET_MMX_WITH_SSE
> unconditionally for 64bit SSE2 targets, since we will still allow MMX
> regs. Please note that there is no requirement to use MMX instructions
> for MMX intrinsics, so we can emit _all_ MMX intrinsics using HJ's
> conversion unconditionally.
Attached is the patch that enables alternatives in a much more convenient way:
(define_insn "*mmx_<plusminus_insn><mode>3"
[(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,v")
(plusminus:MMXMODEI8
(match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,v")
(match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,v")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4e67abe87646..62ad919d2596 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -50050,6 +50050,9 @@ ix86_autovectorize_vector_sizes (vector_sizes *sizes)
sizes->safe_push (32);
sizes->safe_push (16);
}
+
+ if (TARGET_MMX_WITH_SSE)
+ sizes->safe_push (8);
}
/* Implemenation of targetm.vectorize.get_mask_mode. */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5d..30e05e9703d4 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -585,6 +585,8 @@ extern unsigned char ix86_arch_features[X86_ARCH_LAST];
#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
+#define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)
+
extern unsigned char x86_prefetch_sse;
#define TARGET_PREFETCH_SSE x86_prefetch_sse
@@ -1145,7 +1147,8 @@ extern const char *host_detect_local_cpu (int argc, const
char **argv);
#define VALID_SSE2_REG_MODE(MODE) \
((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
- || (MODE) == V2DImode || (MODE) == DFmode)
+ || (MODE) == V2DImode || (MODE) == DFmode \
+ || (TARGET_MMX_WITH_SSE && VALID_MMX_REG_MODE (mode)))
#define VALID_SSE_REG_MODE(MODE) \
((MODE) == V1TImode || (MODE) == TImode \
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 744f155fca6f..dd6c04db5e63 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -792,6 +792,9 @@
avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
(const_string "base"))
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64_noavx,x64_avx" (const_string "base"))
+
(define_attr "enabled" ""
(cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
(eq_attr "isa" "x64_sse2")
@@ -830,6 +833,13 @@
(eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
(eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
(eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
+
+ (eq_attr "mmx_isa" "native")
+ (symbol_ref "!TARGET_MMX_WITH_SSE")
+ (eq_attr "mmx_isa" "x64_avx")
+ (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
+ (eq_attr "mmx_isa" "x64_noavx")
+ (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
]
(const_int 1)))
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c1e0f2c411e6..125b2bf5b373 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -45,7 +45,7 @@
;; 8 byte integral modes handled by MMX (and by extension, SSE)
(define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
-(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
;; All 8-byte vector modes handled by MMX
(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
@@ -70,7 +70,7 @@
(define_expand "mov<mode>"
[(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
- "TARGET_MMX"
+ "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_move (<MODE>mode, operands);
DONE;
@@ -81,7 +81,7 @@
"=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x")
(match_operand:MMXMODE 1 "nonimm_or_0_operand"
"rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))]
- "TARGET_MMX
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -690,18 +690,30 @@
(plusminus:MMXMODEI8
(match_operand:MMXMODEI8 1 "nonimmediate_operand")
(match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
- "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:MMXMODEI8 0 "register_operand")
+ (plusminus:MMXMODEI8
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand")
+ (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
+ "TARGET_MMX_WITH_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<plusminus_insn><mode>3"
- [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,v")
(plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
- "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,v")
+ (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,v")))]
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
+ "@
+ p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+ p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+ vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+ (set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
(define_expand "mmx_<plusminus_insn><mode>3"