On Mon, Feb 4, 2019 at 12:23 PM Uros Bizjak <ubiz...@gmail.com> wrote:
>
> On Mon, Feb 4, 2019 at 12:04 PM Richard Biener
> <richard.guent...@gmail.com> wrote:
> >
> > On Mon, Feb 4, 2019 at 10:10 AM Uros Bizjak <ubiz...@gmail.com> wrote:
> > >
> > > On Fri, Feb 1, 2019 at 10:18 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> > > >
> > > > On x86-64, since __m64 is returned and passed in XMM registers, we
> > > > can implement MMX intrinsics with SSE instructions.  To support it,
> > > > we disable MMX by default in 64-bit mode so that MMX registers
> > > > won't be available with x86-64.  Most MMX instructions have
> > > > equivalent SSE versions, and the results of some SSE versions need
> > > > to be reshuffled into the right order for MMX.  There are a couple
> > > > of tricky cases:
> > >
> > > I don't think we have to disable MMX registers, but we have to tune
> > > register allocation preferences to not allocate an MMX register
> > > unless really necessary.  In practice, this means changing the y
> > > constraints to *y when TARGET_MMX_WITH_SSE is active (probably using
> > > the enabled attribute).  This would solve the problem with assembler
> > > clobbers that Andi exposed.
> >
> > But is "unless really necessary" good enough to not have it wrongly
> > under any circumstance?  I actually like HJ's patch (not looked at the
> > details though).  I'd have gone a more aggressive way of simply
> > defaulting to -mno-mmx without any emulation or whatnot though.
>
> Please see the attached *prototype* patch that enables vectorization for
>
> void
> foo (char *restrict r, char *restrict a, char *restrict b)
> {
>   int i;
>
>   for (i = 0; i < 8; i++)
>     r[i] = a[i] + b[i];
> }
>
> with and without -mmmx.  The pattern is defined as:
>
> (define_insn "*mmx_<plusminus_insn><mode>3"
>   [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,v")
>         (plusminus:MMXMODEI8
>           (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,v")
>           (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,v")))]
>   "(TARGET_MMX || TARGET_MMX_WITH_SSE)
>    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
>   "@
>    p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
>    p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
>    vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
>   [(set_attr "type" "mmxadd")
>    (set_attr "mode" "DI")
>    (set (attr "enabled")
>      (cond [(eq_attr "alternative" "1")
>               (symbol_ref "TARGET_MMX_WITH_SSE")
>             (eq_attr "alternative" "2")
>               (symbol_ref "TARGET_AVX && TARGET_MMX_WITH_SSE")
>            ]
>            (symbol_ref ("!TARGET_MMX_WITH_SSE"))))])
>
> so there is no way an MMX register gets allocated with
> TARGET_MMX_WITH_SSE.  We have had MMX registers enabled in move insns
> for years, and there were no problems with current register
> preferences.  So I'm pretty confident that the above is enough to
> prevent unwanted MMX moves, while still allowing MMX registers.
>
> With the above approach, we can enable TARGET_MMX_WITH_SSE
> unconditionally for 64-bit SSE2 targets, since we will still allow MMX
> regs.  Please note that there is no requirement to use MMX instructions
> for MMX intrinsics, so we can emit _all_ MMX intrinsics using HJ's
> conversion unconditionally.
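To make the __m64-in-XMM point concrete, a minimal sketch (an
illustration only, not from any posted patch; _mm_add_pi8 is the real
mmintrin.h intrinsic, and the mapping described in the comment is the
one TARGET_MMX_WITH_SSE is expected to produce):

#include <mmintrin.h>

/* On x86-64, the psABI passes and returns __m64 in XMM registers, so
   this MMX intrinsic can be carried out as an SSE2 paddb on the low
   8 bytes of an XMM register -- no MMX register and no emms needed.  */
__m64
add_bytes (__m64 a, __m64 b)
{
  return _mm_add_pi8 (a, b);
}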
Attached is the patch that enables alternatives in a much more
convenient way:

(define_insn "*mmx_<plusminus_insn><mode>3"
  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,v")
        (plusminus:MMXMODEI8
          (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,v")
          (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,v")))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
   (set_attr "type" "mmxadd")
   (set_attr "mode" "DI")])

Uros.
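As background on the assembler clobbers mentioned earlier in the
thread, a minimal sketch of the kind of inline asm that needs the MMX
registers to keep existing in the machine description (the function and
clobber list here are illustrative only, not taken from any posted
patch):

/* Illustrative only: inline asm that clobbers the whole MMX register
   file.  Because the mmx_isa approach keeps the MMX registers in the
   target description (they are merely never preferred under
   TARGET_MMX_WITH_SSE), clobbers like these remain representable.  */
static inline void
flush_mmx_state (void)
{
  __asm__ volatile ("emms"
                    : /* no outputs */
                    : /* no inputs */
                    : "mm0", "mm1", "mm2", "mm3",
                      "mm4", "mm5", "mm6", "mm7");
}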
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4e67abe87646..62ad919d2596 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -50050,6 +50050,9 @@ ix86_autovectorize_vector_sizes (vector_sizes *sizes)
       sizes->safe_push (32);
       sizes->safe_push (16);
     }
+
+  if (TARGET_MMX_WITH_SSE)
+    sizes->safe_push (8);
 }
 
 /* Implemenation of targetm.vectorize.get_mask_mode.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5d..30e05e9703d4 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -585,6 +585,8 @@ extern unsigned char ix86_arch_features[X86_ARCH_LAST];
 
 #define TARGET_FISTTP	(TARGET_SSE3 && TARGET_80387)
 
+#define TARGET_MMX_WITH_SSE	(TARGET_64BIT && TARGET_SSE2)
+
 extern unsigned char x86_prefetch_sse;
 
 #define TARGET_PREFETCH_SSE	x86_prefetch_sse
@@ -1145,7 +1147,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
-   || (MODE) == V2DImode || (MODE) == DFmode)
+   || (MODE) == V2DImode || (MODE) == DFmode				\
+   || (TARGET_MMX_WITH_SSE && VALID_MMX_REG_MODE (MODE)))
 
 #define VALID_SSE_REG_MODE(MODE)					\
   ((MODE) == V1TImode || (MODE) == TImode				\
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 744f155fca6f..dd6c04db5e63 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -792,6 +792,9 @@
 	 avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
 
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64_noavx,x64_avx" (const_string "base"))
+
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
 	 (eq_attr "isa" "x64_sse2")
@@ -830,6 +833,13 @@
 	 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
 	 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
 	 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
+
+	 (eq_attr "mmx_isa" "native")
+	   (symbol_ref "!TARGET_MMX_WITH_SSE")
+	 (eq_attr "mmx_isa" "x64_avx")
+	   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
+	 (eq_attr "mmx_isa" "x64_noavx")
+	   (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
 	]
 	(const_int 1)))
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c1e0f2c411e6..125b2bf5b373 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -45,7 +45,7 @@
 
 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
-(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
 
 ;; All 8-byte vector modes handled by MMX
 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
@@ -70,7 +70,7 @@
 (define_expand "mov<mode>"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
 	(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
   DONE;
@@ -81,7 +81,7 @@
     "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x")
 	(match_operand:MMXMODE 1 "nonimm_or_0_operand"
     "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))]
-  "TARGET_MMX
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -690,18 +690,30 @@
 	(plusminus:MMXMODEI8
 	  (match_operand:MMXMODEI8 1 "nonimmediate_operand")
 	  (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
-  "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
+  "TARGET_MMX"
+  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_expand "<plusminus_insn><mode>3"
+  [(set (match_operand:MMXMODEI8 0 "register_operand")
+	(plusminus:MMXMODEI8
+	  (match_operand:MMXMODEI8 1 "nonimmediate_operand")
+	  (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*mmx_<plusminus_insn><mode>3"
-  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,v")
 	(plusminus:MMXMODEI8
-	  (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0")
-	  (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+	  (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0,0,v")
+	  (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,v")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
+  "@
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd")
    (set_attr "mode" "DI")])
 
 (define_expand "mmx_<plusminus_insn><mode>3"