Hello! The attached patch fixes unwanted MMX register generation on x86-32 by also allowing the target to allocate SSE registers for MMX moves. The difference between x86-32 and x86-64 was historical and is no longer needed. The patch also adds one missing inter-unit move handling to vec_concatv2di.
2017-05-18 Uros Bizjak <ubiz...@gmail.com> PR target/80799 * config/i386/mmx.md (*mov<mode>_internal): Enable alternatives 11, 12, 13 and 14 also for 32bit targets. Remove alternatives 15, 16, 17 and 18. * config/i386/sse.md (vec_concatv2di): Change alternative (!x, *y) to (x, ?!*Yn). testsuite/ChangeLog: 2017-05-18 Uros Bizjak <ubiz...@gmail.com> PR target/80799 * g++.dg/other/i386-11.C: New test. The patch was bootstrapped and regression-tested on x86_64-linux-gnu {,-m32}. Committed to mainline SVN; it will be backported to the release branches in a couple of days. Uros.
Index: config/i386/mmx.md =================================================================== --- config/i386/mmx.md (revision 248239) +++ config/i386/mmx.md (working copy) @@ -78,9 +78,9 @@ (define_insn "*mov<mode>_internal" [(set (match_operand:MMXMODE 0 "nonimmediate_operand" - "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!Ym,v,v,v,m,*x,*x,*x,m ,r ,Yi,!Ym,*Yi") + "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!Ym,v,v,v,m,r ,Yi,!Ym,*Yi") (match_operand:MMXMODE 1 "vector_move_operand" - "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!Yn,r ,C,v,m,v,C ,*x,m ,*x,Yj,r ,*Yj,!Yn"))] + "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!Yn,r ,C,v,m,v,Yj,r ,*Yj,!Yn"))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { @@ -146,7 +146,7 @@ [(set (attr "isa") (cond [(eq_attr "alternative" "0,1") (const_string "nox64") - (eq_attr "alternative" "2,3,4,9,10,11,12,13,14,19,20") + (eq_attr "alternative" "2,3,4,9,10,15,16") (const_string "x64") ] (const_string "*"))) @@ -159,14 +159,14 @@ (const_string "mmx") (eq_attr "alternative" "6,7,8,9,10") (const_string "mmxmov") - (eq_attr "alternative" "11,15") + (eq_attr "alternative" "11") (const_string "sselog1") - (eq_attr "alternative" "21,22") + (eq_attr "alternative" "17,18") (const_string "ssecvt") ] (const_string "ssemov"))) (set (attr "prefix_rex") - (if_then_else (eq_attr "alternative" "9,10,19,20") + (if_then_else (eq_attr "alternative" "9,10,15,16") (const_string "1") (const_string "*"))) (set (attr "prefix") @@ -181,7 +181,7 @@ (set (attr "mode") (cond [(eq_attr "alternative" "2") (const_string "SI") - (eq_attr "alternative" "11,12,15,16") + (eq_attr "alternative" "11,12") (cond [(ior (match_operand 0 "ext_sse_reg_operand") (match_operand 1 "ext_sse_reg_operand")) (const_string "XI") @@ -197,7 +197,7 @@ ] (const_string "TI")) - (and (eq_attr "alternative" "13,14,17,18") + (and (eq_attr "alternative" "13,14") (ior (match_test "<MODE>mode == V2SFmode") (not (match_test "TARGET_SSE2")))) (const_string "V2SF") Index: config/i386/sse.md 
=================================================================== --- config/i386/sse.md (revision 248239) +++ config/i386/sse.md (working copy) @@ -13863,10 +13863,10 @@ ;; movd instead of movq is required to handle broken assemblers. (define_insn "vec_concatv2di" [(set (match_operand:V2DI 0 "register_operand" - "=Yr,*x,x ,v ,Yi,v ,!x,x,v ,x,x,v") + "=Yr,*x,x ,v ,Yi,v ,x ,x,v ,x,x,v") (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" - " 0, 0,x ,Yv,r ,vm,*y,0,Yv,0,0,v") + " 0, 0,x ,Yv,r ,vm,?!*Yn,0,Yv,0,0,v") (match_operand:DI 2 "vector_move_operand" "*rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))] "TARGET_SSE" Index: testsuite/g++.dg/other/i386-11.C =================================================================== --- testsuite/g++.dg/other/i386-11.C (nonexistent) +++ testsuite/g++.dg/other/i386-11.C (working copy) @@ -0,0 +1,57 @@ +// PR target/80799 +// { dg-do compile { target i?86-*-* x86_64-*-* } } +// { dg-require-effective-target c++11 } +// { dg-options "-O2 -msse2" } + +#include <xmmintrin.h> +#include <emmintrin.h> + +class alignas(16) GSVector4i +{ +public: + __m128i m; + + explicit GSVector4i(__m128i m) + { + this->m = m; + } + + static void storel(void* p, const GSVector4i& v) + { + _mm_storel_epi64((__m128i*)p, v.m); + } + + static GSVector4i loadl(const void* p) + { + return GSVector4i(_mm_loadl_epi64((__m128i*)p)); + } + + bool eq(const GSVector4i& v) const + { + return _mm_movemask_epi8(_mm_cmpeq_epi32(m, v.m)) == 0xffff; + } +}; + + +union GIFRegTRXPOS +{ + unsigned long long u64; + void operator = (const GSVector4i& v) {GSVector4i::storel(this, v);} + bool operator != (const union GIFRegTRXPOS& r) const {return !((GSVector4i)r).eq(*this);} + operator GSVector4i() const {return GSVector4i::loadl(this);} +}; + +extern void dummy_call(); +extern GIFRegTRXPOS TRXPOS; + +void GIFRegHandlerTRXPOS(const GIFRegTRXPOS& p) +{ + if(p != TRXPOS) + { + dummy_call(); + } + + TRXPOS = (GSVector4i)p; +} + +// { dg-final { scan-assembler-not "%mm" } }