The attached WIP patch enables auto-vectorization of basic V2SF operations (plus, minus, mult, min/max). The compiler takes care that everything is loaded from memory via the movq insn, so the top two elements of the SSE register always remain zero.
We could probably vectorize some more operations (horizontal add, horizontal sub, addsub, and conversions). Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b40f443ba8a..d1c0e354162 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21007,9 +21007,11 @@ ix86_vector_mode_supported_p (machine_mode mode)
     return true;
   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
     return true;
-  if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
+  if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
+      && VALID_MMX_REG_MODE (mode))
     return true;
-  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
+  if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+      && VALID_MMX_REG_MODE_3DNOW (mode))
     return true;
   return false;
 }
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 472f90f9bc1..2d6fe9d1350 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -175,7 +175,13 @@
               ]
               (const_string "TI"))
 
-            (and (eq_attr "alternative" "13,14")
+            (and (eq_attr "alternative" "13")
+              (ior (and (match_test "<MODE>mode == V2SFmode")
+                        (not (match_test "TARGET_MMX_WITH_SSE")))
+                   (not (match_test "TARGET_SSE2"))))
+              (const_string "V2SF")
+
+            (and (eq_attr "alternative" "14")
               (ior (match_test "<MODE>mode == V2SFmode")
                    (not (match_test "TARGET_SSE2"))))
               (const_string "V2SF")
@@ -240,15 +246,30 @@
   "TARGET_3DNOW"
   "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
 
+(define_expand "addv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand")
+        (plus:V2SF
+          (match_operand:V2SF 1 "register_operand")
+          (match_operand:V2SF 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
+
 (define_insn "*mmx_addv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
-                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
-  "pfadd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+        (plus:V2SF
+          (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+          (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
+  "@
+   pfadd\t{%2, %0|%0, %2}
+   addps\t{%2, %0|%0, %2}
+   vaddps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,sse2_noavx,avx")
+   (set_attr "mmx_isa" "native,*,*")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "prefix_extra" "1,*,*")
+   (set_attr "mode" "V2SF,V4SF,V4SF")])
 
 (define_expand "mmx_subv2sf3"
   [(set (match_operand:V2SF 0 "register_operand")
@@ -262,17 +283,31 @@
           (match_operand:V2SF 1 "nonimmediate_operand")))]
   "TARGET_3DNOW")
 
+(define_expand "subv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand")
+        (minus:V2SF
+          (match_operand:V2SF 1 "register_operand")
+          (match_operand:V2SF 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (MINUS, V2SFmode, operands);")
+
 (define_insn "*mmx_subv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y,y")
-        (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
-                    (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
-  "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  [(set (match_operand:V2SF 0 "register_operand" "=y,y,x,Yv")
+        (minus:V2SF
+          (match_operand:V2SF 1 "register_mmxmem_operand" "0,ym,0,Yv")
+          (match_operand:V2SF 2 "register_mmxmem_operand" "ym,0,x,Yv")))]
+  "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
    pfsub\t{%2, %0|%0, %2}
-   pfsubr\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+   pfsubr\t{%1, %0|%0, %1}
+   subps\t{%2, %0|%0, %2}
+   vsubps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,sse2_noavx,avx")
+   (set_attr "mmx_isa" "native,native,*,*")
+   (set_attr "type" "mmxadd,mmxadd,sseadd,sseadd")
+   (set_attr "prefix_extra" "1,1,*,*")
+   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF")])
 
 (define_expand "mmx_mulv2sf3"
   [(set (match_operand:V2SF 0 "register_operand")
@@ -281,15 +316,30 @@
   "TARGET_3DNOW"
   "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
 
+(define_expand "mulv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand")
+        (mult:V2SF
+          (match_operand:V2SF 1 "register_operand")
+          (match_operand:V2SF 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
+
 (define_insn "*mmx_mulv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
-                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
-  "pfmul\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+        (mult:V2SF
+          (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+          (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
+  "@
+   pfmul\t{%2, %0|%0, %2}
+   mulps\t{%2, %0|%0, %2}
+   vmulps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,sse2_noavx,avx")
+   (set_attr "mmx_isa" "native,*,*")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "prefix_extra" "1,*,*")
+   (set_attr "mode" "V2SF,V4SF,V4SF")])
 
 (define_expand "mmx_<code>v2sf3"
   [(set (match_operand:V2SF 0 "register_operand")
@@ -309,21 +359,44 @@
     ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
 })
 
+(define_expand "<code>v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand")
+        (smaxmin:V2SF
+          (match_operand:V2SF 1 "register_operand")
+          (match_operand:V2SF 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
+{
+  if (!flag_finite_math_only || flag_signed_zeros)
+    {
+      emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3
+                 (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else
+    ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
+})
+
 ;; These versions of the min/max patterns are intentionally ignorant of
 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
 ;; are undefined in this condition, we're certain this is correct.
 
 (define_insn "*mmx_<code>v2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
         (smaxmin:V2SF
-          (match_operand:V2SF 1 "nonimmediate_operand" "%0")
-          (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
-  "pf<maxmin_float>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+          (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+          (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
+  "@
+   pf<maxmin_float>\t{%2, %0|%0, %2}
+   <maxmin_float>ps\t{%2, %0|%0, %2}
+   v<maxmin_float>ps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,sse2_noavx,avx")
+   (set_attr "mmx_isa" "native,*,*")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "prefix_extra" "1,*,*")
+   (set_attr "mode" "V2SF,V4SF,V4SF")])
 
 ;; These versions of the min/max patterns implement exactly the operations
 ;;   min = (op1 < op2 ? op1 : op2)
@@ -332,16 +405,21 @@
 ;; presence of -0.0 and NaN.
 
 (define_insn "mmx_ieee_<ieee_maxmin>v2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
         (unspec:V2SF
-          [(match_operand:V2SF 1 "register_operand" "0")
-           (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+          [(match_operand:V2SF 1 "register_operand" "0,0,Yv")
+           (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")]
           IEEE_MAXMIN))]
-  "TARGET_3DNOW"
-  "pf<ieee_maxmin>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "mode" "V2SF")])
+  "TARGET_3DNOW || TARGET_MMX_WITH_SSE"
+  "@
+   pf<ieee_maxmin>\t{%2, %0|%0, %2}
+   <ieee_maxmin>ps\t{%2, %0|%0, %2}
+   v<ieee_maxmin>ps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,sse2_noavx,avx")
+   (set_attr "mmx_isa" "native,*,*")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "prefix_extra" "1,*,*")
+   (set_attr "mode" "V2SF,V4SF,V4SF")])
 
 (define_insn "mmx_rcpv2sf2"
   [(set (match_operand:V2SF 0 "register_operand" "=y")