Hello! The attached patch rewrites the float patterns. When an x87 conversion is disabled by the X87_ENABLE_FLOAT macro, the generic expanders are able to correctly fall back to the wider FP mode pattern followed by a float truncation sequence, so there is no need to implement the same functionality manually in the expander.
The patch also re-enables the floatdi<X87MODEF:mode>2_i387_with_xmm pattern, so we are able to avoid store forwarding (partial memory) stalls when compiling DImode float conversions on 32bit targets, e.g.:

    double test (long long x) { return x + 1; }

compiled with -O2 -m32 -msse2 -mtune=intel -mfpmath=387 produces the following code:

    addl $1, %eax
    movd %eax, %xmm0
    adcl $0, %edx
    movd %edx, %xmm1
    punpckldq %xmm1, %xmm0
    movq %xmm0, (%esp)
    fildq (%esp)

2018-09-07  Uros Bizjak  <ubiz...@gmail.com>

    * config/i386/i386.md (float<SWI48x:mode><MODEF:mode>2): Enable DImode for x87 on 32bit targets. Conditionally disable x87 modes with X87_ENABLE_FLOAT. Remove preparation code.
    (*float<SWI48:mode><MODEF:mode>2): Rename from *float<SWI48:mode><MODEF:mode>2_mixed. Handle x87, SSE and mixed math using "enabled" attribute.
    (*floatdi<MODEF:mode>2_i387): Rename from *float<SWI48x:mode><MODEF:mode>2_i387. Handle only DImode and enable for 32bit targets only.
    (floatdi<X87MODEF:mode>2_i387_with_xmm pre-reload splitter): New splitter.
    (floatdi<X87MODEF:mode>2_i387_with_xmm): Use register_operand as operand 1 predicate. Rewrite as define_insn_and_split.
    (floatdi<X87MODEF:mode>2_i387_with_xmm memory input splitter): Remove.

Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Committed to mainline SVN.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a35c8a55cc6..cf920db27d7 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -5063,36 +5063,19 @@ (set_attr "znver1_decode" "double") (set_attr "fp_int_src" "true")]) -(define_expand "float<SWI48:mode><MODEF:mode>2" +(define_expand "float<SWI48x:mode><MODEF:mode>2" [(set (match_operand:MODEF 0 "register_operand") - (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))] - "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)" -{ - if (!(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH) - && !X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode)) - { - rtx reg = gen_reg_rtx (XFmode); - rtx (*insn)(rtx, rtx); - - emit_insn (gen_float<SWI48:mode>xf2 (reg, operands[1])); + (float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand")))] + "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode)) + || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH + && ((<SWI48x:MODE>mode != DImode) || TARGET_64BIT))") - if (<MODEF:MODE>mode == SFmode) - insn = gen_truncxfsf2; - else if (<MODEF:MODE>mode == DFmode) - insn = gen_truncxfdf2; - else - gcc_unreachable (); - - emit_insn (insn (operands[0], reg)); - DONE; - } -}) - -(define_insn "*float<SWI48:mode><MODEF:mode>2_mixed" +(define_insn "*float<SWI48:mode><MODEF:mode>2" [(set (match_operand:MODEF 0 "register_operand" "=f,v,v") (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand" "m,r,m")))] - "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH" + "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode)) + || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)" "@ fild%Z1\t%1 %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1} @@ -5113,21 +5096,28 @@ (set_attr "znver1_decode" "double,*,*") (set_attr "fp_int_src" "true") (set (attr "enabled") - (cond [(eq_attr "alternative" "0") - (symbol_ref "TARGET_MIX_SSE_I387 - && X87_ENABLE_FLOAT 
(<MODEF:MODE>mode, - <SWI48:MODE>mode)") - ] - (symbol_ref "true"))) + (if_then_else + (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH")) + (if_then_else + (eq_attr "alternative" "0") + (symbol_ref "TARGET_MIX_SSE_I387 + && X87_ENABLE_FLOAT (<MODEF:MODE>mode, + <SWI48:MODE>mode)") + (symbol_ref "true")) + (if_then_else + (eq_attr "alternative" "0") + (symbol_ref "true") + (symbol_ref "false")))) (set (attr "preferred_for_speed") (cond [(eq_attr "alternative" "1") (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")] (symbol_ref "true")))]) -(define_insn "*float<SWI48x:mode><MODEF:mode>2_i387" +(define_insn "*floatdi<MODEF:mode>2_i387" [(set (match_operand:MODEF 0 "register_operand" "=f") - (float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))] - "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode)" + (float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))] + "!TARGET_64BIT + && TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, DImode)" "fild%Z1\t%1" [(set_attr "type" "fmov") (set_attr "mode" "<MODEF:MODE>") @@ -5242,32 +5232,34 @@ ;; Avoid store forwarding (partial memory) stall penalty ;; by passing DImode value through XMM registers. 
*/ -(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm" - [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") - (float:X87MODEF - (match_operand:DI 1 "nonimmediate_operand" "m,?r"))) - (clobber (match_scratch:V4SI 3 "=X,x")) - (clobber (match_scratch:V4SI 4 "=X,x")) - (clobber (match_operand:DI 2 "memory_operand" "=X,m"))] +(define_split + [(set (match_operand:X87MODEF 0 "register_operand") + (float:X87MODEF + (match_operand:DI 1 "register_operand")))] "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC - && !TARGET_64BIT && optimize_function_for_speed_p (cfun)" - "#" - [(set_attr "type" "multi") - (set_attr "mode" "<X87MODEF:MODE>") - (set_attr "unit" "i387") - (set_attr "fp_int_src" "true")]) + && !TARGET_64BIT && optimize_function_for_speed_p (cfun) + && can_create_pseudo_p ()" + [(const_int 0)] +{ + emit_insn (gen_floatdi<mode>2_i387_with_xmm + (operands[0], operands[1], + assign_386_stack_local (DImode, SLOT_TEMP))); + DONE; +}) -(define_split - [(set (match_operand:X87MODEF 0 "fp_register_operand") - (float:X87MODEF (match_operand:DI 1 "register_operand"))) - (clobber (match_scratch:V4SI 3)) - (clobber (match_scratch:V4SI 4)) - (clobber (match_operand:DI 2 "memory_operand"))] +(define_insn_and_split "floatdi<X87MODEF:mode>2_i387_with_xmm" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF + (match_operand:DI 1 "register_operand" "r"))) + (clobber (match_scratch:V4SI 3 "=x")) + (clobber (match_scratch:V4SI 4 "=x")) + (clobber (match_operand:DI 2 "memory_operand" "=m"))] "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC - && !TARGET_64BIT && optimize_function_for_speed_p (cfun) - && reload_completed" + && !TARGET_64BIT && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" [(set (match_dup 2) (match_dup 3)) (set (match_dup 0) (float:X87MODEF (match_dup 2)))] { @@ -5281,19 +5273,11 @@ 
operands[4])); operands[3] = gen_lowpart (DImode, operands[3]); -}) - -(define_split - [(set (match_operand:X87MODEF 0 "fp_register_operand") - (float:X87MODEF (match_operand:DI 1 "memory_operand"))) - (clobber (match_scratch:V4SI 3)) - (clobber (match_scratch:V4SI 4)) - (clobber (match_operand:DI 2 "memory_operand"))] - "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode) - && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC - && !TARGET_64BIT && optimize_function_for_speed_p (cfun) - && reload_completed" - [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]) +} + [(set_attr "type" "multi") + (set_attr "mode" "<X87MODEF:MODE>") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true")]) (define_expand "floatuns<SWI12:mode><MODEF:mode>2" [(set (match_operand:MODEF 0 "register_operand")