Hello! As exposed by r235906 [1], we should not widen a DFmode memory access to V2DFmode (or, likewise, an SFmode memory access to V4SFmode) in the splitter.
The attached patch introduces two new patterns that use the correct mode for the memory operand. These two patterns are appropriate for the TARGET_SSE_PARTIAL_REG_DEPENDENCY splitters, as they don't need to widen the memory access.

2016-05-08  Uros Bizjak  <ubiz...@gmail.com>

	PR target/70998
	* config/i386/sse.md (*sse2_vd_cvtsd2ss): New insn pattern.
	(*sse2_vd_cvtss2sd): Ditto.
	* config/i386/i386.md (TARGET_SSE_PARTIAL_REG_DEPENDENCY
	float_truncate df->sf splitter): Generate *sse2_vd_cvtsd2ss pattern.
	(TARGET_SSE_PARTIAL_REG_DEPENDENCY float_extend sf->df splitter):
	Generate *sse2_vd_cvtss2sd pattern.

The patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

[1] https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=235906

Uros.
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 236007) +++ config/i386/i386.md (working copy) @@ -5192,13 +5192,12 @@ [(set (match_dup 0) (vec_merge:V4SF (vec_duplicate:V4SF - (float_truncate:V2SF + (float_truncate:SF (match_dup 1))) (match_dup 0) (const_int 1)))] { operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode); - operands[1] = lowpart_subreg (V2DFmode, operands[1], DFmode); emit_move_insn (operands[0], CONST0_RTX (V4SFmode)); }) @@ -5219,15 +5218,13 @@ || TARGET_AVX512VL)" [(set (match_dup 0) (vec_merge:V2DF - (float_extend:V2DF - (vec_select:V2SF - (match_dup 1) - (parallel [(const_int 0) (const_int 1)]))) - (match_dup 0) + (vec_duplicate:V2DF + (float_extend:DF + (match_dup 1))) + (match_dup 0) (const_int 1)))] { operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode); - operands[1] = lowpart_subreg (V4SFmode, operands[1], SFmode); emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); }) Index: config/i386/sse.md =================================================================== --- config/i386/sse.md (revision 236007) +++ config/i386/sse.md (working copy) @@ -4949,6 +4949,27 @@ (set_attr "prefix" "orig,orig,<round_prefix>") (set_attr "mode" "SF")]) +(define_insn "*sse2_vd_cvtsd2ss" + [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") + (vec_merge:V4SF + (vec_duplicate:V4SF + (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm"))) + (match_operand:V4SF 1 "register_operand" "0,0,v") + (const_int 1)))] + "TARGET_SSE2" + "@ + cvtsd2ss\t{%2, %0|%0, %2} + cvtsd2ss\t{%2, %0|%0, %2} + vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,double,*") + (set_attr "amdfam10_decode" "vector,double,*") + (set_attr "bdver1_decode" "direct,direct,*") + (set_attr "btver2_decode" "double,double,double") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "SF")]) + 
(define_insn "sse2_cvtss2sd<round_saeonly_name>" [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") (vec_merge:V2DF @@ -4972,6 +4993,27 @@ (set_attr "prefix" "orig,orig,<round_saeonly_prefix>") (set_attr "mode" "DF")]) +(define_insn "*sse2_vd_cvtss2sd" + [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") + (vec_merge:V2DF + (vec_duplicate:V2DF + (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm"))) + (match_operand:V2DF 1 "register_operand" "0,0,v") + (const_int 1)))] + "TARGET_SSE2" + "@ + cvtss2sd\t{%2, %0|%0, %2} + cvtss2sd\t{%2, %0|%0, %2} + vcvtss2sd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssecvt") + (set_attr "amdfam10_decode" "vector,double,*") + (set_attr "athlon_decode" "direct,direct,*") + (set_attr "bdver1_decode" "direct,direct,*") + (set_attr "btver2_decode" "double,double,double") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "DF")]) + (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>" [(set (match_operand:V8SF 0 "register_operand" "=v") (float_truncate:V8SF