Hello! The attached patch (partially) avoids emitting the XOR dependency-breaking insn by removing the SSE register dependency in the AVX instructions themselves.
2019-01-31  Uroš Bizjak  <ubiz...@gmail.com>

    PR target/89071
    * config/i386/i386.md (*extendsfdf2): Split out reg->reg alternative to avoid partial SSE register stall for TARGET_AVX.
    (truncdfsf2): Ditto.
    (sse4_1_round<mode>2): Ditto.

Bootstrapped on x86_64-linux-gnu {,-m32}; regression testing is in progress.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d085e88bc61d..744f155fca6f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4370,9 +4370,9 @@ }) (define_insn "*extendsfdf2" - [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v") + [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v") (float_extend:DF - (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))] + (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))] "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" { switch (which_alternative) @@ -4382,15 +4382,17 @@ return output_387_reg_move (insn, operands); case 2: + return "%vcvtss2sd\t{%d1, %0|%0, %d1}"; + case 3: return "%vcvtss2sd\t{%1, %d0|%d0, %1}"; default: gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,ssecvt") - (set_attr "prefix" "orig,orig,maybe_vex") - (set_attr "mode" "SF,XF,DF") + [(set_attr "type" "fmov,fmov,ssecvt,ssecvt") + (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex") + (set_attr "mode" "SF,XF,DF,DF") (set (attr "enabled") (if_then_else (match_test ("TARGET_SSE2 && TARGET_SSE_MATH")) @@ -4481,7 +4483,7 @@ "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed && optimize_function_for_speed_p (cfun) && (!REG_P (operands[1]) - || REGNO (operands[0]) != REGNO (operands[1])) + || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1]))) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 0) @@ -4534,9 +4536,9 @@ ;; Conversion from DFmode to SFmode. 
(define_insn "truncdfsf2" - [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v") + [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v") (float_truncate:SF - (match_operand:DF 1 "register_ssemem_operand" "f,f,vm")))] + (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))] "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" { switch (which_alternative) @@ -4546,13 +4548,15 @@ return output_387_reg_move (insn, operands); case 2: + return "%vcvtsd2ss\t{%d1, %0|%0, %d1}"; + case 3: return "%vcvtsd2ss\t{%1, %d0|%d0, %1}"; default: gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,ssecvt") + [(set_attr "type" "fmov,fmov,ssecvt,ssecvt") (set_attr "mode" "SF") (set (attr "enabled") (if_then_else @@ -4639,7 +4643,7 @@ "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed && optimize_function_for_speed_p (cfun) && (!REG_P (operands[1]) - || REGNO (operands[0]) != REGNO (operands[1])) + || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1]))) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 0) @@ -16171,19 +16175,20 @@ (define_insn "sse4_1_round<mode>2" - [(set (match_operand:MODEF 0 "register_operand" "=x,v") - (unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "xm,vm") - (match_operand:SI 2 "const_0_to_15_operand" "n,n")] + [(set (match_operand:MODEF 0 "register_operand" "=x,x,v") + (unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "x,m,vm") + (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")] UNSPEC_ROUND))] "TARGET_SSE4_1" "@ + %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2} %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2} vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}" [(set_attr "type" "ssecvt") - (set_attr "prefix_extra" "1,*") - (set_attr "length_immediate" "*,1") - (set_attr "prefix" "maybe_vex,evex") - (set_attr "isa" "noavx512f,avx512f") + (set_attr "prefix_extra" "1,1,*") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix" "maybe_vex,maybe_vex,evex") + 
(set_attr "isa" "noavx512f,noavx512f,avx512f") (set_attr "mode" "<MODE>")]) (define_insn "rintxf2"