Hi Maintainers, please find below the patch that backports the fix for PR target/52908 to GCC 4.7.
The patch passed bootstrap and regression testing. OK to commit? Regards, Venkat. Index: ChangeLog =================================================================== --- ChangeLog (revision 187449) +++ ChangeLog (working copy) @@ -1,3 +1,17 @@ +2012-06-07 Venkataramanan Kumar <venkataramanan.ku...@amd.com> + + Backport from 2012-05-09 mainline r187354 + + PR target/52908 + * config/i386/sse.md (vec_widen_smult_hi_v4si): Expand using + xop_pmacsdqh insn pattern instead of xop_mulv2div2di3_high. + (vec_widen_smult_lo_v4si): Expand using xop_pmacsdql insn pattern + instead of xop_mulv2div2di3_low. + (xop_p<macs>dql): Fix vec_select selector. + (xop_p<macs>dqh): Ditto. + (xop_mulv2div2di3_low): Remove insn_and_split pattern. + (xop_mulv2div2di3_high): Ditto. + 2012-05-13 Uros Bizjak <ubiz...@gmail.com> Backport from mainline Index: testsuite/gcc.target/i386/xop-imul32widen-vector.c =================================================================== --- testsuite/gcc.target/i386/xop-imul32widen-vector.c (revision 187449) +++ testsuite/gcc.target/i386/xop-imul32widen-vector.c (working copy) @@ -32,5 +32,5 @@ exit (0); } -/* { dg-final { scan-assembler "vpmacsdql" } } */ +/* { dg-final { scan-assembler "vpmuldq" } } */ /* { dg-final { scan-assembler "vpmacsdqh" } } */ Index: testsuite/ChangeLog =================================================================== --- testsuite/ChangeLog (revision 187449) +++ testsuite/ChangeLog (working copy) @@ -1,3 +1,11 @@ +2012-06-07 Venkataramanan Kumar <venkataramanan.ku...@amd.com> + + Backport from 2012-05-09 mainline r187354 + + PR target/52908 + * gcc.target/i386/xop-imul32widen-vector.c: Update scan-assembler + directive to scan for vpmuldq, not vpmacsdql. + 2012-05-12 Eric Botcazou <ebotca...@adacore.com> * gnat.dg/null_pointer_deref3.adb: New test. 
Index: config/i386/sse.md =================================================================== --- config/i386/sse.md (revision 187449) +++ config/i386/sse.md (working copy) @@ -5743,11 +5743,15 @@ if (TARGET_XOP) { + rtx t3 = gen_reg_rtx (V2DImode); + emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2), GEN_INT (1), GEN_INT (3))); emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2), GEN_INT (1), GEN_INT (3))); - emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2)); + emit_move_insn (t3, CONST0_RTX (V2DImode)); + + emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3)); DONE; } @@ -5772,11 +5776,15 @@ if (TARGET_XOP) { + rtx t3 = gen_reg_rtx (V2DImode); + emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2), GEN_INT (1), GEN_INT (3))); emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2), GEN_INT (1), GEN_INT (3))); - emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2)); + emit_move_insn (t3, CONST0_RTX (V2DImode)); + + emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3)); DONE; } @@ -10443,12 +10451,12 @@ (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 1) - (const_int 3)]))) - (vec_select:V2SI + (parallel [(const_int 0) + (const_int 2)]))) + (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3)]))) + (parallel [(const_int 0) + (const_int 2)]))) (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" @@ -10462,13 +10470,13 @@ (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 0) - (const_int 2)]))) + (parallel [(const_int 1) + (const_int 3)]))) (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)])))) + (parallel [(const_int 1) + (const_int 3)])))) (match_operand:V2DI 3 "nonimmediate_operand" "x")))] 
"TARGET_XOP" "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" @@ -10482,61 +10490,19 @@ (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 1) - (const_int 3)]))) + (parallel [(const_int 0) + (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3)])))) + (parallel [(const_int 0) + (const_int 2)])))) (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; We don't have a straight 32-bit parallel multiply and extend on XOP, so -;; fake it with a multiply/add. In general, we expect the define_split to -;; occur before register allocation, so we have to handle the corner case where -;; the target is the same as operands 1/2 -(define_insn_and_split "xop_mulv2div2di3_low" - [(set (match_operand:V2DI 0 "register_operand" "=&x") - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "%x") - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3)])))))] - "TARGET_XOP" - "#" - "&& reload_completed" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 1) - (const_int 3)])))) - (match_dup 0)))] -{ - operands[3] = CONST0_RTX (V2DImode); -} - [(set_attr "type" "ssemul") - (set_attr "mode" "TI")]) - (define_insn "xop_pmacsdqh" [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI @@ -10544,61 +10510,19 @@ (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 0) - (const_int 2)]))) + 
(parallel [(const_int 1) + (const_int 3)]))) (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)])))) + (parallel [(const_int 1) + (const_int 3)])))) (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; We don't have a straight 32-bit parallel multiply and extend on XOP, so -;; fake it with a multiply/add. In general, we expect the define_split to -;; occur before register allocation, so we have to handle the corner case where -;; the target is the same as either operands[1] or operands[2] -(define_insn_and_split "xop_mulv2div2di3_high" - [(set (match_operand:V2DI 0 "register_operand" "=&x") - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "%x") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)])))))] - "TARGET_XOP" - "#" - "&& reload_completed" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 0) - (const_int 2)])))) - (match_dup 0)))] -{ - operands[3] = CONST0_RTX (V2DImode); -} - [(set_attr "type" "ssemul") - (set_attr "mode" "TI")]) - ;; XOP parallel integer multiply/add instructions for the intrinisics (define_insn "xop_pmacsswd" [(set (match_operand:V4SI 0 "register_operand" "=x")