Uros noted that STV with !TImode isn't supposed to run before combine.  The
following adjusts things accordingly, and the pass now runs twice for
TARGET_64BIT.  I've also adjusted another gpr->xmm move to use the
(vec_merge (vec_duplicate ..)) style rather than a subreg.  That part isn't
strictly necessary to fix the bug, though, and my previous need to do this
might have been caused by the pass running too early.
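
To illustrate the move style (just a sketch of the intended RTL; the exact
form gen_gpr_to_xmm_move_src produces may differ per vector mode), for an
SImode operand converted in V4SImode the emitted scalar-to-vector move now
looks roughly like

  (set (subreg:V4SI (reg:SI tmp) 0)
       (vec_merge:V4SI (vec_duplicate:V4SI (reg:SI op))
                       (const_vector:V4SI [0 0 0 0])
                       (const_int 1)))

instead of a plain scalar copy into tmp whose result is then only referenced
through the paradoxical (subreg:V4SI (reg:SI tmp) 0), so the backend sees a
form it can match with its existing movd-style patterns.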
So - with or without this consistency part?

Bootstrap / regtest running on x86_64-unknown-linux-gnu, OK?

Thanks,
Richard.

2019-08-19  Richard Biener  <rguent...@suse.de>

	PR target/91154
	* config/i386/i386-features.c (general_scalar_chain::convert_op):
	Use (vec_merge (vec_duplicate..)) style vector from scalar move.
	(convert_scalars_to_vector): Add timode_p parameter and use it
	to guard TImode-only operation.
	(pass_stv::gate): Adjust so STV runs twice for TARGET_64BIT.
	(pass_stv::execute): Pass down timode_p.

	* gcc.target/i386/minmax-7.c: New testcase.

Index: gcc/config/i386/i386-features.c
===================================================================
--- gcc/config/i386/i386-features.c	(revision 274666)
+++ gcc/config/i386/i386-features.c	(working copy)
@@ -910,7 +910,9 @@ general_scalar_chain::convert_op (rtx *o
     {
       rtx tmp = gen_reg_rtx (GET_MODE (*op));
 
-      emit_insn_before (gen_move_insn (tmp, *op), insn);
+      emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
+                                     gen_gpr_to_xmm_move_src (vmode, *op)),
+                        insn);
       *op = gen_rtx_SUBREG (vmode, tmp, 0);
 
       if (dump_file)
@@ -1664,7 +1666,7 @@ timode_remove_non_convertible_regs (bitm
    instructions into vector mode when profitable.  */
 
 static unsigned int
-convert_scalars_to_vector ()
+convert_scalars_to_vector (bool timode_p)
 {
   basic_block bb;
   int converted_insns = 0;
@@ -1690,7 +1692,7 @@ convert_scalars_to_vector ()
     {
       rtx_insn *insn;
       FOR_BB_INSNS (bb, insn)
-	if (TARGET_64BIT
+	if (timode_p
 	    && timode_scalar_to_vector_candidate_p (insn))
 	  {
 	    if (dump_file)
@@ -1699,7 +1701,7 @@ convert_scalars_to_vector ()
 
 	    bitmap_set_bit (&candidates[2], INSN_UID (insn));
 	  }
-	else
+	else if (!timode_p)
 	  {
 	    /* Check {SI,DI}mode.  */
 	    for (unsigned i = 0; i <= 1; ++i)
@@ -1715,7 +1717,7 @@ convert_scalars_to_vector ()
 	  }
     }
 
-  if (TARGET_64BIT)
+  if (timode_p)
     timode_remove_non_convertible_regs (&candidates[2]);
   for (unsigned i = 0; i <= 1; ++i)
     general_remove_non_convertible_regs (&candidates[i]);
@@ -1875,13 +1877,13 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return (timode_p == !!TARGET_64BIT
+      return ((!timode_p || TARGET_64BIT)
	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
     }

   virtual unsigned int execute (function *)
     {
-      return convert_scalars_to_vector ();
+      return convert_scalars_to_vector (timode_p);
     }

   opt_pass *clone ()
Index: gcc/testsuite/gcc.target/i386/minmax-7.c
===================================================================
--- gcc/testsuite/gcc.target/i386/minmax-7.c	(nonexistent)
+++ gcc/testsuite/gcc.target/i386/minmax-7.c	(working copy)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+
+extern int numBins;
+extern int binOffst;
+extern int binWidth;
+extern int Trybin;
+void foo (int);
+
+void bar (int aleft, int axcenter)
+{
+  int a1LoBin = (((Trybin=((axcenter + aleft)-binOffst)/binWidth)<0)
+		 ? 0 : ((Trybin>numBins) ? numBins : Trybin));
+  foo (a1LoBin);
+}
+
+/* We do not want the RA to spill %esi for it's dual-use but using
+   pminsd is OK.  */
+/* { dg-final { scan-assembler-not "rsp" { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler "pminsd" } } */