Richard, I am trying to make sure that when the true and false operands of a vcond are the constant vectors {-1} and {0}, the expansion does not emit the masking operations. Currently I am doing this:

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 177665)
+++ config/i386/i386.c (working copy)
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.
 #include "tm.h"
 #include "rtl.h"
 #include "tree.h"
+#include "tree-flow.h"
 #include "tm_p.h"
 #include "regs.h"
 #include "hard-reg-set.h"
@@ -18434,7 +18435,30 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp
 {
   enum machine_mode mode = GET_MODE (dest);
   rtx t2, t3, x;
-
+  rtx mask_true;
+
+  rtvec v;
+  int units, i;
+  enum machine_mode inner;
+
+  units = GET_MODE_NUNITS (mode);
+  inner = GET_MODE_INNER (mode);
+  v = rtvec_alloc (units);
+  for (i = 0; i < units; ++i)
+    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (inner, -1);
+
+  mask_true = gen_rtx_raw_CONST_VECTOR (mode, v);
+
+  fprintf (stderr, "I am here\n");
+  debug_rtx (mask_true);
+  debug_rtx (op_true);
+  if (rtx_equal_p (op_true, mask_true))
+    {
+      fprintf (stderr, "Yes it is\n");
+      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
+      return;
+    }
+  else
   if (op_false == CONST0_RTX (mode))
     {
       op_true = force_reg (mode, op_true);

This handles the case where the mask is -1 very well; however, in the code generated by the expansion I still see redundant operations:

  ires = i0 < i1 ? (vector (4, int)){-1,-1,-1,-1} : (vector (4, int)){0,0,0,0};

expands to:

  pcmpgtd %xmm1, %xmm0
  pcmpeqd %xmm1, %xmm1
  pcmpeqd %xmm1, %xmm0
  movdqa %xmm0, -24(%rsp)

whereas the code

  ires = i0 < i1;

using my hook, expands to:

  pcmpgtd %xmm1, %xmm0
  movdqa %xmm0, -24(%rsp)

So something is emitting two extra instructions there, and I cannot figure out what is doing that. Does anyone know how I could fix this?

Artem.