Hello!

As exposed by corner cases in PR 89902 and PR 89903, we can't always
reliably convert variable DImode shifts from a scalar to a vector
instruction. The problem lies in the count register of an SSE vector
shift, where the full DImode value is considered as the shift count.
Scalar operations on the general register set use the size-masked value
of a QImode count register, so we have to zero-extend the count operand
from QImode to DImode if we want to fully emulate a scalar instruction
with a vector instruction.
The STV infrastructure does not correctly handle corner cases where
the shift operand and the count operand are matched (i.e. they are the
same register), e.g. "psllq %xmm0, %xmm0": it substitutes every
instance of the register with its V2DImode subreg representation. As a
result, the DImode result of a previous vector operation can be passed
as the count operand without clearing the high bits outside the QImode
subreg, leading to runtime failures (n.b.: SSE shifts don't mask the
count operand).

The attached patch removes the unreliable STV support for variable DImode shifts.

2019-04-02  Uroš Bizjak  <ubiz...@gmail.com>

    PR target/89902
    PR target/89903
    * config/i386/i386.c (dimode_scalar_to_vector_candidate_p):
    Return false for variable DImode shifts.
    (dimode_scalar_chain::compute_convert_gain): Do not handle
    register count operand in variable DImode shifts.
    (dimode_scalar_chain::make_vector_copies): Remove support to copy
    count argument of a variable shift instruction to a vector register.
    (dimode_scalar_chain::convert_reg): Remove support to convert
    count argument of a variable shift instruction.

testsuite/ChangeLog:

2019-04-02  Uroš Bizjak  <ubiz...@gmail.com>

    PR target/89902
    PR target/89903
    * gcc.target/i386/pr70799-4.c: Remove.
    * gcc.target/i386/pr70799-5.c: Remove.
    * gcc.target/i386/pr89902.c: New test.
    * gcc.target/i386/pr89903.c: Ditto.

The patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

I plan to commit the patch later today to mainline and the gcc-8 branch.

Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6926085fdd2f..a7544946e0ad 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1058,16 +1058,8 @@ dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
 
     case ASHIFT:
     case LSHIFTRT:
-      if (!REG_P (XEXP (src, 1))
-         && (!SUBREG_P (XEXP (src, 1))
-             || SUBREG_BYTE (XEXP (src, 1)) != 0
-             || !REG_P (SUBREG_REG (XEXP (src, 1))))
-         && (!CONST_INT_P (XEXP (src, 1))
-             || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
-       return false;
-
-      if (GET_MODE (XEXP (src, 1)) != QImode
-         && !CONST_INT_P (XEXP (src, 1)))
+      if (!CONST_INT_P (XEXP (src, 1))
+         || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63))
        return false;
       break;
 
@@ -1664,15 +1656,10 @@ dimode_scalar_chain::compute_convert_gain ()
        {
          if (CONST_INT_P (XEXP (src, 0)))
            gain -= vector_const_cost (XEXP (src, 0));
-         if (CONST_INT_P (XEXP (src, 1)))
-           {
-             gain += ix86_cost->shift_const;
-             if (INTVAL (XEXP (src, 1)) >= 32)
-               gain -= COSTS_N_INSNS (1);
-           }
-         else
-           /* Additional gain for omitting two CMOVs.  */
-           gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
+
+         gain += ix86_cost->shift_const;
+         if (INTVAL (XEXP (src, 1)) >= 32)
+           gain -= COSTS_N_INSNS (1);
        }
       else if (GET_CODE (src) == PLUS
               || GET_CODE (src) == MINUS
@@ -1788,60 +1775,14 @@ dimode_scalar_chain::make_vector_copies (unsigned regno)
 {
   rtx reg = regno_reg_rtx[regno];
   rtx vreg = gen_reg_rtx (DImode);
-  bool count_reg = false;
   df_ref ref;
 
   for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
     if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
       {
-       df_ref use;
-
-       /* Detect the count register of a shift instruction.  */
-       for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
-         if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
-           {
-             rtx_insn *insn = DF_REF_INSN (use);
-             rtx def_set = single_set (insn);
-
-             gcc_assert (def_set);
-
-             rtx src = SET_SRC (def_set);
-
-             if ((GET_CODE (src) == ASHIFT
-                  || GET_CODE (src) == ASHIFTRT
-                  || GET_CODE (src) == LSHIFTRT)
-                 && !CONST_INT_P (XEXP (src, 1))
-                 && reg_or_subregno (XEXP (src, 1)) == regno)
-               count_reg = true;
-           }
-
        start_sequence ();
-       if (count_reg)
-         {
-           rtx qreg = gen_lowpart (QImode, reg);
-           rtx tmp = gen_reg_rtx (SImode);
 
-           if (TARGET_ZERO_EXTEND_WITH_AND
-               && optimize_function_for_speed_p (cfun))
-             {
-               emit_move_insn (tmp, const0_rtx);
-               emit_insn (gen_movstrictqi
-                          (gen_lowpart (QImode, tmp), qreg));
-             }
-           else
-             emit_insn (gen_rtx_SET
-                        (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));
-
-           if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
-             {
-               rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
-               emit_move_insn (slot, tmp);
-               tmp = copy_rtx (slot);
-             }
-
-           emit_insn (gen_zero_extendsidi2 (vreg, tmp));
-         }
-       else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
+       if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
          {
            rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
            emit_move_insn (adjust_address (tmp, SImode, 0),
@@ -1889,25 +1830,8 @@ dimode_scalar_chain::make_vector_copies (unsigned regno)
     if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
       {
        rtx_insn *insn = DF_REF_INSN (ref);
-       if (count_reg)
-         {
-           rtx def_set = single_set (insn);
-           gcc_assert (def_set);
 
-           rtx src = SET_SRC (def_set);
-
-           if ((GET_CODE (src) == ASHIFT
-                || GET_CODE (src) == ASHIFTRT
-                || GET_CODE (src) == LSHIFTRT)
-               && !CONST_INT_P (XEXP (src, 1))
-               && reg_or_subregno (XEXP (src, 1)) == regno)
-             {
-               XEXP (src, 0) = replace_with_subreg (XEXP (src, 0), reg, reg);
-               XEXP (src, 1) = vreg;
-             }
-         }
-       else
-         replace_with_subreg_in_insn (insn, reg, vreg);
+       replace_with_subreg_in_insn (insn, reg, vreg);
 
        if (dump_file)
          fprintf (dump_file, "  Replaced r%d with r%d in insn %d\n",
@@ -2010,43 +1934,7 @@ dimode_scalar_chain::convert_reg (unsigned regno)
            rtx src = SET_SRC (def_set);
            rtx dst = SET_DEST (def_set);
 
-           if ((GET_CODE (src) == ASHIFT
-                || GET_CODE (src) == ASHIFTRT
-                || GET_CODE (src) == LSHIFTRT)
-               && !CONST_INT_P (XEXP (src, 1))
-               && reg_or_subregno (XEXP (src, 1)) == regno)
-             {
-               rtx tmp2 = gen_reg_rtx (V2DImode);
-
-               start_sequence ();
-
-               if (TARGET_SSE4_1)
-                 emit_insn (gen_sse4_1_zero_extendv2qiv2di2
-                            (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
-               else
-                 {
-                   rtx vec_cst
-                     = gen_rtx_CONST_VECTOR (V2DImode,
-                                             gen_rtvec (2, GEN_INT (0xff),
-                                                        const0_rtx));
-                   vec_cst
-                     = validize_mem (force_const_mem (V2DImode, vec_cst));
-
-                   emit_insn (gen_rtx_SET
-                              (tmp2,
-                               gen_rtx_AND (V2DImode,
-                                            gen_rtx_SUBREG (V2DImode, reg, 0),
-                                            vec_cst)));
-                 }
-               rtx_insn *seq = get_insns ();
-               end_sequence ();
-
-               emit_insn_before (seq, insn);
-
-               XEXP (src, 0) = replace_with_subreg (XEXP (src, 0), reg, reg);
-               XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
-             }
-           else if (!MEM_P (dst) || !REG_P (src))
+           if (!MEM_P (dst) || !REG_P (src))
              replace_with_subreg_in_insn (insn, reg, reg);
 
            bitmap_clear_bit (conv, INSN_UID (insn));

Reply via email to