On Tue, Aug 27, 2019 at 2:14 PM Richard Biener <rguent...@suse.de> wrote:
>
>
> With forcing STV for all chains I run into the same issue as I
> fixed for the analysis phase which is looping over all defs
> of a pseudo rather just those interesting.  The following fixes
> this by recording insn/pseudo pairs in mark_dual_mode_def;
> well, not actually pairs but tracking insns in addition to
> pseudos which allows us to cheaply do the set property plus
> visit the defs that are interesting (assuming the number of
> defs on an insn is a lot less endless than the number of defs
> of a pseudo).
>
> This way the original testcase with STV forced goes down from
> 200s to 24s compile-time with
>
>  machine dep reorg                  :   7.09 ( 20%)   0.01 (  2%)   8.18 (
> 21%)    1854 kB (  2%)
>
> where almost all of the compile-time is spent in DFs
> deferred rescan processing.  I'm not sure we even need
> that but I'm not too eager to dig more into DF than necessary
> right now (we call df_finish () at the end which removes
> the DU/UD_CHAIN and the MD problem (what do we need that for!?).
> We don't appropriately scan all insn we add so the prevailing
> problems like LIVE are not updated, but well...
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu with STV
> forced and for Haswell this time.
>
> OK?
>
> Thanks,
> Richard.
>
> 2019-08-27  Richard Biener  <rguent...@suse.de>
>
>         * config/i386/i386-features.h
>         (general_scalar_chain::~general_scalar_chain): Add.
>         (general_scalar_chain::insns_conv): New bitmap.
>         (general_scalar_chain::n_sse_to_integer): New.
>         (general_scalar_chain::n_integer_to_sse): Likewise.
>         (general_scalar_chain::make_vector_copies): Adjust signature.
>         * config/i386/i386-features.c
>         (general_scalar_chain::general_scalar_chain): Outline,
>         initialize new members.
>         (general_scalar_chain::~general_scalar_chain): New.
>         (general_scalar_chain::mark_dual_mode_def): Record insns
>         we need to insert conversions at and count them.
>         (general_scalar_chain::compute_convert_gain): Account
>         for conversion instructions at chain boundary.
>         (general_scalar_chain::make_vector_copies): Generate a single
>         copy for a def by a specific insn.
>         (general_scalar_chain::convert_registers): First populate
>         defs_map, then make copies at out-of chain insns.

LGTM.

Thanks,
Uros.

> Index: gcc/config/i386/i386-features.c
> ===================================================================
> --- gcc/config/i386/i386-features.c     (revision 274945)
> +++ gcc/config/i386/i386-features.c     (working copy)
> @@ -320,6 +320,20 @@ scalar_chain::add_to_queue (unsigned ins
>    bitmap_set_bit (queue, insn_uid);
>  }
>
> +general_scalar_chain::general_scalar_chain (enum machine_mode smode_,
> +                                           enum machine_mode vmode_)
> +     : scalar_chain (smode_, vmode_)
> +{
> +  insns_conv = BITMAP_ALLOC (NULL);
> +  n_sse_to_integer = 0;
> +  n_integer_to_sse = 0;
> +}
> +
> +general_scalar_chain::~general_scalar_chain ()
> +{
> +  BITMAP_FREE (insns_conv);
> +}
> +
>  /* For DImode conversion, mark register defined by DEF as requiring
>     conversion.  */
>
> @@ -328,15 +342,27 @@ general_scalar_chain::mark_dual_mode_def
>  {
>    gcc_assert (DF_REF_REG_DEF_P (def));
>
> -  if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
> -    return;
> -
> +  /* Record the def/insn pair so we can later efficiently iterate over
> +     the defs to convert on insns not in the chain.  */
> +  bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
> +  if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
> +    {
> +      if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
> +         && !reg_new)
> +       return;
> +      n_integer_to_sse++;
> +    }
> +  else
> +    {
> +      if (!reg_new)
> +       return;
> +      n_sse_to_integer++;
> +    }
> +
>    if (dump_file)
>      fprintf (dump_file,
>              "  Mark r%d def in insn %d as requiring both modes in chain 
> #%d\n",
>              DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
> -
> -  bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
>  }
>
>  /* For TImode conversion, it is unused.  */
> @@ -523,7 +549,7 @@ general_scalar_chain::compute_convert_ga
>                || GET_CODE (src) == ASHIFTRT
>                || GET_CODE (src) == LSHIFTRT)
>         {
> -         if (CONST_INT_P (XEXP (src, 0)))
> +         if (CONST_INT_P (XEXP (src, 0)))
>             igain -= vector_const_cost (XEXP (src, 0));
>           igain += m * ix86_cost->shift_const - ix86_cost->sse_op;
>           if (INTVAL (XEXP (src, 1)) >= 32)
> @@ -588,9 +614,12 @@ general_scalar_chain::compute_convert_ga
>    if (dump_file)
>      fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);
>
> -  /* ???  What about integer to SSE?  */
> -  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
> -    cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->sse_to_integer;
> +  /* Cost the integer to sse and sse to integer moves.  */
> +  cost += n_sse_to_integer * ix86_cost->sse_to_integer;
> +  /* ???  integer_to_sse but we only have that in the RA cost table.
> +     Assume sse_to_integer/integer_to_sse are the same which they
> +     are at the moment.  */
> +  cost += n_integer_to_sse * ix86_cost->sse_to_integer;
>
>    if (dump_file)
>      fprintf (dump_file, "  Registers conversion cost: %d\n", cost);
> @@ -649,85 +678,64 @@ gen_gpr_to_xmm_move_src (enum machine_mo
>     and replace its uses in a chain.  */
>
>  void
> -general_scalar_chain::make_vector_copies (unsigned regno)
> +general_scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
>  {
> -  rtx reg = regno_reg_rtx[regno];
> -  rtx vreg = gen_reg_rtx (smode);
> -  df_ref ref;
> +  rtx vreg = *defs_map.get (reg);
>
> -  defs_map.put (reg, vreg);
> -
> -  /* For each insn defining REGNO, see if it is defined by an insn
> -     not part of the chain but with uses in insns part of the chain
> -     and insert a copy in that case.  */
> -  for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
> +  start_sequence ();
> +  if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
>      {
> -      if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
> -       continue;
> -      df_link *use;
> -      for (use = DF_REF_CHAIN (ref); use; use = use->next)
> -       if (!DF_REF_REG_MEM_P (use->ref)
> -           && bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref)))
> -         break;
> -      if (!use)
> -       continue;
> -
> -      start_sequence ();
> -      if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
> +      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
> +      if (smode == DImode && !TARGET_64BIT)
>         {
> -         rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
> -         if (smode == DImode && !TARGET_64BIT)
> -           {
> -             emit_move_insn (adjust_address (tmp, SImode, 0),
> -                             gen_rtx_SUBREG (SImode, reg, 0));
> -             emit_move_insn (adjust_address (tmp, SImode, 4),
> -                             gen_rtx_SUBREG (SImode, reg, 4));
> -           }
> -         else
> -           emit_move_insn (copy_rtx (tmp), reg);
> -         emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
> -                                 gen_gpr_to_xmm_move_src (vmode, tmp)));
> +         emit_move_insn (adjust_address (tmp, SImode, 0),
> +                         gen_rtx_SUBREG (SImode, reg, 0));
> +         emit_move_insn (adjust_address (tmp, SImode, 4),
> +                         gen_rtx_SUBREG (SImode, reg, 4));
>         }
> -      else if (!TARGET_64BIT && smode == DImode)
> +      else
> +       emit_move_insn (copy_rtx (tmp), reg);
> +      emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
> +                             gen_gpr_to_xmm_move_src (vmode, tmp)));
> +    }
> +  else if (!TARGET_64BIT && smode == DImode)
> +    {
> +      if (TARGET_SSE4_1)
>         {
> -         if (TARGET_SSE4_1)
> -           {
> -             emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
> -                                         CONST0_RTX (V4SImode),
> -                                         gen_rtx_SUBREG (SImode, reg, 0)));
> -             emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 
> 0),
> -                                           gen_rtx_SUBREG (V4SImode, vreg, 
> 0),
> -                                           gen_rtx_SUBREG (SImode, reg, 4),
> -                                           GEN_INT (2)));
> -           }
> -         else
> -           {
> -             rtx tmp = gen_reg_rtx (DImode);
> -             emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
> -                                         CONST0_RTX (V4SImode),
> -                                         gen_rtx_SUBREG (SImode, reg, 0)));
> -             emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
> -                                         CONST0_RTX (V4SImode),
> -                                         gen_rtx_SUBREG (SImode, reg, 4)));
> -             emit_insn (gen_vec_interleave_lowv4si
> -                        (gen_rtx_SUBREG (V4SImode, vreg, 0),
> -                         gen_rtx_SUBREG (V4SImode, vreg, 0),
> -                         gen_rtx_SUBREG (V4SImode, tmp, 0)));
> -           }
> +         emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
> +                                     CONST0_RTX (V4SImode),
> +                                     gen_rtx_SUBREG (SImode, reg, 0)));
> +         emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
> +                                       gen_rtx_SUBREG (V4SImode, vreg, 0),
> +                                       gen_rtx_SUBREG (SImode, reg, 4),
> +                                       GEN_INT (2)));
>         }
>        else
> -       emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
> -                               gen_gpr_to_xmm_move_src (vmode, reg)));
> -      rtx_insn *seq = get_insns ();
> -      end_sequence ();
> -      rtx_insn *insn = DF_REF_INSN (ref);
> -      emit_conversion_insns (seq, insn);
> -
> -      if (dump_file)
> -       fprintf (dump_file,
> -                "  Copied r%d to a vector register r%d for insn %d\n",
> -                regno, REGNO (vreg), INSN_UID (insn));
> +       {
> +         rtx tmp = gen_reg_rtx (DImode);
> +         emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
> +                                     CONST0_RTX (V4SImode),
> +                                     gen_rtx_SUBREG (SImode, reg, 0)));
> +         emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
> +                                     CONST0_RTX (V4SImode),
> +                                     gen_rtx_SUBREG (SImode, reg, 4)));
> +         emit_insn (gen_vec_interleave_lowv4si
> +                    (gen_rtx_SUBREG (V4SImode, vreg, 0),
> +                     gen_rtx_SUBREG (V4SImode, vreg, 0),
> +                     gen_rtx_SUBREG (V4SImode, tmp, 0)));
> +       }
>      }
> +  else
> +    emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
> +                           gen_gpr_to_xmm_move_src (vmode, reg)));
> +  rtx_insn *seq = get_insns ();
> +  end_sequence ();
> +  emit_conversion_insns (seq, insn);
> +
> +  if (dump_file)
> +    fprintf (dump_file,
> +            "  Copied r%d to a vector register r%d for insn %d\n",
> +            REGNO (reg), REGNO (vreg), INSN_UID (insn));
>  }
>
>  /* Copy the definition SRC of INSN inside the chain to DST for
> @@ -1158,7 +1164,11 @@ general_scalar_chain::convert_registers
>    bitmap_iterator bi;
>    unsigned id;
>    EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
> -    make_vector_copies (id);
> +    defs_map.put (regno_reg_rtx[id], gen_reg_rtx (smode));
> +  EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)
> +    for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC 
> (ref))
> +      if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
> +       make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
>  }
>
>  /* Convert whole chain creating required register
> Index: gcc/config/i386/i386-features.h
> ===================================================================
> --- gcc/config/i386/i386-features.h     (revision 274945)
> +++ gcc/config/i386/i386-features.h     (working copy)
> @@ -167,16 +167,19 @@ class scalar_chain
>  class general_scalar_chain : public scalar_chain
>  {
>   public:
> -  general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
> -    : scalar_chain (smode_, vmode_) {}
> +  general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_);
> +  ~general_scalar_chain ();
>    int compute_convert_gain ();
>   private:
>    hash_map<rtx, rtx> defs_map;
> +  bitmap insns_conv;
> +  unsigned n_sse_to_integer;
> +  unsigned n_integer_to_sse;
>    void mark_dual_mode_def (df_ref def);
>    void convert_insn (rtx_insn *insn);
>    void convert_op (rtx *op, rtx_insn *insn);
>    void convert_reg (rtx_insn *insn, rtx dst, rtx src);
> -  void make_vector_copies (unsigned regno);
> +  void make_vector_copies (rtx_insn *, rtx);
>    void convert_registers ();
>    int vector_const_cost (rtx exp);
>  };

Reply via email to