On Thu, Feb 11, 2021 at 4:33 PM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> df_lr_bb_local_compute has:
>
>       FOR_EACH_INSN_INFO_DEF (def, insn_info)
>         /* If the def is to only part of the reg, it does
>            not kill the other defs that reach here.  */
>         if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
>
> However, as noted in the comment in the patch and below, almost all
> partial definitions have an associated use.  This means that the
> confluence function:
>
>   IN = (OUT & ~DEF) | USE
>
> is unaffected by whether partial definitions are in DEF or not.
>
> Even though the choice doesn't matter for the LR problem itself,
> it's IMO much more convenient for consumers if DEF contains all the
> definitions in the block.  The only pre-RTL-SSA code that tries to
> consume DEF directly is shrink-wrap.c, which already has to work
> around the incompleteness of the information:
>
>           /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
>              DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
>              at -O1, just give up searching NEXT_BLOCK.  */
>
> I hit the same problem when trying to fix the RTL-SSA part of PR98863.
>
> This patch treats partial definitions as both a def and a use,
> just like the df_ref records almost always do.
>
> To show that partial definitions almost always have uses:
>
>   DF_REF_CONDITIONAL:
>
>     Added by:
>
>       case COND_EXEC:
>         df_defs_record (collection_rec, COND_EXEC_CODE (x),
>                         bb, insn_info, DF_REF_CONDITIONAL);
>         break;
>
>     Later, df_get_conditional_uses creates uses for all DF_REF_CONDITIONAL
>     definitions.
>
>   DF_REF_PARTIAL:
>
>     In total, there are 4 locations at which we add partial definitions.
>
>     Case 1:
>
>       if (GET_CODE (dst) == STRICT_LOW_PART)
>         {
>           flags |= DF_REF_READ_WRITE | DF_REF_PARTIAL | 
> DF_REF_STRICT_LOW_PART;
>
>           loc = &XEXP (dst, 0);
>           dst = *loc;
>         }
>
>     Corresponding use:
>
>       case STRICT_LOW_PART:
>         {
>           rtx *temp = &XEXP (dst, 0);
>           /* A strict_low_part uses the whole REG and not just the
>              SUBREG.  */
>           dst = XEXP (dst, 0);
>           df_uses_record (collection_rec,
>                           (GET_CODE (dst) == SUBREG) ? &SUBREG_REG (dst) : 
> temp,
>                           DF_REF_REG_USE, bb, insn_info,
>                           DF_REF_READ_WRITE | DF_REF_STRICT_LOW_PART);
>         }
>         break;
>
>     Case 2:
>
>       if (GET_CODE (dst) == ZERO_EXTRACT)
>         {
>           flags |= DF_REF_READ_WRITE | DF_REF_PARTIAL | DF_REF_ZERO_EXTRACT;
>
>           loc = &XEXP (dst, 0);
>           dst = *loc;
>         }
>
>     Corresponding use:
>
>       case ZERO_EXTRACT:
>         {
>           df_uses_record (collection_rec, &XEXP (dst, 1),
>                           DF_REF_REG_USE, bb, insn_info, flags);
>           df_uses_record (collection_rec, &XEXP (dst, 2),
>                           DF_REF_REG_USE, bb, insn_info, flags);
>           if (GET_CODE (XEXP (dst,0)) == MEM)
>             df_uses_record (collection_rec, &XEXP (dst, 0),
>                             DF_REF_REG_USE, bb, insn_info,
>                             flags);
>           else
>             df_uses_record (collection_rec, &XEXP (dst, 0),
>                             DF_REF_REG_USE, bb, insn_info,
>                             DF_REF_READ_WRITE | DF_REF_ZERO_EXTRACT);
> ----------------------------^^^^^^^^^^^^^^^^^
>         }
>         break;
>
>     Case 3:
>
>       else if (GET_CODE (dst) == SUBREG && REG_P (SUBREG_REG (dst)))
>         {
>           if (read_modify_subreg_p (dst))
>             flags |= DF_REF_READ_WRITE | DF_REF_PARTIAL;
>
>           flags |= DF_REF_SUBREG;
>
>           df_ref_record (DF_REF_REGULAR, collection_rec,
>                          dst, loc, bb, insn_info, DF_REF_REG_DEF, flags);
>         }
>
>     Corresponding use:
>
>       case SUBREG:
>         if (read_modify_subreg_p (dst))
>           {
>             df_uses_record (collection_rec, &SUBREG_REG (dst),
>                             DF_REF_REG_USE, bb, insn_info,
>                             flags | DF_REF_READ_WRITE | DF_REF_SUBREG);
>             break;
>           }
>
>     Case 4:
>
>       /*  If this is a multiword hardreg, we create some extra
>           datastructures that will enable us to easily build REG_DEAD
>           and REG_UNUSED notes.  */
>       if (collection_rec
>           && (endregno != regno + 1) && insn_info)
>         {
>           /* Sets to a subreg of a multiword register are partial.
>              Sets to a non-subreg of a multiword register are not.  */
>           if (GET_CODE (reg) == SUBREG)
>             ref_flags |= DF_REF_PARTIAL;
>           ref_flags |= DF_REF_MW_HARDREG;
>
>     Corresponding use:
>
>       None.  However, this case should be rare to non-existent on most
>       targets, and the current handling seems suspect.  See the comment
>       in the patch for more details.
>
> Tested so far on aarch64-linux-gnu, both individually and with
> the follow-on rtl-ssa patch.  WDYT?  I realise it's not ideal
> to be applying this in stage 4, but it would be expensive
> to work around.
>
> FWIW, I also tried a variation:
>
>    df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
>    FOR_EACH_INSN_INFO_DEF (def, insn_info)
>      {
>        unsigned int dregno = DF_REF_REGNO (def);
>        bitmap_set_bit (&bb_info->def, dregno);
>        bitmap_clear_bit (&bb_info->use, dregno);
>      }
>
>    FOR_EACH_INSN_INFO_USE (use, insn_info)
>      /* Add use to set of uses in this BB.  */
>      bitmap_set_bit (&bb_info->use, DF_REF_REGNO (use));
>
>    if (CHECKING_P)
>      FOR_EACH_INSN_INFO_DEF (def, insn_info)
>        if (DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))
>          gcc_assert (bitmap_bit_p (&bb_info->use, DF_REF_REGNO (def)));
>
> This obviously triggers case 4 above, as seen in the testcase.
> It nevertheless survived bootstrap & regression test.  This is what
> I expected, because case 4 should never trigger on most targets,
> including AArch64.
>
> I think this shows that the LR in/out sets are very likely to be
> unaffected by the patch.

I'd say it should nevertheless be a correctness fix.  I suppose you
ran into the cases where it was _not_ unaffected by the patch?
(the multi-hard-reg subreg case?)  The comment mentions that the
bug might be elsewhere though (in df-scan I suppose), so is isn't it better
to fix the missing USE or making it two KILLs there?  I realize the
fallout of that might be bigger at this point.

Richard.

> I also tried renaming the “def” field to something else.  It was enough
> to update the df-problem.c:df_lr_* functions, shrink-wrap.c and the
> rtl-ssa code.
>
> Thanks,
> Richard
>
>
> gcc/
>         * df-problems.c (df_lr_bb_local_compute): Treat partial definitions
>         as read-modify operations.
>
> gcc/testsuite/
>         * gcc.dg/rtl/aarch64/multi-subreg-1.c: New test.
>
> ---
>  gcc/df-problems.c                             | 54 ++++++++++++++++---
>  .../gcc.dg/rtl/aarch64/multi-subreg-1.c       | 19 +++++++
>  2 files changed, 66 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/multi-subreg-1.c
>
> diff --git a/gcc/df-problems.c b/gcc/df-problems.c
> index 8fe58581f32..83b200baf9d 100644
> --- a/gcc/df-problems.c
> +++ b/gcc/df-problems.c
> @@ -842,14 +842,54 @@ df_lr_bb_local_compute (unsigned int bb_index)
>
>        df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
>        FOR_EACH_INSN_INFO_DEF (def, insn_info)
> -       /* If the def is to only part of the reg, it does
> -          not kill the other defs that reach here.  */
> -       if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
> -         {
> -           unsigned int dregno = DF_REF_REGNO (def);
> -           bitmap_set_bit (&bb_info->def, dregno);
> +       {
> +         /* If the definition is to only part of the register, it will
> +            usually have a corresponding use.  For example, stores to one
> +            word of a multiword register R have both a use and a partial
> +            definition of R.
> +
> +            In those cases, the LR confluence function:
> +
> +              IN = (OUT & ~DEF) | USE
> +
> +            is unaffected by whether we count the partial definition or not.
> +            However, it's more convenient for consumers if DEF contains
> +            *all* the registers defined in a block.
> +
> +            The only current case in which we record a partial definition
> +            without a corresponding use is if the destination is the
> +            multi-register subreg of a hard register.  An artificial
> +            example of this is:
> +
> +               (set (subreg:TI (reg:V8HI x0) 0) (const_int -1))
> +
> +            on AArch64.  This is described as a DF_REF_PARTIAL
> +            definition of x0 and x1 with no corresponding uses.
> +            In previous versions of GCC, the instruction had no
> +            effect on LR (that is, LR acted as though the instruction
> +            didn't exist).
> +
> +            It seems suspect that this case is treated differently.
> +            Either the instruction should be a full definition of x0 and x1,
> +            or the definition should be treated in the same way as other
> +            partial definitions, such as strict_lowparts or subregs that
> +            satisfy read_modify_subreg_p.
> +
> +            Fortunately, multi-register subregs of hard registers should
> +            be rare.  They should be folded into a plain REG if the target
> +            allows that (as AArch64 does for example above).
> +
> +            Here we treat the cases alike by forcing a use even in the rare
> +            case that no DF_REF_REG_USE is recorded.  That is, we model all
> +            partial definitions as both a use and a definition of the
> +            register.  */
> +         unsigned int dregno = DF_REF_REGNO (def);
> +         bitmap_set_bit (&bb_info->def, dregno);
> +         if (DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))
> +           bitmap_set_bit (&bb_info->use, dregno);
> +         else
>             bitmap_clear_bit (&bb_info->use, dregno);
> -         }
> +       }
>
>        FOR_EACH_INSN_INFO_USE (use, insn_info)
>         /* Add use to set of uses in this BB.  */
> diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/multi-subreg-1.c 
> b/gcc/testsuite/gcc.dg/rtl/aarch64/multi-subreg-1.c
> new file mode 100644
> index 00000000000..d7be5592e78
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/rtl/aarch64/multi-subreg-1.c
> @@ -0,0 +1,19 @@
> +/* { dg-additional-options "-O -fdump-rtl-cse1-all" } */
> +
> +__int128 __RTL (startwith ("vregs")) foo (void)
> +{
> +(function "foo"
> +  (insn-chain
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cnote 2 NOTE_INSN_FUNCTION_BEG)
> +      (cinsn 3 (set (subreg:TI (reg:V8HI x0) 0) (const_int -1)))
> +      (edge-to exit (flags "FALLTHRU"))
> +    )
> +  )
> +  (crtl (return_rtx (reg/i:TI x0)))
> +)
> +}
> +
> +/* { dg-final { scan-rtl-dump {(?n)lr *def.*\[x0\].*\[x1\]} cse1 } } */

Reply via email to