Hi Ajit,

On 9/17/23 22:59, Ajit Agarwal wrote:
This new version of patch 6 use improve ree pass for rs6000 target using 
defined ABI interfaces.
Bootstrapped and regtested on power64-linux-gnu.

Review comments incorporated.

Thanks & Regards
Ajit

Nit: This seems to belong to "what changed in v6" between the two "---" lines right before start of source diff.

ree: Improve ree pass for rs6000 target using defined abi interfaces

For rs6000 target we see redundant zero and sign extension and done to
improve ree pass to eliminate such redundant zero and sign extension
using defined ABI interfaces.

It seems you have redundant "redundant zero and sign extension" - pun intended  ;-)

On a serious note, when debugging your code for a possible RISC-V benefit, it seems what it is trying to do is address REE giving up due to "missing definition(s)". Perhaps mentioning that in commitlog would give the reader more context.

+/* Return TRUE if target mode is equal to source mode of zero_extend
+   or sign_extend otherwise false.  */
+
+static bool
+abi_target_promote_function_mode (machine_mode mode)
+{
+  int unsignedp;
+  machine_mode tgt_mode =
+    targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
+                                        NULL_TREE, 1);
+
+  if (tgt_mode == mode)
+    return true;
+  else
+    return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an return  registers.  */

Additional Whitespace and grammer
s/an return  registers/a return register

Please *run* contrib/check_gnu_style on your patch before sending out on mailing lists, saves reviewers time and they can focus more on technical content.

+
+static bool
+abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
+    return false;

This still has ABI assumptions: RISC-V generates SIGN_EXTEND for functions args and return reg. This is not a deficiency of patch per-se, but something we would like to address - even if as an addon-patch.

+
+  if (FUNCTION_VALUE_REGNO_P (regno))
+    return true;
+
+  return false;
+}
+
+/* Return TRUE if reg source operand of zero_extend is argument registers
+   and not return registers and source and destination operand are same
+   and mode of source and destination operand are not same.  */
+
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
+    return false;
Ditto: ABI assumption.

+
+  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));

why not simply @dst_mode

+  rtx orig_src = XEXP (SET_SRC (set),0);
+
+  bool copy_needed
+    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));

Maybe use @orig_src here, rather than duplicating XEXP (SET_SRC (set),0)

+  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)

The bailing out for copy_needed needs extra commentary, why ?

+      && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+      && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+    return true;
+
+  return false;

Consider this bike-shed but I would arrange this code differently. The main case here is check for function args and then the not so imp reasons

+  rtx orig_src = XEXP (src, 0);
+
+  if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+      || abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+    return false;
+
+  /* commentary as to why .... */
+  if (dst_mode == GET_MODE (orig_src))
+    return false;

-   bool copy_needed
-    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
+  /* copy needed  ..... */
+  if (REGNO (SET_DEST (set)) != REGNO (orig_src))
+    return false;
+
+ return true;

+/* Return TRUE if the candidate insn is zero extend and regno is
+   an argument registers.  */
+
+static bool
+abi_extension_candidate_argno_p (rtx_code code, int regno)
+{
+  if (code != ZERO_EXTEND)
+    return false;

ABI assumption still.

+
+  if (FUNCTION_ARG_REGNO_P (regno))
+    return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn doesn't have defs and have
+ * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
+
+static bool
+abi_handle_regs_without_defs_p (rtx_insn *insn)
+{
+  if (side_effects_p (PATTERN (insn)))
+    return false;
+
+  struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
+
+  if (!uses)
+    return false;
+
+  for (df_link *use = uses; use; use = use->next)
+    {
+      if (!use->ref)
+       return false;
+
+      if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+       return false;
+
+      rtx_insn *use_insn = DF_REF_INSN (use->ref);
+
+      if (GET_CODE (PATTERN (use_insn)) == SET)
+       {
+         rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
+
+         if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
+             || GET_RTX_CLASS (code) == RTX_COMM_ARITH
+             || GET_RTX_CLASS (code) == RTX_UNARY)
+           return false;
+       }
+     }
+  return true;
+}
+
  /* This function goes through all reaching defs of the source
     of the candidate for elimination (CAND) and tries to combine
     the extension with the definition instruction.  The changes
@@ -770,6 +883,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
state->defs_list.truncate (0);
    state->copies_list.truncate (0);
+  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
+
+  if (abi_extension_candidate_p (cand->insn)
+      && (!get_defs (cand->insn, orig_src, NULL)))
+    return abi_handle_regs_without_defs_p (cand->insn);
outcome = make_defs_and_copies_lists (cand->insn, set_pat, state); @@ -1036,6 +1154,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
          }
      }
+ rtx insn_set = single_set (cand->insn);
+
+  machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
+
+  bool promote_p = abi_target_promote_function_mode (mode);
+
+  if (promote_p)
+    return true;
+
    if (merge_successful)
      {
        /* Commit the changes here if possible
@@ -1112,12 +1239,20 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
        rtx reg = XEXP (src, 0);
        struct df_link *defs, *def;
        ext_cand *cand;
+      defs = get_defs (insn, reg, NULL);
/* Zero-extension of an undefined value is partly defined (it's
         completely undefined for sign-extension, though).  So if there exists
         a path from the entry to this zero-extension that leaves this register
         uninitialized, removing the extension could change the behavior of
         correct programs.  So first, check it is not the case.  */
+      if (!defs && abi_extension_candidate_argno_p (code, REGNO (reg)))
+       {
+         ext_cand e = {expr, code, mode, insn};
+         insn_list->safe_push (e);
+         return;
+       }
+
Naively I would expect this change to go in the existing !defs { } block below, after the uninitialized case but it seems this is deliberate - you could be bypassing the uninitialized data case ... (see below as well)

        if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
        {
          if (dump_file)
@@ -1131,7 +1266,6 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
        }
/* Second, make sure we can get all the reaching definitions. */
-      defs = get_defs (insn, reg, NULL);
        if (!defs)
        {
          if (dump_file)
@@ -1321,7 +1455,8 @@ find_and_remove_re (void)
              && (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
            {
                reinsn_copy_list.safe_push (curr_cand->insn);
-              reinsn_copy_list.safe_push (state.defs_list[0]);
+             if (state.defs_list.length () != 0)
+               reinsn_copy_list.safe_push (state.defs_list[0]);
            }
          reinsn_del_list.safe_push (curr_cand->insn);
          state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
@@ -1345,6 +1480,10 @@ find_and_remove_re (void)
    for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
      {
        rtx_insn *curr_insn = reinsn_copy_list[i];
+
+      if ((i+1) >= reinsn_copy_list.length ())
+       continue;
+
        rtx_insn *def_insn = reinsn_copy_list[i + 1];
/* Use the mode of the destination of the defining insn
diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C 
b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
new file mode 100644
index 00000000000..5a050df06ff
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
@@ -0,0 +1,13 @@
+/* { dg-options "-mcpu=power9 -O2" } */
+
+void *memset(void *b, int c, unsigned long len)
+{
+  unsigned long i;
+
+  for (i = 0; i < len; i++)
+    ((unsigned char *)b)[i] = c;
+
+   return b;
+}

So I did a simple cc1 build for powerpc on gcc tip w/ and w/o your patch.
W/o the patch this test generates the rlwinm insn and REE spits out.

| Cannot eliminate extension:
| (insn 20 18 22 3 (set (reg:SI 4 4)
|        (zero_extend:SI (reg:QI 4 4 [orig:120 c+3 ] [120]))) "zext-elim-3.c":8:29 7 {zero_extendqisi2}
|     (nil))
| because it can operate on uninitialized data

With the patch obviously the extension insn goes away and so does the diagnostic, but it wonder your abi_extension_candidate_argno_p () check before uniinitialized data check is correct/safe ? I've not looked closely what the scope of that check is

IOW, is this test case sufficient or do we need a different test which would cure a genuine "missing definitiion(s)" bailout of REE ?

+/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */

Thanks,
-Vineet

Reply via email to