Hi Ajit,
On 9/17/23 22:59, Ajit Agarwal wrote:
This new version of patch 6 use improve ree pass for rs6000 target using
defined ABI interfaces.
Bootstrapped and regtested on power64-linux-gnu.
Review comments incorporated.
Thanks & Regards
Ajit
Nit: This seems to belong to "what changed in v6" between the two "---"
lines right before start of source diff.
ree: Improve ree pass for rs6000 target using defined abi interfaces
For rs6000 target we see redundant zero and sign extension and done to
improve ree pass to eliminate such redundant zero and sign extension
using defined ABI interfaces.
It seems you have redundant "redundant zero and sign extension" - pun
intended ;-)
On a serious note, when debugging your code for a possible RISC-V
benefit, it seems what it is trying to do is address REE giving up due
to "missing definition(s)". Perhaps mentioning that in commitlog would
give the reader more context.
+/* Return TRUE if target mode is equal to source mode of zero_extend
+ or sign_extend otherwise false. */
+
+static bool
+abi_target_promote_function_mode (machine_mode mode)
+{
+ int unsignedp;
+ machine_mode tgt_mode =
+ targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
+ NULL_TREE, 1);
+
+ if (tgt_mode == mode)
+ return true;
+ else
+ return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+ an return registers. */
Additional Whitespace and grammer
s/an return registers/a return register
Please *run* contrib/check_gnu_style on your patch before sending out on
mailing lists, saves reviewers time and they can focus more on technical
content.
+
+static bool
+abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
+{
+ rtx set = single_set (insn);
+
+ if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
+ return false;
This still has ABI assumptions: RISC-V generates SIGN_EXTEND for
functions args and return reg.
This is not a deficiency of patch per-se, but something we would like to
address - even if as an addon-patch.
+
+ if (FUNCTION_VALUE_REGNO_P (regno))
+ return true;
+
+ return false;
+}
+
+/* Return TRUE if reg source operand of zero_extend is argument registers
+ and not return registers and source and destination operand are same
+ and mode of source and destination operand are not same. */
+
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+ rtx set = single_set (insn);
+
+ if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
+ return false;
Ditto: ABI assumption.
+
+ machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
why not simply @dst_mode
+ rtx orig_src = XEXP (SET_SRC (set),0);
+
+ bool copy_needed
+ = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
Maybe use @orig_src here, rather than duplicating XEXP (SET_SRC (set),0)
+ if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
The bailing out for copy_needed needs extra commentary, why ?
+ && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+ && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+ return true;
+
+ return false;
Consider this bike-shed but I would arrange this code differently. The
main case here is check for function args and then the not so imp reasons
+ rtx orig_src = XEXP (src, 0);
+
+ if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+ || abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+ return false;
+
+ /* commentary as to why .... */
+ if (dst_mode == GET_MODE (orig_src))
+ return false;
- bool copy_needed
- = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
+ /* copy needed ..... */
+ if (REGNO (SET_DEST (set)) != REGNO (orig_src))
+ return false;
+
+ return true;
+/* Return TRUE if the candidate insn is zero extend and regno is
+ an argument registers. */
+
+static bool
+abi_extension_candidate_argno_p (rtx_code code, int regno)
+{
+ if (code != ZERO_EXTEND)
+ return false;
ABI assumption still.
+
+ if (FUNCTION_ARG_REGNO_P (regno))
+ return true;
+
+ return false;
+}
+
+/* Return TRUE if the candidate insn doesn't have defs and have
+ * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class. */
+
+static bool
+abi_handle_regs_without_defs_p (rtx_insn *insn)
+{
+ if (side_effects_p (PATTERN (insn)))
+ return false;
+
+ struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
+
+ if (!uses)
+ return false;
+
+ for (df_link *use = uses; use; use = use->next)
+ {
+ if (!use->ref)
+ return false;
+
+ if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+ return false;
+
+ rtx_insn *use_insn = DF_REF_INSN (use->ref);
+
+ if (GET_CODE (PATTERN (use_insn)) == SET)
+ {
+ rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
+
+ if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
+ || GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ || GET_RTX_CLASS (code) == RTX_UNARY)
+ return false;
+ }
+ }
+ return true;
+}
+
/* This function goes through all reaching defs of the source
of the candidate for elimination (CAND) and tries to combine
the extension with the definition instruction. The changes
@@ -770,6 +883,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat,
ext_state *state)
state->defs_list.truncate (0);
state->copies_list.truncate (0);
+ rtx orig_src = XEXP (SET_SRC (cand->expr),0);
+
+ if (abi_extension_candidate_p (cand->insn)
+ && (!get_defs (cand->insn, orig_src, NULL)))
+ return abi_handle_regs_without_defs_p (cand->insn);
outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
@@ -1036,6 +1154,15 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
}
}
+ rtx insn_set = single_set (cand->insn);
+
+ machine_mode mode = (GET_MODE (XEXP (SET_SRC (insn_set), 0)));
+
+ bool promote_p = abi_target_promote_function_mode (mode);
+
+ if (promote_p)
+ return true;
+
if (merge_successful)
{
/* Commit the changes here if possible
@@ -1112,12 +1239,20 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
rtx reg = XEXP (src, 0);
struct df_link *defs, *def;
ext_cand *cand;
+ defs = get_defs (insn, reg, NULL);
/* Zero-extension of an undefined value is partly defined (it's
completely undefined for sign-extension, though). So if there exists
a path from the entry to this zero-extension that leaves this register
uninitialized, removing the extension could change the behavior of
correct programs. So first, check it is not the case. */
+ if (!defs && abi_extension_candidate_argno_p (code, REGNO (reg)))
+ {
+ ext_cand e = {expr, code, mode, insn};
+ insn_list->safe_push (e);
+ return;
+ }
+
Naively I would expect this change to go in the existing !defs { } block
below, after the uninitialized case but it seems this is deliberate -
you could be bypassing the uninitialized data case ... (see below as well)
if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
{
if (dump_file)
@@ -1131,7 +1266,6 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
}
/* Second, make sure we can get all the reaching definitions. */
- defs = get_defs (insn, reg, NULL);
if (!defs)
{
if (dump_file)
@@ -1321,7 +1455,8 @@ find_and_remove_re (void)
&& (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0))))
{
reinsn_copy_list.safe_push (curr_cand->insn);
- reinsn_copy_list.safe_push (state.defs_list[0]);
+ if (state.defs_list.length () != 0)
+ reinsn_copy_list.safe_push (state.defs_list[0]);
}
reinsn_del_list.safe_push (curr_cand->insn);
state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
@@ -1345,6 +1480,10 @@ find_and_remove_re (void)
for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
{
rtx_insn *curr_insn = reinsn_copy_list[i];
+
+ if ((i+1) >= reinsn_copy_list.length ())
+ continue;
+
rtx_insn *def_insn = reinsn_copy_list[i + 1];
/* Use the mode of the destination of the defining insn
diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
new file mode 100644
index 00000000000..5a050df06ff
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/zext-elim-3.C
@@ -0,0 +1,13 @@
+/* { dg-options "-mcpu=power9 -O2" } */
+
+void *memset(void *b, int c, unsigned long len)
+{
+ unsigned long i;
+
+ for (i = 0; i < len; i++)
+ ((unsigned char *)b)[i] = c;
+
+ return b;
+}
So I did a simple cc1 build for powerpc on gcc tip w/ and w/o your patch.
W/o the patch this test generates the rlwinm insn and REE spits out.
| Cannot eliminate extension:
| (insn 20 18 22 3 (set (reg:SI 4 4)
| (zero_extend:SI (reg:QI 4 4 [orig:120 c+3 ] [120])))
"zext-elim-3.c":8:29 7 {zero_extendqisi2}
| (nil))
| because it can operate on uninitialized data
With the patch obviously the extension insn goes away and so does the
diagnostic, but it wonder your abi_extension_candidate_argno_p () check
before uniinitialized data check is correct/safe ? I've not looked
closely what the scope of that check is
IOW, is this test case sufficient or do we need a different test which
would cure a genuine "missing definitiion(s)" bailout of REE ?
+/* { dg-final { scan-assembler-not "\mrlwinm\M" } } */
Thanks,
-Vineet