Hello, This one refactors some copy pasta that my previous patch regarding this matter introduced and catches more unnecessary sign/zero extensions of T bit stores. It also fixes the bug reported in PR 54925 which popped up after the last patch for PR 51244. Tested on rev 192417 with make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
and no new failures. OK? Cheers, Oleg gcc/ChangeLog: PR target/51244 * config/sh/sh-protos.h (set_of_reg): New struct. (sh_find_set_of_reg, sh_is_logical_t_store_expr, sh_try_omit_signzero_extend): Declare... * config/sh/sh.c (sh_find_set_of_reg, sh_is_logical_t_store_expr, sh_try_omit_signzero_extend): ...these new functions. * config/sh/sh.md (*logical_op_t): New insn_and_split. (*zero_extend<mode>si2_compact): Use sh_try_omit_signzero_extend in splitter. (*extend<mode>si2_compact_reg): Convert to insn_and_split. Use sh_try_omit_signzero_extend in splitter. (*mov<mode>_reg_reg): Disallow t_reg_operand as operand 1. (*cbranch_t): Rewrite combine part in splitter using new sh_find_set_of_reg function. testsuite/ChangeLog: PR target/51244 * gcc.target/sh/pr51244-17.c: New.
Index: gcc/config/sh/sh-protos.h =================================================================== --- gcc/config/sh/sh-protos.h (revision 192417) +++ gcc/config/sh/sh-protos.h (working copy) @@ -163,6 +163,25 @@ enum machine_mode mode = VOIDmode); extern rtx sh_find_equiv_gbr_addr (rtx cur_insn, rtx mem); extern int sh_eval_treg_value (rtx op); + +/* Result value of sh_find_set_of_reg. */ +struct set_of_reg +{ + /* The insn where sh_find_set_of_reg stopped looking. + Can be NULL_RTX if the end of the insn list was reached. */ + rtx insn; + + /* The set rtx of the specified reg if found, NULL_RTX otherwise. */ + const_rtx set_rtx; + + /* The set source rtx of the specified reg if found, NULL_RTX otherwise. + Usually, this is the most interesting return value. */ + rtx set_src; +}; + +extern set_of_reg sh_find_set_of_reg (rtx reg, rtx insn, rtx(*stepfunc)(rtx)); +extern bool sh_is_logical_t_store_expr (rtx op, rtx insn); +extern rtx sh_try_omit_signzero_extend (rtx extended_op, rtx insn); #endif /* RTX_CODE */ extern void sh_cpu_cpp_builtins (cpp_reader* pfile); Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 192417) +++ gcc/config/sh/sh.c (working copy) @@ -13450,4 +13450,114 @@ return NULL_RTX; } +/*------------------------------------------------------------------------------ + Manual insn combine support code. +*/ + +/* Given a reg rtx and a start insn, try to find the insn that sets the + specified reg by using the specified insn stepping function, such as + 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx + of the reg set. */ +set_of_reg +sh_find_set_of_reg (rtx reg, rtx insn, rtx(*stepfunc)(rtx)) +{ + set_of_reg result; + result.insn = insn; + result.set_rtx = NULL_RTX; + result.set_src = NULL_RTX; + + if (!REG_P (reg) || insn == NULL_RTX) + return result; + + for (result.insn = stepfunc (insn); result.insn != NULL_RTX; + result.insn = stepfunc (result.insn)) + { + if (LABEL_P (result.insn) || BARRIER_P (result.insn)) + return result; + if (!NONJUMP_INSN_P (result.insn)) + continue; + if (reg_set_p (reg, result.insn)) + { + result.set_rtx = set_of (reg, result.insn); + + if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET) + return result; + + result.set_src = XEXP (result.set_rtx, 1); + return result; + } + } + + return result; +} + +/* Given an op rtx and an insn, try to find out whether the result of the + specified op consists only of logical operations on T bit stores. */ +bool +sh_is_logical_t_store_expr (rtx op, rtx insn) +{ + if (!logical_operator (op, SImode)) + return false; + + rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) }; + int op_is_t_count = 0; + + for (int i = 0; i < 2; ++i) + { + if (t_reg_operand (ops[i], VOIDmode) + || negt_reg_operand (ops[i], VOIDmode)) + op_is_t_count++; + + else + { + set_of_reg op_set = sh_find_set_of_reg (ops[i], insn, + prev_nonnote_insn_bb); + if (op_set.set_src == NULL_RTX) + continue; + + if (t_reg_operand (op_set.set_src, VOIDmode) + || negt_reg_operand (op_set.set_src, VOIDmode) + || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn)) + op_is_t_count++; + } + } + + return op_is_t_count == 2; +} + +/* Given the operand that is extended in a sign/zero extend insn, and the + insn, try to figure out whether the sign/zero extension can be replaced + by a simple reg-reg copy. If so, the replacement reg rtx is returned, + NULL_RTX otherwise. */ +rtx +sh_try_omit_signzero_extend (rtx extended_op, rtx insn) +{ + if (REG_P (extended_op)) + extended_op = extended_op; + else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op))) + extended_op = SUBREG_REG (extended_op); + else + return NULL_RTX; + + /* Reg moves must be of the same mode. */ + if (GET_MODE (extended_op) != SImode) + return NULL_RTX; + + set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb); + if (s.set_src == NULL_RTX) + return NULL_RTX; + + if (t_reg_operand (s.set_src, VOIDmode) + || negt_reg_operand (s.set_src, VOIDmode)) + return extended_op; + + /* If the zero extended reg was formed by a logical operation, check the + operands of the logical operation. If both originated from T bit + stores the zero extension can be eliminated. */ + else if (sh_is_logical_t_store_expr (s.set_src, s.insn)) + return extended_op; + + return NULL_RTX; +} + #include "gt-sh.h" Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 192417) +++ gcc/config/sh/sh.md (working copy) @@ -3708,6 +3708,26 @@ "xor %2,%0" [(set_attr "type" "arith")]) +;; The *logical_op_t pattern helps combine eliminating sign/zero extensions +;; of results where one of the inputs is a T bit store. Notice that this +;; pattern must not match during reload. If reload picks this pattern it +;; will be impossible to split it afterwards. +(define_insn_and_split "*logical_op_t" + [(set (match_operand:SI 0 "arith_reg_dest") + (match_operator:SI 3 "logical_operator" + [(match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "t_reg_operand")]))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 4) (reg:SI T_REG)) + (set (match_dup 0) (match_dup 3))] +{ + operands[4] = gen_reg_rtx (SImode); + operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode, + operands[1], operands[4]); +}) + (define_insn "*xorsi3_media" [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") (xor:SI (match_operand:SI 1 "logical_reg_operand" "%r,r") @@ -5590,39 +5610,7 @@ eliminated. Notice that this also helps the *cbranch_t splitter when it tries to post-combine tests and conditional branches, as it does not check for zero extensions. */ - rtx ext_reg; - if (REG_P (operands[1])) - ext_reg = operands[1]; - else if (GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))) - ext_reg = SUBREG_REG (operands[1]); - else - FAIL; - - /* Reg moves must be of the same mode. */ - if (GET_MODE (ext_reg) != SImode) - FAIL; - - operands[2] = NULL_RTX; - for (rtx i = prev_nonnote_insn_bb (curr_insn); i != NULL_RTX; - i = prev_nonnote_insn_bb (i)) - { - if (LABEL_P (i) || BARRIER_P (i)) - break; - if (!NONJUMP_INSN_P (i)) - continue; - - if (reg_set_p (ext_reg, i)) - { - rtx set_op = XEXP (set_of (ext_reg, i), 1); - if (set_op == NULL_RTX) - break; - if (t_reg_operand (set_op, VOIDmode) - || negt_reg_operand (set_op, VOIDmode)) - operands[2] = ext_reg; - break; - } - } - + operands[2] = sh_try_omit_signzero_extend (operands[1], curr_insn); if (operands[2] == NULL_RTX) FAIL; } @@ -5850,11 +5838,23 @@ subreg_lowpart_offset (SImode, GET_MODE (op1))); }) -(define_insn "*extend<mode>si2_compact_reg" +(define_insn_and_split "*extend<mode>si2_compact_reg" [(set (match_operand:SI 0 "arith_reg_dest" "=r") (sign_extend:SI (match_operand:QIHI 1 "arith_reg_operand" "r")))] "TARGET_SH1" "exts.<bw> %1,%0" + "&& can_create_pseudo_p ()" + [(set (match_dup 0) (match_dup 2))] +{ + /* Sometimes combine fails to combine a T bit or negated T bit store to a + reg with a following sign extension. In the split pass after combine, + try to figure the extended reg was set. If it originated from the T + bit we can replace the sign extension with a reg move, which will be + eliminated. */ + operands[2] = sh_try_omit_signzero_extend (operands[1], curr_insn); + if (operands[2] == NULL_RTX) + FAIL; +} [(set_attr "type" "arith")]) ;; FIXME: Fold non-SH2A and SH2A alternatives with "enabled" attribute. @@ -6629,10 +6629,19 @@ ;; picked to load/store regs. If the regs regs are on the stack reload will ;; try other insns and not stick to movqi_reg_reg. ;; The same applies to the movhi variants. +;; +;; Notice, that T bit is not allowed as a mov src operand here. This is to +;; avoid things like (set (reg:QI) (subreg:QI (reg:SI T_REG) 0)), which +;; introduces zero extensions after T bit stores and redundant reg copies. +;; +;; FIXME: We can't use 'arith_reg_operand' (which disallows T_REG) as a +;; predicate for the mov src operand because reload will have trouble +;; reloading MAC subregs otherwise. For that probably special patterns +;; would be required. (define_insn "*mov<mode>_reg_reg" [(set (match_operand:QIHI 0 "arith_reg_dest" "=r") (match_operand:QIHI 1 "register_operand" "r"))] - "TARGET_SH1" + "TARGET_SH1 && !t_reg_operand (operands[1], VOIDmode)" "mov %1,%0" [(set_attr "type" "move")]) @@ -8178,28 +8187,17 @@ rtx testing_insn = NULL_RTX; rtx tested_reg = NULL_RTX; - for (rtx i = prev_nonnote_insn_bb (curr_insn); i != NULL_RTX; - i = prev_nonnote_insn_bb (i)) + set_of_reg s0 = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn, + prev_nonnote_insn_bb); + if (s0.set_src != NULL_RTX + && GET_CODE (s0.set_src) == EQ + && REG_P (XEXP (s0.set_src, 0)) + && satisfies_constraint_Z (XEXP (s0.set_src, 1))) { - if (LABEL_P (i) || BARRIER_P (i)) - break; - if (!NONJUMP_INSN_P (i)) - continue; - - rtx p = PATTERN (i); - if (p != NULL_RTX - && GET_CODE (p) == SET && t_reg_operand (XEXP (p, 0), VOIDmode) - && GET_CODE (XEXP (p, 1)) == EQ - && REG_P (XEXP (XEXP (p, 1), 0)) - && satisfies_constraint_Z (XEXP (XEXP (p, 1), 1))) - { - testing_insn = i; - tested_reg = XEXP (XEXP (p, 1), 0); - break; - } + testing_insn = s0.insn; + tested_reg = XEXP (s0.set_src, 0); } - - if (testing_insn == NULL_RTX) + else FAIL; /* Continue scanning the insns backwards and try to find the insn that @@ -8213,48 +8211,38 @@ if (reg_used_between_p (get_t_reg_rtx (), testing_insn, curr_insn)) FAIL; - for (rtx i = prev_nonnote_insn_bb (testing_insn); i != NULL_RTX; - i = prev_nonnote_insn_bb (i)) + while (true) { - if (LABEL_P (i) || BARRIER_P (i)) + set_of_reg s1 = sh_find_set_of_reg (tested_reg, s0.insn, + prev_nonnote_insn_bb); + if (s1.set_src == NULL_RTX) break; - if (!NONJUMP_INSN_P (i)) - continue; - if (reg_set_p (tested_reg, i)) + if (t_reg_operand (s1.set_src, VOIDmode)) + operands[2] = GEN_INT (treg_value ^ 1); + else if (negt_reg_operand (s1.set_src, VOIDmode)) + operands[2] = GEN_INT (treg_value); + else if (REG_P (s1.set_src)) { - const_rtx tested_reg_set = set_of (tested_reg, i); + /* If it's a reg-reg copy follow the copied reg. This can + happen e.g. when T bit store zero-extensions are + eliminated. */ + tested_reg = s1.set_src; + s0.insn = s1.insn; + continue; + } - /* It could also be a clobber... */ - if (tested_reg_set == NULL_RTX || GET_CODE (tested_reg_set) != SET) - break; + /* It's only safe to remove the testing insn if the T bit is not + modified between the testing insn and the insn that stores the + T bit. Notice that some T bit stores such as negc also modify + the T bit. */ + if (modified_between_p (get_t_reg_rtx (), s1.insn, testing_insn) + || modified_in_p (get_t_reg_rtx (), s1.insn)) + operands[2] = NULL_RTX; - rtx set_op1 = XEXP (tested_reg_set, 1); - if (t_reg_operand (set_op1, VOIDmode)) - operands[2] = GEN_INT (treg_value ^ 1); - else if (negt_reg_operand (set_op1, VOIDmode)) - operands[2] = GEN_INT (treg_value); - else if (REG_P (set_op1)) - { - /* If it's a reg-reg copy follow the copied reg. This can - happen e.g. when T bit store zero-extensions are - eliminated. */ - tested_reg = set_op1; - continue; - } + break; + } - /* It's only safe to remove the testing insn if the T bit is not - modified between the testing insn and the insn that stores the - T bit. Notice that some T bit stores such as negc also modify - the T bit. */ - if (modified_between_p (get_t_reg_rtx (), i, testing_insn) - || modified_in_p (get_t_reg_rtx (), i)) - operands[2] = NULL_RTX; - - break; - } - } - if (operands[2] == NULL_RTX) FAIL; Index: gcc/testsuite/gcc.target/sh/pr51244-17.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr51244-17.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr51244-17.c (revision 0) @@ -0,0 +1,297 @@ +/* Check that no unnecessary zero extensions are done on values that are + results of arithmetic with T bit inputs. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-not "extu|exts" } } */ + +int +test00 (int a, int b, int c, int d) +{ + int x = a == b; + int y = c == 0; + return x == y; +} + +int +test01 (int a, int b, int c, int d) +{ + int x = a == b; + int y = c == d; + return x == y; +} + +int +test02 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c == d; + return x == y; +} + +int +test03 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c != d; + return x == y; +} + +int +test04 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c != d; + return x == y; +} + +int +test05 (int a, int b, int c, int d) +{ + int x = a == b; + int y = c == 0; + return x != y; +} + +int +test06 (int a, int b, int c, int d) +{ + int x = a == b; + int y = c == 0; + return x ^ y; +} + +int +test07 (int a, int b, int c, int d) +{ + int x = a == b; + int y = c == 0; + return x | y; +} + +int +test08 (int a, int b, int c, int d) +{ + int x = a == b; + int y = c == 0; + return x & y; +} + +int +test09 (int a, int b, int c, int d) +{ + int x = a == b; + int y = c == d; + return x != y; +} + +int +test10 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c == d; + return x != y; +} + +int +test11 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c != d; + return x != y; +} + +int +test12 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c != d; + return x != y; +} + +int +test13 (int a, int b, int c, int d, int e, int f) +{ + int x = a == b; + int y = c == 0; + int z = d == e; + return x == y || x == z; +} + +int +test14 (int a, int b, int c, int d, int e, int f) +{ + int x = a == b; + int y = c == 0; + int z = d == e; + return x == y && x == z; +} + +int +test15 (int a, int b, int c, int d, int e, int f) +{ + int x = a != b; + int y = c == 0; + int z = d == e; + return x == y || x == z; +} + +int +test16 (int a, int b, int c, int d, int e, int f) +{ + int x = a != b; + int y = c == 0; + int z = d == e; + return x == y && x == z; +} + +int +test17 (int a, int b, int c, int d, int e, int f) +{ + int x = a != b; + int y = c != 0; + int z = d == e; + return x == y || x == z; +} + +int +test18 (int a, int b, int c, int d, int e, int f) +{ + int x = a != b; + int y = c != 0; + int z = d == e; + return x == y && x == z; +} + +int +test19 (int a, int b, int c, int d, int e, int f) +{ + int x = a != b; + int y = c != 0; + int z = d == e; + return x == y || x == z; +} + +int +test20 (int a, int b, int c, int d, int e, int f) +{ + int x = a != b; + int y = c != 0; + int z = d != e; + return x == y && x == z; +} + +int +test21 (int a, int b, int c, int d) +{ + int x = a == b; + int y = c == 0; + return x + y; +} + +int +test22 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c == 0; + return x + y; +} + +int +test23 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c != 0; + return x + y; +} + +int +test24 (int a, int b, int c, int d) +{ + int x = a == b; + int y = c == 0; + return x - y; +} + +int +test25 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c == 0; + return x - y; +} + +int +test26 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c != 0; + return x - y; +} + +int +test27 (int a, int b, int c, int d) +{ + int x = a == b; + int y = c == 0; + return x * y; +} + +int +test28 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c == 0; + return x * y; +} + +int +test29 (int a, int b, int c, int d) +{ + int x = a != b; + int y = c != 0; + return x * y; +} + +int +test30 (int a, int b) +{ + return ((a & 0x7F) == 1) + | ((a & 0xFF00) == 0x0200) + | ((a & 0xFF0000) == 0x030000); +} + +int +test31 (int a, int b) +{ + return ((a & 0x7F) == 1) + | ((a & 0xFF00) == 0x0200) + | ((a & 0xFF0000) == 0x030000) + | ((a & 0xFF000000) == 0x04000000); +} + +int +test32 (int* a, int b, int c, volatile char* d) +{ + d[1] = a[0] != 0; + return b; +} + +int +test33 (int* a, int b, int c, volatile char* d) +{ + d[1] = a[0] == 0; + return b; +} + +char +test34 (int a, int* b) +{ + return (b[4] & b[0] & a) == a; +} + +unsigned char +test35 (int a, int* b) +{ + return (b[4] & b[0] & a) == a; +}