Hello, This patch makes some further improvements to the utilization of rotate insns on SH. Tested on rev 191657 with make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}" and no new failures.
OK? (BTW, comparing test summaries of rev. 191342 and rev. 191657 shows a lot of new PCH-related failures on my xgcc SH setup) Cheers, Oleg gcc/ChangeLog: PR target/54089 * config/sh/constraints.md (Jhb): New constraint. * config/sh/predicates.md (negt_reg_shl31_operand): New predicate. * config/sh/sh.md (rotrsi3): New expander. (rotrsi3_1, *rotrsi3_1, *rotlsi3_1): New insns. (rotlsi3, rotlhi3): Use const_int_operand predicate instead of immediate_operand and remove CONST_INT_P checks in expansion code. (*rotcr): Clean up variable usage. Handle preceding nott insn. Add split with swapped operands. (*rotcr_neg_t, *movt_msb, *negt_msb): New insns and splits. testsuite/ChangeLog: PR target/54089 * gcc.target/sh/pr54089-1.c (test_15, test_16, test_17, test_18, test_19, test_20, test_21, test_22, test_23): New functions. * gcc.target/sh/pr54089-4.c: New. * gcc.target/sh/pr54089-5.c: New. * gcc.target/sh/pr54089-6.c: New. * gcc.target/sh/pr54089-7.c: New.
Index: gcc/testsuite/gcc.target/sh/pr54089-4.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr54089-4.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr54089-4.c (revision 0) @@ -0,0 +1,15 @@ +/* Check that the rotcr instruction is generated when shifting the + negated T bit on non-SH2A. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" "-m2a*" } { "" } } */ +/* { dg-final { scan-assembler-times "rotcr" 1 } } */ +/* { dg-final { scan-assembler-times "tst" 1 } } */ +/* { dg-final { scan-assembler-times "movt" 1 } } */ + +int +test_00 (int a, int b) +{ + int r = a != b; + return r << 31; +} Index: gcc/testsuite/gcc.target/sh/pr54089-6.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr54089-6.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr54089-6.c (revision 0) @@ -0,0 +1,30 @@ +/* Check that the rotr and rotl instructions are generated. 
*/ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-times "rotr" 2 } } */ +/* { dg-final { scan-assembler-times "rotl" 2 } } */ + +int +test_00 (int a) +{ + return (a << 1) | ((a >> 31) & 1); +} + +int +test_01 (int a) +{ + return (a << 1) | ((unsigned int)a >> 31); +} + +int +test_02 (int a) +{ + return ((unsigned int)a >> 1) | (a << 31); +} + +int +test_03 (int a) +{ + return ((a >> 1) & 0x7FFFFFFF) | (a << 31); +} Index: gcc/testsuite/gcc.target/sh/pr54089-1.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr54089-1.c (revision 191657) +++ gcc/testsuite/gcc.target/sh/pr54089-1.c (working copy) @@ -2,7 +2,7 @@ /* { dg-do compile { target "sh*-*-*" } } */ /* { dg-options "-O1" } */ /* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */ -/* { dg-final { scan-assembler-times "rotcr" 15 } } */ +/* { dg-final { scan-assembler-times "rotcr" 24 } } */ /* { dg-final { scan-assembler-times "shll\t" 1 } } */ typedef char bool; @@ -109,3 +109,66 @@ bool r = b < 0; return ((a >> 1) | (r << 31)); } + +unsigned int +test_15 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a >> 1) | (r << 31)); +} + +unsigned int +test_16 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a >> 2) | (r << 31)); +} + +unsigned int +test_17 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a >> 3) | (r << 31)); +} + +unsigned int +test_18 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a >> 4) | (r << 31)); +} + +unsigned int +test_19 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a >> 5) | (r << 31)); +} + +unsigned int +test_20 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a >> 6) | (r << 31)); +} + +unsigned int +test_21 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a >> 7) | (r << 31)); +} + +unsigned int +test_22 
(unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a >> 8) | (r << 31)); +} + +unsigned int +test_23 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a >> 31) | (r << 31)); +} Index: gcc/testsuite/gcc.target/sh/pr54089-5.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr54089-5.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr54089-5.c (revision 0) @@ -0,0 +1,14 @@ +/* Check that the movrt rotr instruction sequence is generated when shifting + the negated T bit on SH2A. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "*" } { "-m2a*" } } */ +/* { dg-final { scan-assembler-times "movrt" 1 } } */ +/* { dg-final { scan-assembler-times "rotr" 1 } } */ + +int +test_00 (int a, int b) +{ + int r = a != b; + return r << 31; +} Index: gcc/testsuite/gcc.target/sh/pr54089-7.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr54089-7.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr54089-7.c (revision 0) @@ -0,0 +1,63 @@ +/* Check that the rotcr instruction is generated. 
*/ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */ +/* { dg-final { scan-assembler-times "rotcr" 4 } } */ +/* { dg-final { scan-assembler-not "movt" } } */ +/* { dg-final { scan-assembler-not "or\t" } } */ +/* { dg-final { scan-assembler-not "rotr" } } */ +/* { dg-final { scan-assembler-not "and" } } */ + +typedef char bool; + +int +test_00 (int* a, int* b) +{ + int i; + unsigned int r = 0; + for (i = 0; i < 16; ++i) + { + bool t = a[i] == b[i]; + r = (t << 31) | (r >> 1); + } + return r; +} + +int +test_01 (int* a, int* b) +{ + int i; + unsigned int r = 0; + for (i = 0; i < 16; ++i) + { + bool t = a[i] == b[i]; + r = (t << 31) | (r >> 2); + } + return r; +} + +int +test_02 (int* a, int* b) +{ + int i; + unsigned int r = 0; + for (i = 0; i < 16; ++i) + { + bool t = a[i] == b[i]; + r = (t << 31) | (r >> 3); + } + return r; +} + +unsigned int +test_03 (const bool* a) +{ + int i; + unsigned int r = 0; + for (i = 0; i < 32; ++i) + { + bool t = a[i]; + r = (t << 31) | (r >> 1); + } + return r; +} Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 191657) +++ gcc/config/sh/sh.md (working copy) @@ -3817,6 +3817,42 @@ GEN_INT (56), GEN_INT (8)); }) +(define_expand "rotrsi3" + [(set (match_operand:SI 0 "arith_reg_dest") + (rotatert:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")))] + "TARGET_SH1" +{ + HOST_WIDE_INT ival = INTVAL (operands[2]); + if (ival == 1) + { + emit_insn (gen_rotrsi3_1 (operands[0], operands[1])); + DONE; + } + + FAIL; +}) + +(define_insn "rotrsi3_1" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (rotatert:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1))) + (set (reg:SI T_REG) + (and:SI (match_dup 1) (const_int 1)))] + "TARGET_SH1" + "rotr %0" + [(set_attr "type" "arith")]) + +;; A simplified version of rotr for combine. 
+(define_insn "*rotrsi3_1" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (rotatert:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "rotr %0" + [(set_attr "type" "arith")]) + (define_insn "rotlsi3_1" [(set (match_operand:SI 0 "arith_reg_dest" "=r") (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0") @@ -3827,6 +3863,16 @@ "rotl %0" [(set_attr "type" "arith")]) +;; A simplified version of rotl for combine. +(define_insn "*rotlsi3_1" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "rotl %0" + [(set_attr "type" "arith")]) + (define_insn "rotlsi3_31" [(set (match_operand:SI 0 "arith_reg_dest" "=r") (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0") @@ -3845,9 +3891,9 @@ [(set_attr "type" "arith")]) (define_expand "rotlsi3" - [(set (match_operand:SI 0 "arith_reg_dest" "") - (rotate:SI (match_operand:SI 1 "arith_reg_operand" "") - (match_operand:SI 2 "immediate_operand" "")))] + [(set (match_operand:SI 0 "arith_reg_dest") + (rotate:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")))] "TARGET_SH1" { static const char rot_tab[] = { @@ -3857,12 +3903,8 @@ 002, 002, 010, 000, 000, 000, 000, 000, }; - int count, choice; - - if (!CONST_INT_P (operands[2])) - FAIL; - count = INTVAL (operands[2]); - choice = rot_tab[count]; + int count = INTVAL (operands[2]); + int choice = rot_tab[count]; if (choice & 010 && SH_DYNAMIC_SHIFT_COST <= 1) FAIL; choice &= 7; @@ -3908,12 +3950,12 @@ [(set_attr "type" "arith")]) (define_expand "rotlhi3" - [(set (match_operand:HI 0 "arith_reg_operand" "") - (rotate:HI (match_operand:HI 1 "arith_reg_operand" "") - (match_operand:HI 2 "immediate_operand" "")))] + [(set (match_operand:HI 0 "arith_reg_operand") + (rotate:HI (match_operand:HI 1 "arith_reg_operand") + (match_operand:HI 2 "const_int_operand")))] 
"TARGET_SH1" { - if (!CONST_INT_P (operands[2]) || INTVAL (operands[2]) != 8) + if (INTVAL (operands[2]) != 8) FAIL; }) @@ -3950,11 +3992,7 @@ { if (INTVAL (operands[2]) > 1) { - /* use plus_constant function ?? */ - const int shift_count = INTVAL (operands[2]) - 1; - const rtx shift_count_rtx = GEN_INT (shift_count); - rtx shift_res = gen_reg_rtx (SImode); - + const rtx shift_count = GEN_INT (INTVAL (operands[2]) - 1); rtx prev_set_t_insn = NULL_RTX; rtx tmp_t_reg = NULL_RTX; @@ -3963,10 +4001,24 @@ shift insn before that insn, to remove the T_REG dependency. If the insn that sets the T_REG cannot be found, store the T_REG in a temporary reg and restore it after the shift. */ - if (sh_lshrsi_clobbers_t_reg_p (shift_count_rtx) - && ! sh_dynamicalize_shift_p (shift_count_rtx)) + if (sh_lshrsi_clobbers_t_reg_p (shift_count) + && ! sh_dynamicalize_shift_p (shift_count)) { prev_set_t_insn = prev_nonnote_insn_bb (curr_insn); + + /* Skip the nott insn, which was probably inserted by the splitter + of *rotcr_neg_t. Don't use one of the recog functions + here during insn splitting, since that causes problems in later + passes. */ + if (prev_set_t_insn != NULL_RTX) + { + rtx pat = PATTERN (prev_set_t_insn); + if (GET_CODE (pat) == SET + && t_reg_operand (XEXP (pat, 0), SImode) + && negt_reg_operand (XEXP (pat, 1), SImode)) + prev_set_t_insn = prev_nonnote_insn_bb (prev_set_t_insn); + } + if (! (prev_set_t_insn != NULL_RTX && reg_set_p (get_t_reg_rtx (), prev_set_t_insn) && ! reg_referenced_p (get_t_reg_rtx (), @@ -3978,14 +4030,15 @@ } } - rtx shift_rtx = gen_lshrsi3 (shift_res, operands[1], shift_count_rtx); - operands[1] = shift_res; + rtx shift_result = gen_reg_rtx (SImode); + rtx shift_insn = gen_lshrsi3 (shift_result, operands[1], shift_count); + operands[1] = shift_result; /* Emit the shift insn before the insn that sets T_REG, if possible. 
*/ if (prev_set_t_insn != NULL_RTX) - emit_insn_before (shift_rtx, prev_set_t_insn); + emit_insn_before (shift_insn, prev_set_t_insn); else - emit_insn (shift_rtx); + emit_insn (shift_insn); /* Restore T_REG if it has been saved before. */ if (tmp_t_reg != NULL_RTX) @@ -4007,6 +4060,20 @@ DONE; }) +;; If combine tries the same as above but with swapped operands, split +;; it so that it will try the pattern above. +(define_split + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_or_t_reg_operand") + (const_int 31)) + (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "const_int_operand"))))] + "TARGET_SH1 && can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (lshiftrt:SI (match_dup 2) (match_dup 3)) + (ashift:SI (match_dup 1) (const_int 31)))) + (clobber (reg:SI T_REG))])]) + ;; rotcr combine bridge pattern which will make combine try out more ;; complex patterns. (define_insn_and_split "*rotcr" @@ -4040,6 +4107,41 @@ DONE; }) +;; rotcr combine patterns for rotating in the negated T_REG value. 
+(define_insn_and_split "*rotcr_neg_t" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (match_operand:SI 1 "negt_reg_shl31_operand") + (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "const_int_operand")))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (lshiftrt:SI (match_dup 2) (match_dup 3)) + (ashift:SI (reg:SI T_REG) (const_int 31)))) + (clobber (reg:SI T_REG))])] +{ + emit_insn (gen_nott (get_t_reg_rtx ())); +}) + +(define_insn_and_split "*rotcr_neg_t" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")) + (match_operand:SI 3 "negt_reg_shl31_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (lshiftrt:SI (match_dup 1) (match_dup 2)) + (ashift:SI (reg:SI T_REG) (const_int 31)))) + (clobber (reg:SI T_REG))])] +{ + emit_insn (gen_nott (get_t_reg_rtx ())); +}) + ;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ;; SImode shift left @@ -10720,6 +10822,53 @@ operands[0] = gen_reg_rtx (SImode); }) +;; Store T bit as MSB in a reg. +;; T = 0: 0x00000000 -> reg +;; T = 1: 0x80000000 -> reg +(define_insn_and_split "*movt_msb" + [(set (match_operand:SI 0 "arith_reg_dest") + (mult:SI (match_operand:SI 1 "t_reg_operand") + (const_int -2147483648))) ;; 0xffffffff80000000 + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(set (match_dup 0) (ashift:SI (reg:SI T_REG) (const_int 31)))]) + +;; Store inverted T bit as MSB in a reg. +;; T = 0: 0x80000000 -> reg +;; T = 1: 0x00000000 -> reg +;; On SH2A we can get away without clobbering the T_REG. 
+(define_insn_and_split "*negt_msb" + [(set (match_operand:SI 0 "arith_reg_dest") + (match_operand:SI 1 "negt_reg_shl31_operand"))] + "TARGET_SH2A" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_movrt (tmp, get_t_reg_rtx ())); + emit_insn (gen_rotrsi3 (operands[0], tmp, const1_rtx)); + DONE; +}) + +(define_insn_and_split "*negt_msb" + [(set (match_operand:SI 0 "arith_reg_dest") + (match_operand:SI 1 "negt_reg_shl31_operand")) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && !TARGET_SH2A" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, get_t_reg_rtx ()); + emit_insn (gen_cmpeqsi_t (tmp, const0_rtx)); + emit_insn (gen_rotcr (operands[0], tmp, get_t_reg_rtx ())); + DONE; +}) + ;; The *cset_zero patterns convert optimizations such as ;; "if (test) x = 0;" to "x &= -(test == 0);" ;; back to conditional branch sequences if zero-displacement branches Index: gcc/config/sh/predicates.md =================================================================== --- gcc/config/sh/predicates.md (revision 191657) +++ gcc/config/sh/predicates.md (working copy) @@ -1033,3 +1033,41 @@ (define_predicate "arith_reg_or_t_reg_operand" (ior (match_operand 0 "arith_reg_operand") (match_operand 0 "t_reg_operand"))) + +;; A predicate describing the negated value of the T bit register shifted +;; left by 31. 
+(define_predicate "negt_reg_shl31_operand" + (match_code "plus,minus,if_then_else") +{ + /* (plus:SI (mult:SI (match_operand:SI 1 "t_reg_operand") + (const_int -2147483648)) ;; 0xffffffff80000000 + (const_int -2147483648)) + */ + if (GET_CODE (op) == PLUS && satisfies_constraint_Jhb (XEXP (op, 1)) + && GET_CODE (XEXP (op, 0)) == MULT + && t_reg_operand (XEXP (XEXP (op, 0), 0), SImode) + && satisfies_constraint_Jhb (XEXP (XEXP (op, 0), 1))) + return true; + + /* (minus:SI (const_int -2147483648) ;; 0xffffffff80000000 + (mult:SI (match_operand:SI 1 "t_reg_operand") + (const_int -2147483648))) + */ + if (GET_CODE (op) == MINUS + && satisfies_constraint_Jhb (XEXP (op, 0)) + && GET_CODE (XEXP (op, 1)) == MULT + && t_reg_operand (XEXP (XEXP (op, 1), 0), SImode) + && satisfies_constraint_Jhb (XEXP (XEXP (op, 1), 1))) + return true; + + /* (if_then_else:SI (match_operand:SI 1 "t_reg_operand") + (const_int 0) + (const_int -2147483648)) ;; 0xffffffff80000000 + */ + if (GET_CODE (op) == IF_THEN_ELSE && t_reg_operand (XEXP (op, 0), SImode) + && satisfies_constraint_Z (XEXP (op, 1)) + && satisfies_constraint_Jhb (XEXP (op, 2))) + return true; + + return false; +}) Index: gcc/config/sh/constraints.md =================================================================== --- gcc/config/sh/constraints.md (revision 191657) +++ gcc/config/sh/constraints.md (working copy) @@ -33,6 +33,7 @@ ;; J16: 0xffffffff00000000 | 0x00000000ffffffff ;; Jmb: 0x000000FF ;; Jmw: 0x0000FFFF +;; Jhb: 0x80000000 ;; Kxx: unsigned xx bit ;; M: 1 ;; N: 0 @@ -148,6 +149,11 @@ (and (match_code "const_int") (match_test "ival == 0xFFFF"))) +(define_constraint "Jhb" + "Highest bit constant" + (and (match_code "const_int") + (match_test "(ival & 0xFFFFFFFF) == 0x80000000"))) + (define_constraint "K03" "An unsigned 3-bit constant, as used in SH2A bclr, bset, etc." (and (match_code "const_int")