https://gcc.gnu.org/g:0e45f9a77a54860cf026e2d41838fff66fe83d50
commit 0e45f9a77a54860cf026e2d41838fff66fe83d50 Author: Jeff Law <j...@ventanamicro.com> Date: Mon Oct 7 11:49:21 2024 -0600 [RISC-V] Add splitters to restore condops generation after recent phiopt changes V2: Fix typo in ChangeLog. Remove now extraneous comment in cset-sext.c. Throttle back branch cost to 1 in various tests -- Andrew P's recent improvements to phiopt regressed on the riscv testsuite. Essentially the new code presented to the RTL optimizers is straightline code rather than branchy for the CE pass to analyze and optimize. In the absence of conditional move support or sfb, the new code would be better. Unfortunately the presented form isn't a great fit for xventanacondops, zicond or xtheadcondmov. The net result is that the resulting code is actually slightly worse than before. Essentially sne+czero turned into sne+sne+and. Thankfully, combine is presented with (and (ne (op1) (const_int 0)) (ne (op2) (const_int 0))) as the RHS of a set. We can use a 3->2 splitter to guide combine on how to profitably rewrite the sequence in a form suitable for condops. Just splitting that would be enough to fix the regression, but I'm fairly confident that other cases need to be handled and would have regressed had the testsuite been more thorough. One arm of the AND is going to turn into an sCC instruction. We have a variety of those that we support. The codes vary as do the allowed operands of the sCC. That produces a set of new splitters to handle those cases. The other arm is going to turn into a czero (or similar) instruction. That one can be generalized to eq/ne. So another set for that generalization. We can remove a couple of XFAILs in the rv32 space as it's behaving much more like rv64 at this point. For SFB targets it's unclear if the new code is better or worse. In both cases it's a 3-instruction sequence. So I just adjusted the test. If the new code is worse for SFB, someone with an understanding of the tradeoffs for an SFB target will need to make adjustments. 
Tested in my tester on rv64gcv and rv32gc. Will wait for the pre-commit testers to render their verdict before moving forward. gcc/ * config/riscv/iterators.md (scc_0): New code iterator. * config/riscv/zicond.md: New splitters to improve code generated for cases like (and (scc) (scc)) for zicond, xventanacondops, xtheadcondmov. gcc/testsuite/ * gcc.target/riscv/cset-sext-sfb.c: Turn off ssa-phiopt. * gcc.target/riscv/cset-sext-thead.c: Do not check CE output anymore. * gcc.target/riscv/cset-sext-ventana.c: Similarly. Adjust branch cost. * gcc.target/riscv/cset-sext-zicond.c: Similarly. * gcc.target/riscv/cset-sext.c: Similarly. No longer allow "neg" in asm output. (cherry picked from commit a2a956cf26e645bfddbc0b743b97472e298c7a8c) Diff: --- gcc/config/riscv/iterators.md | 2 + gcc/config/riscv/zicond.md | 112 +++++++++++++++++++++ gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c | 12 +-- gcc/testsuite/gcc.target/riscv/cset-sext-thead.c | 3 +- gcc/testsuite/gcc.target/riscv/cset-sext-ventana.c | 3 +- gcc/testsuite/gcc.target/riscv/cset-sext-zicond.c | 9 +- gcc/testsuite/gcc.target/riscv/cset-sext.c | 11 +- 7 files changed, 131 insertions(+), 21 deletions(-) diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index 2844cb02ff09..872c542e9065 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -233,6 +233,8 @@ (define_code_iterator any_ge [ge geu]) (define_code_iterator any_lt [lt ltu]) (define_code_iterator any_le [le leu]) +;; Iterators for conditions we can emit a sCC against 0 or a reg directly +(define_code_iterator scc_0 [eq ne gt gtu]) ; atomics code iterator (define_code_iterator any_atomic [plus ior xor and]) diff --git a/gcc/config/riscv/zicond.md b/gcc/config/riscv/zicond.md index 3876be7f9d29..ab1a5337ee53 100644 --- a/gcc/config/riscv/zicond.md +++ b/gcc/config/riscv/zicond.md @@ -124,3 +124,115 @@ { operands[2] = GEN_INT (1 << UINTVAL(operands[2])); }) + +;; In some cases gimple can give us a sequence 
with a logical and +;; of two sCC insns. This can be implemented as an sCC feeding a +;; conditional zero. +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (ne:X (match_operand:X 1 "register_operand") (const_int 0)) + (scc_0:X (match_operand:X 2 "register_operand") + (match_operand:X 3 "reg_or_0_operand")))) + (clobber (match_operand:X 4 "register_operand"))] + "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV" + [(set (match_dup 4) (scc_0:X (match_dup 2) (match_dup 3))) + (set (match_dup 0) (if_then_else:X (eq:X (match_dup 1) (const_int 0)) + (const_int 0) + (match_dup 4)))]) + +;; Similarly but GE/GEU which requires (const_int 1) as an operand. +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (ne:X (match_operand:X 1 "register_operand") (const_int 0)) + (any_ge:X (match_operand:X 2 "register_operand") + (const_int 1)))) + (clobber (match_operand:X 3 "register_operand"))] + "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV" + [(set (match_dup 3) (any_ge:X (match_dup 2) (const_int 1))) + (set (match_dup 0) (if_then_else:X (eq:X (match_dup 1) (const_int 0)) + (const_int 0) + (match_dup 3)))]) + +;; Similarly but LT/LTU which allows an arith_operand +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (ne:X (match_operand:X 1 "register_operand") (const_int 0)) + (any_lt:X (match_operand:X 2 "register_operand") + (match_operand:X 3 "arith_operand")))) + (clobber (match_operand:X 4 "register_operand"))] + "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV" + [(set (match_dup 4) (any_lt:X (match_dup 2) (match_dup 3))) + (set (match_dup 0) (if_then_else:X (eq:X (match_dup 1) (const_int 0)) + (const_int 0) + (match_dup 4)))]) + +;; Finally LE/LEU which requires sle_operand. 
+(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (ne:X (match_operand:X 1 "register_operand") (const_int 0)) + (any_le:X (match_operand:X 2 "register_operand") + (match_operand:X 3 "sle_operand")))) + (clobber (match_operand:X 4 "register_operand"))] + "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV" + [(set (match_dup 4) (any_le:X (match_dup 2) (match_dup 3))) + (set (match_dup 0) (if_then_else:X (eq:X (match_dup 1) (const_int 0)) + (const_int 0) + (match_dup 4)))]) + + +;; Inverted versions from above. I tried to get this to work with +;; iterators, but didn't have any success disambiguating the code attr +;; for the eq/ne flip we have to do. +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (eq:X (match_operand:X 1 "register_operand") (const_int 0)) + (scc_0:X (match_operand:X 2 "register_operand") + (match_operand:X 3 "reg_or_0_operand")))) + (clobber (match_operand:X 4 "register_operand"))] + "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV" + [(set (match_dup 4) (scc_0:X (match_dup 2) (match_dup 3))) + (set (match_dup 0) (if_then_else:X (ne:X (match_dup 1) (const_int 0)) + (const_int 0) + (match_dup 4)))]) + +;; Similarly but GE/GEU which requires (const_int 1) as an operand. 
+(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (eq:X (match_operand:X 1 "register_operand") (const_int 0)) + (any_ge:X (match_operand:X 2 "register_operand") + (const_int 1)))) + (clobber (match_operand:X 3 "register_operand"))] + "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV" + [(set (match_dup 3) (any_ge:X (match_dup 2) (const_int 1))) + (set (match_dup 0) (if_then_else:X (ne:X (match_dup 1) (const_int 0)) + (const_int 0) + (match_dup 3)))]) + +;; Similarly but LT/LTU which allows an arith_operand +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (eq:X (match_operand:X 1 "register_operand") (const_int 0)) + (any_lt:X (match_operand:X 2 "register_operand") + (match_operand:X 3 "arith_operand")))) + (clobber (match_operand:X 4 "register_operand"))] + "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV" + [(set (match_dup 4) (any_lt:X (match_dup 2) (match_dup 3))) + (set (match_dup 0) (if_then_else:X (ne:X (match_dup 1) (const_int 0)) + (const_int 0) + (match_dup 4)))]) + +;; Finally LE/LEU which requires sle_operand. 
+(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (eq:X (match_operand:X 1 "register_operand") (const_int 0)) + (any_le:X (match_operand:X 2 "register_operand") + (match_operand:X 3 "sle_operand")))) + (clobber (match_operand:X 4 "register_operand"))] + "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV" + [(set (match_dup 4) (any_le:X (match_dup 2) (match_dup 3))) + (set (match_dup 0) (if_then_else:X (ne:X (match_dup 1) (const_int 0)) + (const_int 0) + (match_dup 4)))]) + + + diff --git a/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c b/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c index 1a3e7104bd8c..6e9f8cc61de0 100644 --- a/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c +++ b/gcc/testsuite/gcc.target/riscv/cset-sext-sfb.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ -/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ -/* { dg-options "-march=rv32gc -mtune=sifive-7-series -mbranch-cost=1 -fdump-rtl-ce1" { target { rv32 } } } */ -/* { dg-options "-march=rv64gc -mtune=sifive-7-series -mbranch-cost=1 -fdump-rtl-ce1" { target { rv64 } } } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" } } */ +/* { dg-options "-march=rv32gc -mtune=sifive-7-series -mbranch-cost=1 -fno-ssa-phiopt -fdump-rtl-ce1" { target { rv32 } } } */ +/* { dg-options "-march=rv64gc -mtune=sifive-7-series -mbranch-cost=1 -fno-ssa-phiopt -fdump-rtl-ce1" { target { rv64 } } } */ int foo (long a, long b) @@ -22,7 +22,7 @@ foo (long a, long b) 1: */ -/* { dg-final { scan-rtl-dump-times "if-conversion succeeded through noce_try_cmove_arith" 1 "ce1" { xfail { rv32 && { any-opts "-O1" } } } } } */ +/* { dg-final { scan-rtl-dump-times "if-conversion succeeded through noce_try_cmove_arith" 1 "ce1" } } /* { dg-final { scan-assembler-times "\\ssnez\\s" 1 } } */ -/* { dg-final { scan-assembler-times "\\sbne\\s\[^\\s\]+\\s# movcc\\s" 1 { xfail { rv32 && { any-opts "-O1" } } } } } */ -/* { dg-final { scan-assembler-not "\\sbeq\\s" { xfail { rv32 && { any-opts "-O1" } } } } } */ +/* { dg-final { 
scan-assembler-times "\\sbne\\s\[^\\s\]+\\s# movcc\\s" 1 } } */ +/* { dg-final { scan-assembler-not "\\sbeq\\s" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/cset-sext-thead.c b/gcc/testsuite/gcc.target/riscv/cset-sext-thead.c index 45b94704aaf6..74cae71d7a0b 100644 --- a/gcc/testsuite/gcc.target/riscv/cset-sext-thead.c +++ b/gcc/testsuite/gcc.target/riscv/cset-sext-thead.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target rv64 } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ -/* { dg-options "-march=rv64gc_xtheadcondmov -mtune=thead-c906 -mbranch-cost=1 -fdump-rtl-ce1" } */ +/* { dg-options "-march=rv64gc_xtheadcondmov -mtune=thead-c906 -mbranch-cost=1" } */ int foo (long a, long b) @@ -20,7 +20,6 @@ foo (long a, long b) th.mveqz a0,zero,a1 */ -/* { dg-final { scan-rtl-dump-times "if-conversion succeeded through noce_try_cmove_arith" 1 "ce1" } } */ /* { dg-final { scan-assembler-times "\\ssnez\\s" 1 } } */ /* { dg-final { scan-assembler-times "\\s(?:th\\.mveqz|th\\.mvnez)\\s" 1 } } */ /* { dg-final { scan-assembler-not "\\s(?:beq|bne)\\s" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/cset-sext-ventana.c b/gcc/testsuite/gcc.target/riscv/cset-sext-ventana.c index eac1e1376cb4..8c3ca98bc2fc 100644 --- a/gcc/testsuite/gcc.target/riscv/cset-sext-ventana.c +++ b/gcc/testsuite/gcc.target/riscv/cset-sext-ventana.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target rv64 } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ -/* { dg-options "-march=rv64gc_xventanacondops -mtune=rocket -mbranch-cost=3 -fdump-rtl-ce1" } */ +/* { dg-options "-march=rv64gc_xventanacondops -mtune=rocket -mbranch-cost=1" } */ int foo (long a, long b) @@ -20,7 +20,6 @@ foo (long a, long b) vt.maskc a0,a0,a1 */ -/* { dg-final { scan-rtl-dump-times "if-conversion succeeded through noce_try_cmove_arith" 1 "ce1" } } */ /* { dg-final { scan-assembler-times "\\ssnez\\s" 1 } } */ /* { dg-final { scan-assembler-times "\\svt\\.maskc\\s" 1 } } */ 
/* { dg-final { scan-assembler-not "\\s(?:beq|bne)\\s" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/cset-sext-zicond.c b/gcc/testsuite/gcc.target/riscv/cset-sext-zicond.c index a526b0c3d5a6..ec715254d4e2 100644 --- a/gcc/testsuite/gcc.target/riscv/cset-sext-zicond.c +++ b/gcc/testsuite/gcc.target/riscv/cset-sext-zicond.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */ -/* { dg-options "-march=rv64gc_zicond -mtune=rocket -mbranch-cost=3 -fdump-rtl-ce1" { target { rv64 } } } */ -/* { dg-options "-march=rv32gc_zicond -mtune=rocket -mbranch-cost=3 -fdump-rtl-ce1" { target { rv32 } } } */ +/* { dg-options "-march=rv64gc_zicond -mtune=rocket -mbranch-cost=1" { target { rv64 } } } */ +/* { dg-options "-march=rv32gc_zicond -mtune=rocket -mbranch-cost=1" { target { rv32 } } } */ int foo (long a, long b) @@ -20,7 +20,6 @@ foo (long a, long b) czero.eqz a0,a0,a1 */ -/* { dg-final { scan-rtl-dump-times "if-conversion succeeded through noce_try_cmove_arith" 1 "ce1" { xfail { rv32 && { any-opts "-O1" "-Os" "-Oz" } } } } } */ /* { dg-final { scan-assembler-times "\\ssnez\\s" 1 } } */ -/* { dg-final { scan-assembler-times "\\sczero\\.eqz\\s" 1 { xfail { rv32 && { any-opts "-O1" "-Os" "-Oz" } } } } } */ -/* { dg-final { scan-assembler-not "\\s(?:beq|bne)\\s" { xfail { rv32 && { any-opts "-O1" "-Os" "-Oz" } } } } } */ +/* { dg-final { scan-assembler-times "\\sczero\\.eqz\\s" 1 } } */ +/* { dg-final { scan-assembler-not "\\s(?:beq|bne)\\s" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/cset-sext.c b/gcc/testsuite/gcc.target/riscv/cset-sext.c index a1293cd62ea7..e1c3239d1461 100644 --- a/gcc/testsuite/gcc.target/riscv/cset-sext.c +++ b/gcc/testsuite/gcc.target/riscv/cset-sext.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-Os" "-Oz" } } */ -/* { dg-options "-march=rv32gc -mtune=sifive-5-series -mbranch-cost=6 -mmovcc -fdump-rtl-ce1" { target { rv32 } } } */ -/* { dg-options "-march=rv64gc 
-mtune=sifive-5-series -mbranch-cost=6 -mmovcc -fdump-rtl-ce1" { target { rv64 } } } */ +/* { dg-options "-march=rv32gc -mtune=sifive-5-series -mbranch-cost=1 -mmovcc" { target { rv32 } } } */ +/* { dg-options "-march=rv64gc -mtune=sifive-5-series -mbranch-cost=1 -mmovcc" { target { rv64 } } } */ int foo (long a, long b) @@ -17,11 +17,10 @@ foo (long a, long b) /* Expect branchless assembly like: snez a1,a1 - neg a1,a1 snez a0,a0 and a0,a1,a0 */ -/* { dg-final { scan-rtl-dump-times "if-conversion succeeded through noce_try_cmove_arith" 1 "ce1" { xfail { rv32 && { any-opts "-O1" } } } } } */ -/* { dg-final { scan-assembler-times "\\ssnez\\s" 2 { xfail { rv32 && { any-opts "-O1" } } } } } */ -/* { dg-final { scan-assembler-not "\\s(?:beq|bne)\\s" { xfail { rv32 && { any-opts "-O1" } } } } } */ +/* { dg-final { scan-assembler-times "\\ssnez\\s" 2 } } */ +/* { dg-final { scan-assembler-not "\\s(?:beq|bne)\\s" } } */ +/* { dg-final { scan-assembler-not "\\sneg\\s" } } */