So this is a slightly scaled back variant of a patch I've been working
on. I'd originally planned to handle both zero and sign extensions, but
there's some fallout with the sign extension adjustments that I'm going
to need more time to tackle. This piece stands on its own and unlocks a
subsequent patch to improve codegen. No sense in having it possibly
miss the merge window.
This patch adjusts the core zero-extension patterns as well as one
closely related combiner pattern.
For the named expanders, we now generate shift pairs if the Zba/Zbb
extensions are not available and the source operand is a REG. Things
are kept as-is for MEMs.
The existing define_insn_and_split is turned into a define_insn that
only handles MEM sources. Those instructions are always available, so
no need to mess with shift pairs. This avoids regressions with a
follow-up patch which enhances a closely related combiner pattern.
That closely related combiner pattern is a define_insn_and_split which
can now turn into a simpler define_split. So that's adjusted as well.
The net is we drop 3 define_insn_and_splits and occasionally get better
code as a result. It also makes it possible to improve some additional
cases which I'll handle as a followup.
The test changes are minimal and mostly related to making sure we have
the right Zb* things enabled based on what the test relies on under the
hood. It's not even clear that part of the change is strictly necessary
anymore. I see it more as test hygiene than anything.
This has been bootstrapped and regression tested on the Pioneer which is
a good test since it doesn't have any of the Zb* extensions and thus
relies heavily on the shift-pair approach to zero extensions.
riscv32-elf and riscv64-elf have also been regression tested. The BPI
hasn't started chewing on this patch yet.
Obviously waiting on pre-commit CI before moving forward.
Jeff
gcc/
* config/riscv/riscv.cc (riscv_rtx_costs): Properly cost pack insns
for Zbkb.
* config/riscv/riscv.md (zero_extendsidi2): Expand into shift pairs
when the appropriate instructions are not available.
(zero_extendhi<GPR:mode>2): Similarly.
(*zero_extendsidi2_internal): Make a simple define_insn. Only handle
MEM sources.
(*zero_extendhi<GPR:mode>2): Similarly.
(zero_extendsidi2_shifted): Turn into a define_split.
gcc/testsuite/
* gcc.target/riscv/slt-1.c: Skip for -Os and -Oz as well.
* gcc.target/riscv/zba-shNadd-04.c: Add Zbb to command line switches.
* gcc.target/riscv/zba-slliuw.c: Add Zbs to command line switches.
* gcc.target/riscv/zbs-zext.c: Add Zbs to command line switches.
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 3c994a0cd55e..fbf419dde5a3 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4256,6 +4256,26 @@ riscv_rtx_costs (rtx x, machine_mode mode, int
outer_code, int opno ATTRIBUTE_UN
gcc_fallthrough ();
case IOR:
case XOR:
+ /* packh for zbkb. Alternate forms haven't shown up as a
+ costing problem. Obviously we can add the additional
+ variants if needed. */
+ if (TARGET_ZBKB
+ && GET_CODE (x) == IOR
+ && GET_CODE (XEXP (x, 0)) == AND
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
+ && register_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), word_mode)
+ && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
+ && INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)) == 8
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && INTVAL (XEXP (XEXP (x, 0), 1)) == 0xff00
+ && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
+ && GET_MODE (XEXP (x, 1)) == word_mode
+ && GET_MODE (XEXP (XEXP (x, 1), 0)) == QImode)
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
/* orn, andn and xorn pattern for zbb. */
if (TARGET_ZBB
&& GET_CODE (XEXP (x, 0)) == NOT)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index fced2da2e604..de898d707d13 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1851,33 +1851,41 @@ (define_expand "zero_extendsidi2"
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
"TARGET_64BIT"
{
+ /* If the source is a suitably extended subreg, then this is just
+ a simple move. */
if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
&& SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
{
emit_insn (gen_movdi (operands[0], SUBREG_REG (operands[1])));
DONE;
}
+
+ /* If the source is a register and we do not have ZBA or similar
+ extensions with similar capabilities, then emit the two
+ shifts now. */
+ if (!TARGET_ZBA && !TARGET_XTHEADBB
+ && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+ && register_operand (operands[1], SImode))
+ {
+ /* Intermediate register. */
+ rtx ireg = gen_reg_rtx (DImode);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ rtx shiftval = GEN_INT (32);
+ rtx t = gen_rtx_ASHIFT (DImode, operands[1], shiftval);
+ emit_move_insn (ireg, t);
+ t = gen_rtx_LSHIFTRT (DImode, ireg, shiftval);
+ emit_move_insn (operands[0], t);
+ DONE;
+ }
})
-(define_insn_and_split "*zero_extendsidi2_internal"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (zero_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" " r,m")))]
+(define_insn "*zero_extendsidi2_internal"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:SI 1 "memory_operand" "m")))]
"TARGET_64BIT && !TARGET_ZBA && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
- && !TARGET_XANDESPERF
- && !(REG_P (operands[1]) && VL_REG_P (REGNO (operands[1])))"
- "@
- #
- lwu\t%0,%1"
- "&& reload_completed
- && REG_P (operands[1])
- && !paradoxical_subreg_p (operands[0])"
- [(set (match_dup 0)
- (ashift:DI (match_dup 1) (const_int 32)))
- (set (match_dup 0)
- (lshiftrt:DI (match_dup 0) (const_int 32)))]
- { operands[1] = gen_lowpart (DImode, operands[1]); }
- [(set_attr "move_type" "shift_shift,load")
+ && !TARGET_XANDESPERF"
+ "lwu\t%0,%1"
+ [(set_attr "move_type" "load")
(set_attr "type" "load")
(set_attr "mode" "DI")])
@@ -1885,29 +1893,43 @@ (define_expand "zero_extendhi<GPR:mode>2"
[(set (match_operand:GPR 0 "register_operand")
(zero_extend:GPR
(match_operand:HI 1 "nonimmediate_operand")))]
- "")
+ ""
+{
+ /* If the source is a suitably extended subreg, then this is just
+ a simple move. */
+ if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
+ && SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
+ {
+ emit_insn (gen_mov<GPR:mode> (operands[0], SUBREG_REG (operands[1])));
+ DONE;
+ }
-(define_insn_and_split "*zero_extendhi<GPR:mode>2"
- [(set (match_operand:GPR 0 "register_operand" "=r,r")
- (zero_extend:GPR
- (match_operand:HI 1 "nonimmediate_operand" " r,m")))]
+ /* If the source is a register and we do not have ZBB or similar
+ extensions with similar capabilities, then emit the two
+ shifts now. */
+ if (!TARGET_ZBB && !TARGET_XTHEADBB
+ && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+ && register_operand (operands[1], HImode))
+ {
+ /* Intermediate register. */
+ rtx ireg = gen_reg_rtx (<GPR:MODE>mode);
+ operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
+ rtx shiftval = GEN_INT (GET_MODE_BITSIZE (<GPR:MODE>mode) - 16);
+ rtx t = gen_rtx_ASHIFT (<GPR:MODE>mode, operands[1], shiftval);
+ emit_move_insn (ireg, t);
+ t = gen_rtx_LSHIFTRT (<GPR:MODE>mode, ireg, shiftval);
+ emit_move_insn (operands[0], t);
+ DONE;
+ }
+})
+
+(define_insn "*zero_extendhi<GPR:mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (zero_extend:GPR (match_operand:HI 1 "memory_operand" "m")))]
"!TARGET_ZBB && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
&& !TARGET_XANDESPERF"
- "@
- #
- lhu\t%0,%1"
- "&& reload_completed
- && REG_P (operands[1])
- && !paradoxical_subreg_p (operands[0])"
- [(set (match_dup 0)
- (ashift:GPR (match_dup 1) (match_dup 2)))
- (set (match_dup 0)
- (lshiftrt:GPR (match_dup 0) (match_dup 2)))]
- {
- operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
- operands[2] = GEN_INT(GET_MODE_BITSIZE(<GPR:MODE>mode) - 16);
- }
- [(set_attr "move_type" "shift_shift,load")
+ "lhu\t%0,%1"
+ [(set_attr "move_type" "load")
(set_attr "type" "load")
(set_attr "mode" "<GPR:MODE>")])
@@ -3147,23 +3185,19 @@ (define_split
;; occur when unsigned int is used for array indexing. Split this into two
;; shifts. Otherwise we can get 3 shifts.
-(define_insn_and_split "zero_extendsidi2_shifted"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
- (match_operand:QI 2 "immediate_operand" "I"))
- (match_operand 3 "immediate_operand" "")))
- (clobber (match_scratch:DI 4 "=&r"))]
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
+ (match_operand:QI 2 "immediate_operand"))
+ (match_operand 3 "immediate_operand")))
+ (clobber (match_operand:DI 4 "register_operand"))]
"TARGET_64BIT && !TARGET_ZBA
&& ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)"
- "#"
- "&& reload_completed"
[(set (match_dup 4)
(ashift:DI (match_dup 1) (const_int 32)))
(set (match_dup 0)
(lshiftrt:DI (match_dup 4) (match_dup 5)))]
- "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));"
- [(set_attr "type" "shift")
- (set_attr "mode" "DI")])
+ "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));")
;;
;; ....................
diff --git a/gcc/testsuite/gcc.target/riscv/slt-1.c
b/gcc/testsuite/gcc.target/riscv/slt-1.c
index 29a640660810..7a1eaf51f43d 100644
--- a/gcc/testsuite/gcc.target/riscv/slt-1.c
+++ b/gcc/testsuite/gcc.target/riscv/slt-1.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gc -mabi=lp64d" } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-Os" "-Oz" } } */
#include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
index 48e225d3f1e7..ca80e874e8d1 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
long long sub1(unsigned long long a, unsigned long long b)
diff --git a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
index 69914db95a2c..1e100b555c2e 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
long
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-zext.c
b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
index 5773b15d2987..1bebc36c31c8 100644
--- a/gcc/testsuite/gcc.target/riscv/zbs-zext.c
+++ b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" } } */
typedef unsigned long uint64_t;
typedef unsigned int uint32_t;