Dimitar Dimitrov <dimi...@dinux.eu> writes: > The recent gcc.dg/tree-ssa/clz-char.c test case failed for PRU target, > exposing a wrong code generation bug in the PRU backend. The "clz" > pattern did not produce correct output for QI and HI input operand > modes. SI mode is ok. > > The "clz" pattern is expanded to an LMBD instruction to get the > left-most bit position having value "1". In turn, to get the correct > "clz" value, that bit position must be subtracted from the MSB bit > position of the input operand. The old behaviour of hard-coding 31 > for MSB bit position is wrong. > > The LMBD instruction returns 32 if input operand is zero, irrespective > of its register mode. This maps nicely for SI mode, where the "clz" > pattern outputs -1. It also leads to peculiar (but valid!) output > values from the "clz" pattern for QI and HI zero-valued inputs. > > Pushed to trunk. This patch is confined to the PRU backend only, > and is fixing a wrong code generation. Hence I deem it suitable > for stage 4. > > Is it ok to push a backport to gcc-11 and gcc-12 branches?
Since it's a backend-specific patch, I think that's your call. Thanks, Richard > gcc/ChangeLog: > > * config/pru/pru.h (CLZ_DEFINED_VALUE_AT_ZERO): Fix value for QI > and HI input modes. > * config/pru/pru.md (clz): Fix generated code for QI and HI > input modes. > > gcc/testsuite/ChangeLog: > > * gcc.target/pru/clz-hi-2.c: New test. > * gcc.target/pru/clz-hi.c: New test. > > Signed-off-by: Dimitar Dimitrov <dimi...@dinux.eu> > --- > gcc/config/pru/pru.h | 5 ++-- > gcc/config/pru/pru.md | 15 ++++++++--- > gcc/testsuite/gcc.target/pru/clz-hi-2.c | 24 +++++++++++++++++ > gcc/testsuite/gcc.target/pru/clz-hi.c | 35 +++++++++++++++++++++++++ > 4 files changed, 74 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/pru/clz-hi-2.c > create mode 100644 gcc/testsuite/gcc.target/pru/clz-hi.c > > diff --git a/gcc/config/pru/pru.h b/gcc/config/pru/pru.h > index 3658036cccb..1b5e874bc06 100644 > --- a/gcc/config/pru/pru.h > +++ b/gcc/config/pru/pru.h > @@ -566,8 +566,9 @@ do { > \ > > #define CASE_VECTOR_MODE Pmode > > -/* See definition of clz pattern for rationale of value -1. */ > -#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 2) > +/* See definition of clz pattern for rationale of the value. */ > +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ > + ((VALUE) = GET_MODE_BITSIZE (MODE) - 1 - 32, 2) > > /* Jumps are cheap on PRU. */ > #define LOGICAL_OP_NON_SHORT_CIRCUIT 0 > diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md > index dfe08071e04..aa8e42a3587 100644 > --- a/gcc/config/pru/pru.md > +++ b/gcc/config/pru/pru.md > @@ -1723,8 +1723,16 @@ (define_insn "pru_halt" > [(set_attr "type" "control")]) > > ;; Count Leading Zeros implemented using LMBD. > -;; LMBD returns 32 if bit value is not present, and we subtract 31 to get > CLZ. > -;; Hence we get a defined value -1 for CLZ_DEFINED_VALUE_AT_ZERO. > +;; > +;; LMBD returns 32 if bit value is not present, for any kind of input MODE. > +;; The LMBD's search result for a "1" bit is subtracted from the > +;; mode bit size minus one, in order to get CLZ. > +;; > +;; Hence for SImode we get a defined value -1 for CLZ_DEFINED_VALUE_AT_ZERO. > +;; > +;; The QImode and HImode defined values for zero inputs end up to be > +;; non-standard (-25 and -17). But this is considered acceptable in > +;; order to keep the CLZ expansion to only two instructions. > (define_expand "clz<mode>2" > [(set (match_operand:QISI 0 "register_operand") > (clz:QISI (match_operand:QISI 1 "register_operand")))] > @@ -1735,7 +1743,8 @@ (define_expand "clz<mode>2" > rtx tmpval = gen_reg_rtx (<MODE>mode); > > emit_insn (gen_pru_lmbd (<MODE>mode, tmpval, src, const1_rtx)); > - emit_insn (gen_sub3_insn (dst, GEN_INT (31), tmpval)); > + int msb_bitn = GET_MODE_BITSIZE (<MODE>mode) - 1; > + emit_insn (gen_sub3_insn (dst, GEN_INT (msb_bitn), tmpval)); > DONE; > }) > > diff --git a/gcc/testsuite/gcc.target/pru/clz-hi-2.c > b/gcc/testsuite/gcc.target/pru/clz-hi-2.c > new file mode 100644 > index 00000000000..af877c7021e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/pru/clz-hi-2.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fno-tree-ch" } */ > + > +/* This test case is based on gcc.dg/tree-ssa/clz-char.c. */ > + > +#define PREC (sizeof(short) * 8) > + > +int > +__attribute__ ((noinline, noclone)) > +foo (unsigned short b) { > + int c = 0; > + > + if (b == 0) > + return PREC; > + > + while (!(b & (1 << (PREC - 1)))) { > + b <<= 1; > + c++; > + } > + > + return c; > +} > + > +/* { dg-final { scan-assembler "lmbd\\tr\[012\]\[0-9\]?.w\[0-2\], > r\[012\]\[0-9\]?.w\[0-2\], 1" } } */ > diff --git a/gcc/testsuite/gcc.target/pru/clz-hi.c > b/gcc/testsuite/gcc.target/pru/clz-hi.c > new file mode 100644 > index 00000000000..9350913b6d5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/pru/clz-hi.c > @@ -0,0 +1,35 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fno-tree-ch -fdump-tree-optimized" } */ > + > +/* This test case is based on gcc.dg/tree-ssa/clz-char.c. */ > + > +#define PREC (sizeof(short) * 8) > + > +int > +__attribute__ ((noinline, noclone)) > +foo (unsigned short b) { > + int c = 0; > + > + if (b == 0) > + return PREC; > + > + while (!(b & (1 << (PREC - 1)))) { > + b <<= 1; > + c++; > + } > + > + return c; > +} > + > +int main() > +{ > + if (foo(0) != PREC) > + __builtin_abort (); > + if (foo(1 << (PREC - 1)) != 0) > + __builtin_abort (); > + if (foo(35) != PREC - 6) > + __builtin_abort (); > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } > } */