Hi! The following patch fixes wrong-code on the following testcase extracted from pseudo-RNG with e.g. -march=zEC12 -O2. The problem is in the instruction emitted by the *r<noxa>sbg_sidi_srl patterns. We have in *.final correct: (insn 67 65 68 (parallel [ (set (reg:SI 1 %r1 [189]) (xor:SI (subreg:SI (zero_extract:DI (reg/v:DI 11 %r11 [orig:89 th ] [89]) (const_int 32 [0x20]) (const_int 24 [0x18])) 4) (reg:SI 1 %r1 [187]))) (clobber (reg:CC 33 %cc)) ]) "pr89369.c":44:73 1419 {*rxsbg_sidi_srl} (expr_list:REG_DEAD (reg/v:DI 11 %r11 [orig:89 th ] [89]) (expr_list:REG_UNUSED (reg:CC 33 %cc) (nil)))) which is effectively (reg:SI %r1) ^= (unsigned) ((reg:DI %r11) >> 8). But the pattern emits rxsbg %r1,%r11,40,63,56 which is effectively (reg:SI %r1) ^= ((unsigned) ((reg:DI %r11) >> 8) & 0xffffff) or equivalently (reg:SI %r1) ^= ((reg:SI %r11) >> 8). Even in the pattern one can see that it wants to extract exactly 32 bits always, no matter what the shift count is. Fixed by always emitting 32,63,(32+pos from zero extract). On that pr89369.c testcase, the patch also changes - rxsbg %r12,%r9,64,63,32 - rxsbg %r12,%r1,64,63,32 + rxsbg %r12,%r9,32,63,32 + rxsbg %r12,%r1,32,63,32 and - rxsbg %r1,%r3,64,63,32 + rxsbg %r1,%r3,32,63,32 which are all with zero_extract with len 32 and pos 0, so again, it wants to extract the low 32 bits. I3 64 larger than I4 63 is just weird. The patch also changes the instructions emitted in rXsbg_mode_sXl.c: - rosbg %r2,%r3,34,63,62 + rosbg %r2,%r3,32,63,62 and - rxsbg %r2,%r3,34,63,62 + rxsbg %r2,%r3,32,63,62 Here, it is __attribute__ ((noinline)) unsigned int rosbg_si_srl (unsigned int a, unsigned int b) { return a | (b >> 2); } __attribute__ ((noinline)) unsigned int rxsbg_si_srl (unsigned int a, unsigned int b) { return a ^ (b >> 2); } so from quick POV, one might think 34,63,62 is better, as we want to or in just the 30 bits from b. Both should actually work the same though, because (subreg/s/v:SI (reg/v:DI 64 [ b+-4 ]) 4) - the b argument is passed zero extended and so it doesn't really matter how many bits we extract, as long as it is 30 or more. If I try instead: __attribute__ ((noinline, noipa)) unsigned int rosbg_si_srl (unsigned int a, unsigned long long b) { return a | ((unsigned) b >> 2); } __attribute__ ((noinline, noipa)) unsigned int rxsbg_si_srl (unsigned int a, unsigned long long b) { return a ^ ((unsigned) b >> 2); } then both the unpatched and patched compiler emit properly rosbg %r2,%r3,34,63,62 and rxsbg %r2,%r3,34,63,62 through a different pattern, because in that case we must not rely on the upper 32 bits of b being zero.
In addition to this change, the patch adds a cleanup, there is no reason to use a static buffer in each instruction and increase global state, we can just tweak the arguments and let the caller deal with it. That is something normally done in other parts of the s390.md as well. As small CONST_INTs are hashed, it shouldn't increase compile time memory. Bootstrapped/regtested on s390x-linux, ok for trunk? 2019-02-16 Jakub Jelinek <ja...@redhat.com> PR target/89369 * config/s390/s390.md (*r<noxa>sbg_<mode>_srl_bitmask, *r<noxa>sbg_<mode>_sll, *r<noxa>sbg_<mode>_srl): Don't construct pattern in a temporary buffer. (*r<noxa>sbg_sidi_srl): Likewise. Always use 32 as I3 rather than 64-operands[2]. * gcc.c-torture/execute/pr89369.c: New test. * gcc.target/s390/md/rXsbg_mode_sXl.c (rosbg_si_srl, rxsbg_si_srl): Expect last 3 operands 32,63,62 rather than 34,63,62. --- gcc/config/s390/s390.md.jj 2019-02-05 22:59:04.883503954 +0100 +++ gcc/config/s390/s390.md 2019-02-15 18:54:35.037131906 +0100 @@ -4263,10 +4263,8 @@ (define_insn "*r<noxa>sbg_<mode>_srl_bit && s390_extzv_shift_ok (<bitsize>, 64 - INTVAL (operands[3]), INTVAL (operands[2]))" { - static char buffer[256]; - sprintf (buffer, "r<noxa>sbg\t%%0,%%1,%%<bfstart>2,%%<bfend>2,%ld", - 64 - INTVAL (operands[3])); - return buffer; + operands[3] = GEN_INT (64 - INTVAL (operands[3])); + return "r<noxa>sbg\t%0,%1,%<bfstart>2,%<bfend>2,%3"; } [(set_attr "op_type" "RIE")]) @@ -4301,10 +4299,8 @@ (define_insn "*r<noxa>sbg_<mode>_sll" (clobber (reg:CC CC_REGNUM))] "TARGET_Z10" { - static char buffer[256]; - sprintf (buffer, "r<noxa>sbg\t%%0,%%1,<bitoff>,%ld,%%2", - 63 - INTVAL (operands[2])); - return buffer; + operands[3] = GEN_INT (63 - INTVAL (operands[2])); + return "r<noxa>sbg\t%0,%1,<bitoff>,%3,%2"; } [(set_attr "op_type" "RIE")]) @@ -4322,10 +4318,9 @@ (define_insn "*r<noxa>sbg_<mode>_srl" (clobber (reg:CC CC_REGNUM))] "TARGET_Z10" { - static char buffer[256]; - sprintf (buffer, "r<noxa>sbg\t%%0,%%1,%ld,63,%ld", - <bitoff_plus> INTVAL (operands[2]), 64 - INTVAL (operands[2])); - return buffer; + operands[3] = GEN_INT (64 - INTVAL (operands[2])); + operands[2] = GEN_INT (<bitoff_plus> INTVAL (operands[2])); + return "r<noxa>sbg\t%0,%1,%2,63,%3"; } [(set_attr "op_type" "RIE")]) @@ -4343,10 +4338,8 @@ (define_insn "*r<noxa>sbg_sidi_srl" (clobber (reg:CC CC_REGNUM))] "TARGET_Z10" { - static char buffer[256]; - sprintf (buffer, "r<noxa>sbg\t%%0,%%1,%ld,63,%ld", - 64 - INTVAL (operands[2]), 32 + INTVAL (operands[2])); - return buffer; + operands[2] = GEN_INT (32 + INTVAL (operands[2])); + return "r<noxa>sbg\t%0,%1,32,63,%2"; } [(set_attr "op_type" "RIE")]) --- gcc/testsuite/gcc.c-torture/execute/pr89369.c.jj 2019-02-15 18:54:05.425620085 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr89369.c 2019-02-15 18:53:40.801026052 +0100 @@ -0,0 +1,69 @@ +/* PR target/89369 */ + +#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8 && __CHAR_BIT__ == 8 +struct S { unsigned int u[4]; }; + +static void +foo (struct S *out, struct S const *in, int shift) +{ + unsigned long long th, tl, oh, ol; + th = ((unsigned long long) in->u[3] << 32) | in->u[2]; + tl = ((unsigned long long) in->u[1] << 32) | in->u[0]; + oh = th >> (shift * 8); + ol = tl >> (shift * 8); + ol |= th << (64 - shift * 8); + out->u[1] = ol >> 32; + out->u[0] = ol; + out->u[3] = oh >> 32; + out->u[2] = oh; +} + +static void +bar (struct S *out, struct S const *in, int shift) +{ + unsigned long long th, tl, oh, ol; + th = ((unsigned long long) in->u[3] << 32) | in->u[2]; + tl = ((unsigned long long) in->u[1] << 32) | in->u[0]; + oh = th << (shift * 8); + ol = tl << (shift * 8); + oh |= tl >> (64 - shift * 8); + out->u[1] = ol >> 32; + out->u[0] = ol; + out->u[3] = oh >> 32; + out->u[2] = oh; +} + +__attribute__((noipa)) static void +baz (struct S *r, struct S *a, struct S *b, struct S *c, struct S *d) +{ + struct S x, y; + bar (&x, a, 1); + foo (&y, c, 1); + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> 11) & 0xdfffffefU) ^ y.u[0] ^ (d->u[0] << 18); + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> 11) & 0xddfecb7fU) ^ y.u[1] ^ (d->u[1] << 18); + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> 11) & 0xbffaffffU) ^ y.u[2] ^ (d->u[2] << 18); + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> 11) & 0xbffffff6U) ^ y.u[3] ^ (d->u[3] << 18); +} + +int +main () +{ + struct S a[] = { { 0x000004d3, 0xbc5448db, 0xf22bde9f, 0xebb44f8f }, + { 0x03a32799, 0x60be8246, 0xa2d266ed, 0x7aa18536 }, + { 0x15a38518, 0xcf655ce1, 0xf3e09994, 0x50ef69fe }, + { 0x88274b07, 0xe7c94866, 0xc0ea9f47, 0xb6a83c43 }, + { 0xcd0d0032, 0x5d47f5d7, 0x5a0afbf6, 0xaea87b24 }, + { 0, 0, 0, 0 } }; + baz (&a[5], &a[0], &a[1], &a[2], &a[3]); + if (a[4].u[0] != a[5].u[0] || a[4].u[1] != a[5].u[1] + || a[4].u[2] != a[5].u[2] || a[4].u[3] != a[5].u[3]) + __builtin_abort (); + return 0; +} +#else +int +main () +{ + return 0; +} +#endif --- gcc/testsuite/gcc.target/s390/md/rXsbg_mode_sXl.c.jj 2018-11-16 17:33:44.126195406 +0100 +++ gcc/testsuite/gcc.target/s390/md/rXsbg_mode_sXl.c 2019-02-16 19:16:54.372512707 +0100 @@ -46,7 +46,7 @@ rosbg_si_srl (unsigned int a, unsigned i { return a | (b >> 2); } -/* { dg-final { scan-assembler-times "rosbg\t%r.,%r.,34,63,62" 1 } } */ +/* { dg-final { scan-assembler-times "rosbg\t%r.,%r.,32,63,62" 1 } } */ __attribute__ ((noinline)) unsigned int rxsbg_si_sll (unsigned int a, unsigned int b) @@ -60,7 +60,7 @@ rxsbg_si_srl (unsigned int a, unsigned i { return a ^ (b >> 2); } -/* { dg-final { scan-assembler-times "rxsbg\t%r.,%r.,34,63,62" 1 } } */ +/* { dg-final { scan-assembler-times "rxsbg\t%r.,%r.,32,63,62" 1 } } */ __attribute__ ((noinline)) unsigned long long di_sll (unsigned long long x) Jakub