On Thu, Oct 12, 2017 at 9:39 PM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > The ia32intrin.h rotate intrinsics require the second argument to be > in between 1 and 31 (or 63), otherwise they invoke UB. But, we can do much > better while generating the same instruction when optimizing, so the > following patch uses the patterns we pattern recognize well and where > the second argument can be any value. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2017-10-12 Jakub Jelinek <ja...@redhat.com> > > PR target/82498 > * config/i386/ia32intrin.h (__rold, __rord, __rolq, __rorq): Allow > any values of __C while still being pattern recognizable as a simple > rotate instruction. > > * gcc.dg/ubsan/pr82498.c: New test.
LGTM. Thanks, Uros. > --- gcc/config/i386/ia32intrin.h.jj 2017-01-01 12:45:42.000000000 +0100 > +++ gcc/config/i386/ia32intrin.h 2017-10-12 09:55:24.235602737 +0200 > @@ -147,7 +147,8 @@ extern __inline unsigned int > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > __rold (unsigned int __X, int __C) > { > - return (__X << __C) | (__X >> (32 - __C)); > + __C &= 31; > + return (__X << __C) | (__X >> (-__C & 31)); > } > > /* 8bit ror */ > @@ -171,7 +172,8 @@ extern __inline unsigned int > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > __rord (unsigned int __X, int __C) > { > - return (__X >> __C) | (__X << (32 - __C)); > + __C &= 31; > + return (__X >> __C) | (__X << (-__C & 31)); > } > > /* Pause */ > @@ -239,7 +241,8 @@ extern __inline unsigned long long > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > __rolq (unsigned long long __X, int __C) > { > - return (__X << __C) | (__X >> (64 - __C)); > + __C &= 63; > + return (__X << __C) | (__X >> (-__C & 63)); > } > > /* 64bit ror */ > @@ -247,7 +250,8 @@ extern __inline unsigned long long > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > __rorq (unsigned long long __X, int __C) > { > - return (__X >> __C) | (__X << (64 - __C)); > + __C &= 63; > + return (__X >> __C) | (__X << (-__C & 63)); > } > > /* Read flags register */ > --- gcc/testsuite/gcc.dg/ubsan/pr82498.c.jj 2017-10-12 09:40:36.025438511 > +0200 > +++ gcc/testsuite/gcc.dg/ubsan/pr82498.c 2017-10-12 10:06:06.636790077 > +0200 > @@ -0,0 +1,159 @@ > +/* PR target/82498 */ > +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ > +/* { dg-options "-fsanitize=undefined -fno-sanitize-recover=undefined" } */ > + > +#include <x86intrin.h> > + > +volatile unsigned int a; > +volatile unsigned long long b; > +volatile int c; > + > +int > +main () > +{ > + a = 0x12345678U; > + a = __rold (a, 0); > + if (a != 0x12345678U) > + __builtin_abort (); > + a = __rold (a, 32); > + if (a != 0x12345678U) > + __builtin_abort (); > + a = __rold (a, -32); > + if (a != 0x12345678U) > + __builtin_abort (); > + a = __rold (a, 37); > + if (a != 0x468acf02U) > + __builtin_abort (); > + a = __rold (a, -5); > + if (a != 0x12345678U) > + __builtin_abort (); > + a = __rord (a, 0); > + if (a != 0x12345678U) > + __builtin_abort (); > + a = __rord (a, 32); > + if (a != 0x12345678U) > + __builtin_abort (); > + a = __rord (a, -32); > + if (a != 0x12345678U) > + __builtin_abort (); > + a = __rord (a, -37); > + if (a != 0x468acf02U) > + __builtin_abort (); > + a = __rord (a, 5); > + if (a != 0x12345678U) > + __builtin_abort (); > + c = 0; > + a = __rold (a, c); > + if (a != 0x12345678U) > + __builtin_abort (); > + c = 32; > + a = __rold (a, c); > + if (a != 0x12345678U) > + __builtin_abort (); > + c = -32; > + a = __rold (a, c); > + if (a != 0x12345678U) > + __builtin_abort (); > + c = 37; > + a = __rold (a, c); > + if (a != 0x468acf02U) > + __builtin_abort (); > + c = -5; > + a = __rold (a, c); > + if (a != 0x12345678U) > + __builtin_abort (); > + c = 0; > + a = __rord (a, c); > + if (a != 0x12345678U) > + __builtin_abort (); > + c = 32; > + a = __rord (a, c); > + if (a != 0x12345678U) > + __builtin_abort (); > + c = -32; > + a = __rord (a, c); > + if (a != 0x12345678U) > + __builtin_abort (); > + c = -37; > + a = __rord (a, c); > + if (a != 0x468acf02U) > + __builtin_abort (); > + c = 5; > + a = __rord (a, c); > + if (a != 0x12345678U) > + __builtin_abort (); > +#ifdef __x86_64__ > + b = 0x123456789abcdef1ULL; > + b = __rolq (b, 0); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + b = __rolq (b, 64); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + b = __rolq (b, -64); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + b = __rolq (b, 69); > + if (b != 0x468acf13579bde22ULL) > + __builtin_abort (); > + b = __rolq (b, -5); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + b = __rorq (b, 0); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + b = __rorq (b, 64); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + b = __rorq (b, -64); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + b = __rorq (b, -69); > + if (b != 0x468acf13579bde22ULL) > + __builtin_abort (); > + b = __rorq (b, 5); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + c = 0; > + b = __rolq (b, c); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + c = 64; > + b = __rolq (b, c); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + c = -64; > + b = __rolq (b, c); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + c = 69; > + b = __rolq (b, c); > + if (b != 0x468acf13579bde22ULL) > + __builtin_abort (); > + c = -5; > + b = __rolq (b, c); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + c = 0; > + b = __rorq (b, c); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + c = 64; > + b = __rorq (b, c); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + c = -64; > + b = __rorq (b, c); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > + c = -69; > + b = __rorq (b, c); > + if (b != 0x468acf13579bde22ULL) > + __builtin_abort (); > + c = 5; > + b = __rorq (b, c); > + if (b != 0x123456789abcdef1ULL) > + __builtin_abort (); > +#endif > + return 0; > +} > > Jakub