On Fri, Mar 5, 2021 at 9:40 PM Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> The following testcase shows that while we nicely optimize away the
> useless and? of shift count before rotation for [SD]Imode rotates,
> we don't do that for [QH]Imode.
>
> The following patch optimizes that by using the right iterator on those
> 4 patterns.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?
> Or just GCC12?
>
> 2021-03-05  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/99405
>         * config/i386/i386.md (*<insn><mode>3_mask, *<insn><mode>3_mask_1):
>         For any_rotate define_insn_split and following splitters, use
>         SWI iterator instead of SWI48.
>
>         * gcc.target/i386/pr99405.c: New test.

I'm not sure I remember why I left out QI and HImode from rotates (and
shifts) when these patterns were introduced, but the testcase shows
that they are effective. Since this is not a regression, OK for gcc-12

Thanks,
Uros.

> --- gcc/config/i386/i386.md.jj  2021-03-03 10:02:27.871589603 +0100
> +++ gcc/config/i386/i386.md     2021-03-05 14:00:35.768378973 +0100
> @@ -11951,9 +11951,9 @@ (define_expand "<insn><mode>3"
>
>  ;; Avoid useless masking of count operand.
>  (define_insn_and_split "*<insn><mode>3_mask"
> -  [(set (match_operand:SWI48 0 "nonimmediate_operand")
> -       (any_rotate:SWI48
> -         (match_operand:SWI48 1 "nonimmediate_operand")
> +  [(set (match_operand:SWI 0 "nonimmediate_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "nonimmediate_operand")
>           (subreg:QI
>             (and:SI
>               (match_operand:SI 2 "register_operand" "c")
> @@ -11967,15 +11967,15 @@ (define_insn_and_split "*<insn><mode>3_m
>    "&& 1"
>    [(parallel
>       [(set (match_dup 0)
> -          (any_rotate:SWI48 (match_dup 1)
> -                            (match_dup 2)))
> +          (any_rotate:SWI (match_dup 1)
> +                          (match_dup 2)))
>        (clobber (reg:CC FLAGS_REG))])]
>    "operands[2] = gen_lowpart (QImode, operands[2]);")
>
>  (define_split
> -  [(set (match_operand:SWI48 0 "register_operand")
> -       (any_rotate:SWI48
> -         (match_operand:SWI48 1 "const_int_operand")
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "const_int_operand")
>           (subreg:QI
>             (and:SI
>               (match_operand:SI 2 "register_operand")
> @@ -11984,14 +11984,14 @@ (define_split
>     == GET_MODE_BITSIZE (<MODE>mode) - 1"
>   [(set (match_dup 4) (match_dup 1))
>    (set (match_dup 0)
> -       (any_rotate:SWI48 (match_dup 4)
> -                        (subreg:QI (match_dup 2) 0)))]
> +       (any_rotate:SWI (match_dup 4)
> +                      (subreg:QI (match_dup 2) 0)))]
>   "operands[4] = gen_reg_rtx (<MODE>mode);")
>
>  (define_insn_and_split "*<insn><mode>3_mask_1"
> -  [(set (match_operand:SWI48 0 "nonimmediate_operand")
> -       (any_rotate:SWI48
> -         (match_operand:SWI48 1 "nonimmediate_operand")
> +  [(set (match_operand:SWI 0 "nonimmediate_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "nonimmediate_operand")
>           (and:QI
>             (match_operand:QI 2 "register_operand" "c")
>             (match_operand:QI 3 "const_int_operand"))))
> @@ -12004,14 +12004,14 @@ (define_insn_and_split "*<insn><mode>3_m
>    "&& 1"
>    [(parallel
>       [(set (match_dup 0)
> -          (any_rotate:SWI48 (match_dup 1)
> -                            (match_dup 2)))
> +          (any_rotate:SWI (match_dup 1)
> +                          (match_dup 2)))
>        (clobber (reg:CC FLAGS_REG))])])
>
>  (define_split
> -  [(set (match_operand:SWI48 0 "register_operand")
> -       (any_rotate:SWI48
> -         (match_operand:SWI48 1 "const_int_operand")
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "const_int_operand")
>           (and:QI
>             (match_operand:QI 2 "register_operand")
>             (match_operand:QI 3 "const_int_operand"))))]
> @@ -12019,7 +12019,7 @@ (define_split
>    == GET_MODE_BITSIZE (<MODE>mode) - 1"
>   [(set (match_dup 4) (match_dup 1))
>    (set (match_dup 0)
> -       (any_rotate:SWI48 (match_dup 4) (match_dup 2)))]
> +       (any_rotate:SWI (match_dup 4) (match_dup 2)))]
>   "operands[4] = gen_reg_rtx (<MODE>mode);")
>
>  ;; Implement rotation using two double-precision
> --- gcc/testsuite/gcc.target/i386/pr99405.c.jj  2021-03-05 13:40:20.334860937 
> +0100
> +++ gcc/testsuite/gcc.target/i386/pr99405.c     2021-03-05 13:39:51.131185009 
> +0100
> @@ -0,0 +1,23 @@
> +/* PR target/99405 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune=generic -fomit-frame-pointer" } */
> +/* { dg-final { scan-assembler-not "\tand\[bl]\t\\\$" } } */
> +
> +unsigned char f1 (unsigned char x, unsigned y) { return (x << (y & 7)) | (x 
> >> (-y & 7)); }
> +unsigned short f2 (unsigned short x, unsigned y) { return (x << (y & 15)) | 
> (x >> (-y & 15)); }
> +unsigned int f3 (unsigned int x, unsigned y) { return (x << (y & 31)) | (x 
> >> (-y & 31)); }
> +unsigned char f4 (unsigned char x, unsigned y) { return (x >> (y & 7)) | (x 
> << (-y & 7)); }
> +unsigned short f5 (unsigned short x, unsigned y) { return (x >> (y & 15)) | 
> (x << (-y & 15)); }
> +unsigned int f6 (unsigned int x, unsigned y) { return (x >> (y & 31)) | (x 
> << (-y & 31)); }
> +unsigned char f7 (unsigned char x, unsigned char y) { unsigned char v = y & 
> 7; unsigned char w = -y & 7; return (x << v) | (x >> w); }
> +unsigned short f8 (unsigned short x, unsigned char y) { unsigned char v = y 
> & 15; unsigned char w = -y & 15; return (x << v) | (x >> w); }
> +unsigned int f9 (unsigned int x, unsigned char y) { unsigned char v = y & 
> 31; unsigned char w = -y & 31; return (x << v) | (x >> w); }
> +unsigned char f10 (unsigned char x, unsigned char y) { unsigned char v = y & 
> 7; unsigned char w = -y & 7; return (x >> v) | (x << w); }
> +unsigned short f11 (unsigned short x, unsigned char y) { unsigned char v = y 
> & 15; unsigned char w = -y & 15; return (x >> v) | (x << w); }
> +unsigned int f12 (unsigned int x, unsigned char y) { unsigned char v = y & 
> 31; unsigned char w = -y & 31; return (x >> v) | (x << w); }
> +#ifdef __x86_64__
> +unsigned long long f13 (unsigned long long x, unsigned y) { return (x << (y 
> & 63)) | (x >> (-y & 63)); }
> +unsigned long long f14 (unsigned long long x, unsigned y) { return (x >> (y 
> & 63)) | (x << (-y & 63)); }
> +unsigned long long f15 (unsigned long long x, unsigned char y) { unsigned 
> char v = y & 63; unsigned char w = -y & 63; return (x << v) | (x >> w); }
> +unsigned long long f16 (unsigned long long x, unsigned char y) { unsigned 
> char v = y & 63; unsigned char w = -y & 63; return (x >> v) | (x << w); }
> +#endif
>
>         Jakub
>

Reply via email to