Hi!

As can be seen on the testcase below, the *<rotate_insn><mode>3_mask
insn/splitter is able to optimize only the case when the and is
performed in SImode and then the result subreged into QImode,
while if the computation is already in QImode, we don't handle it.

Fixed by adding another pattern, bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk?

2017-10-11  Jakub Jelinek  <ja...@redhat.com>

        PR target/82498
        * config/i386/i386.md (*<rotate_insn><mode>3_mask_1): New
        define_insn_and_split.

        * gcc.target/i386/pr82498.c: New test.

--- gcc/config/i386/i386.md.jj  2017-10-10 11:54:11.000000000 +0200
+++ gcc/config/i386/i386.md     2017-10-11 19:24:27.673606778 +0200
@@ -11187,6 +11187,26 @@ (define_insn_and_split "*<rotate_insn><m
       (clobber (reg:CC FLAGS_REG))])]
   "operands[2] = gen_lowpart (QImode, operands[2]);")
 
+(define_insn_and_split "*<rotate_insn><mode>3_mask_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand")
+       (any_rotate:SWI48
+         (match_operand:SWI48 1 "nonimmediate_operand")
+         (and:QI
+           (match_operand:QI 2 "register_operand")
+           (match_operand:QI 3 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (match_dup 0)
+          (any_rotate:SWI48 (match_dup 1)
+                            (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])])
+
 ;; Implement rotation using two double-precision
 ;; shift instructions and a scratch register.
 
--- gcc/testsuite/gcc.target/i386/pr82498.c.jj  2017-10-11 20:21:10.677088306 
+0200
+++ gcc/testsuite/gcc.target/i386/pr82498.c     2017-10-11 20:22:31.569101564 
+0200
@@ -0,0 +1,52 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
+
+unsigned
+f1 (unsigned x, unsigned char y)
+{
+  if (y == 0)
+    return x;
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f2 (unsigned x, unsigned y)
+{
+  if (y == 0)
+    return x;
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f3 (unsigned x, unsigned short y)
+{
+  if (y == 0)
+    return x;
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f4 (unsigned x, unsigned char y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f5 (unsigned x, unsigned int y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f6 (unsigned x, unsigned short y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}

        Jakub

Reply via email to