Hi,

The attached patch catches C constructs of the form:
(A << 8) | (A >> 8)
where A is an unsigned 16-bit value,
and maps them to __builtin_bswap16 (A), which can provide a more efficient
implementation on some targets.

The construct above is equivalent to the default bswap16 implementation.

I have added a testcase for ARM, and have found no regression with
qemu-arm on arm-none-linux-gnueabi.

OK?

Christophe
2012-09-19  Christophe Lyon <christophe.l...@linaro.org>

        gcc/
        * fold-const.c (fold_binary_loc): Call __builtin_bswap16 when the
        equivalent construct is detected.

        gcc/testsuite/
        * gcc.target/arm/builtin-bswap-2.c: New testcase.

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 2bf5179..0ff7e8b 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -10326,6 +10326,99 @@ fold_binary_loc (location_t loc,
          }
       }
 
+      /* Catch bswap16 construct: (A << 8) | (A >> 8) where A is
+         unsigned 16 bits.
+         This has been expanded into:
+        (ior:SI (lshift:SI (nop:SI A:HI) 8)
+                (nop:SI (rshift:HI A:HI 8)))
+      */
+      {
+       enum tree_code code0, code1;
+       tree my_arg0 = arg0;
+       tree my_arg1= arg1;
+       tree rtype;
+
+       code0 = TREE_CODE (arg0);
+       code1 = TREE_CODE (arg1);
+       if (code1 == NOP_EXPR)
+         {
+           my_arg1 = TREE_OPERAND (arg1, 0);
+           code1 = TREE_CODE (my_arg1);
+         }
+       else if (code0 == NOP_EXPR)
+         {
+           my_arg0 = TREE_OPERAND (arg0, 0);
+           code0 = TREE_CODE (my_arg0);
+         }
+
+       /* Handle (A << C1) | (A >> C1).  */
+       if ((code1 == RSHIFT_EXPR && code0 == LSHIFT_EXPR)
+            && (TREE_CODE (TREE_OPERAND (my_arg0, 0)) == NOP_EXPR)
+            && operand_equal_p (TREE_OPERAND (TREE_OPERAND (my_arg0, 0), 0),
+                                TREE_OPERAND (my_arg1, 0), 0)
+            && (rtype = TREE_TYPE (TREE_OPERAND (my_arg1, 0)),
+                TYPE_UNSIGNED (rtype)))
+         {
+           tree tree01, tree11;
+           enum tree_code code01, code11;
+
+           tree01 = TREE_OPERAND (my_arg0, 1);
+           tree11 = TREE_OPERAND (my_arg1, 1);
+           STRIP_NOPS (tree01);
+           STRIP_NOPS (tree11);
+           code01 = TREE_CODE (tree01);
+           code11 = TREE_CODE (tree11);
+
+           /* Check that shift amount is 8, and input 16 bits wide.  */
+           if (code01 == INTEGER_CST
+               && code11 == INTEGER_CST
+               && TREE_INT_CST_HIGH (tree01) == 0
+               && TREE_INT_CST_HIGH (tree11) == 0
+               && TREE_INT_CST_LOW (tree01) == TREE_INT_CST_LOW (tree11)
+               && TREE_INT_CST_LOW (tree01) == 8
+               && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (my_arg1, 0))) == 16)
+             {
+               tree bswapfn = builtin_decl_explicit (BUILT_IN_BSWAP16);
+               return build_call_expr_loc (loc, bswapfn, 1,
+                                           TREE_OPERAND (my_arg1, 0));
+             }
+         }
+
+       /* Handle (A >> C1) | (A << C1).  */
+       else if ((code0 == RSHIFT_EXPR && code1 == LSHIFT_EXPR)
+                && (TREE_CODE (TREE_OPERAND (my_arg1, 0)) == NOP_EXPR)
+                && operand_equal_p (TREE_OPERAND (TREE_OPERAND (my_arg1, 0),
+                                                  0),
+                                    TREE_OPERAND (my_arg0, 0), 0)
+                && (rtype = TREE_TYPE (TREE_OPERAND (my_arg0, 0)),
+                    TYPE_UNSIGNED (rtype)))
+         {
+           tree tree01, tree11;
+           enum tree_code code01, code11;
+
+           tree01 = TREE_OPERAND (my_arg0, 1);
+           tree11 = TREE_OPERAND (my_arg1, 1);
+           STRIP_NOPS (tree01);
+           STRIP_NOPS (tree11);
+           code01 = TREE_CODE (tree01);
+           code11 = TREE_CODE (tree11);
+
+           /* Check that shift amount is 8, and input 16 bits wide.  */
+           if (code01 == INTEGER_CST
+               && code11 == INTEGER_CST
+               && TREE_INT_CST_HIGH (tree01) == 0
+               && TREE_INT_CST_HIGH (tree11) == 0
+               && TREE_INT_CST_LOW (tree01) == TREE_INT_CST_LOW (tree11)
+               && TREE_INT_CST_LOW (tree01) == 8
+               && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (my_arg0, 0))) == 16)
+             {
+               tree bswapfn = builtin_decl_explicit (BUILT_IN_BSWAP16);
+               return build_call_expr_loc (loc, bswapfn, 1,
+                                           TREE_OPERAND (my_arg0, 0));
+             }
+         }
+      }
+
     associate:
       /* In most languages, can't associate operations on floats through
         parentheses.  Rather than remember where the parentheses were, we
diff --git a/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c b/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c
new file mode 100644
index 0000000..93dbb35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target arm_arch_v6_ok } */
+/* { dg-add-options arm_arch_v6 } */
+/* { dg-final { scan-assembler-not "orr\[ \t\]" } } */
+
+unsigned short swapu16_1 (unsigned short x)
+{
+  return (x << 8) | (x >> 8);
+}
+
+unsigned short swapu16_2 (unsigned short x)
+{
+  return (x >> 8) | (x << 8);
+}
+
+unsigned int swapu32_1 (unsigned int x)
+{
+  return (x << 16) | (x >> 16);
+}
+
+unsigned int swapu32_2 (unsigned int x)
+{
+  return (x >> 16) | (x << 16);
+}

Reply via email to