> From: Richard Biener [mailto:richard.guent...@gmail.com]
> Sent: Wednesday, October 08, 2014 8:27 AM
> 
> I wouldn't worry about that too much.  Indeed the question would be
> what
> should be canonical on GIMPLE (expanders should choose the optimal
> vairant from both).  I think a tree code should be always prefered to a
> builtin function call - which means a rotate is more canonical than a
> bswap16 call.

Below is the updated patch. ChangeLog entries are as follows:

*** gcc/ChangeLog ***

2014-10-29  Thomas Preud'homme  <thomas.preudho...@arm.com>

        PR tree-optimization/63259
        * tree-ssa-math-opts.c (bswap_replace): Replace expression by a
        rotation left if it is a 16 bit byte swap.
        (pass_optimize_bswap::execute): Also consider bswap in LROTATE_EXPR and
        RROTATE_EXPR statements if it is a byte rotation.


*** gcc/testsuite/ChangeLog ***

2014-10-29  Thomas Preud'homme  <thomas.preudho...@arm.com>

        PR tree-optimization/63259
        * optimize-bswapsi-1.c (swap32_f): New bswap pass test.
        * optimize-bswaphi-1.c: Drop useless SIType definition and fix typo in
        following comment.


diff --git a/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c 
b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
index 18aba28..692fceb 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
@@ -42,11 +42,10 @@ uint32_t read_be16_3 (unsigned char *data)
   return *(data + 1) | (*data << 8);
 }
 
-typedef int SItype __attribute__ ((mode (SI)));
 typedef int HItype __attribute__ ((mode (HI)));
 
 /* Test that detection of significant sign extension works correctly. This
-   checks that unknown byte marker are set correctly in cast of cast.  */
+   checks that unknown byte markers are set correctly in cast of cast.  */
 
 HItype
 swap16 (HItype in)
diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c 
b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
index cfde218..ad3ede4 100644
--- a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
+++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
@@ -78,5 +78,16 @@ swap32_e (SItype in)
         | (((in >> 24) & 0xFF) << 0);
 }
 
-/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 5 
"bswap" } } */
+/* This variant comes from PR63259.  It compiles to a gimple sequence that ends
+   with a rotation instead of a bitwise OR.  */
+
+unsigned
+swap32_f (unsigned in)
+{
+  in = ((in & 0xff00ff00) >>  8) | ((in & 0x00ff00ff) <<  8);
+  in = ((in & 0xffff0000) >> 16) | ((in & 0x0000ffff) << 16);
+  return in;
+}
+
+/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 6 
"bswap" } } */
 /* { dg-final { cleanup-tree-dump "bswap" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index e0f2924..5b656e0 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2187,7 +2187,7 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator gsi, 
gimple src_stmt,
               struct symbolic_number *n, bool bswap)
 {
   tree src, tmp, tgt;
-  gimple call;
+  gimple bswap_stmt;
 
   src = gimple_assign_rhs1 (src_stmt);
   tgt = gimple_assign_lhs (cur_stmt);
@@ -2293,16 +2293,28 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator 
gsi, gimple src_stmt,
 
   tmp = src;
 
-  /* Convert the src expression if necessary.  */
-  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
+  /* Canonical form for 16 bit bswap is a rotate expression.  */
+  if (bswap && n->range == 16)
     {
-      gimple convert_stmt;
-      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
-      convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tmp, src, NULL);
-      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
+      tree count = build_int_cst (NULL, BITS_PER_UNIT);
+      bswap_type = TREE_TYPE (src);
+      src = fold_build2 (LROTATE_EXPR, bswap_type, src, count);
+      bswap_stmt = gimple_build_assign (NULL, src);
     }
+  else
+    {
+      /* Convert the src expression if necessary.  */
+      if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
+       {
+         gimple convert_stmt;
+         tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
+         convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tmp, src,
+                                                      NULL);
+         gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
+       }
 
-  call = gimple_build_call (fndecl, 1, tmp);
+      bswap_stmt = gimple_build_call (fndecl, 1, tmp);
+    }
 
   tmp = tgt;
 
@@ -2315,7 +2327,7 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator gsi, 
gimple src_stmt,
       gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
     }
 
-  gimple_call_set_lhs (call, tmp);
+  gimple_set_lhs (bswap_stmt, tmp);
 
   if (dump_file)
     {
@@ -2324,7 +2336,7 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator gsi, 
gimple src_stmt,
       print_gimple_stmt (dump_file, cur_stmt, 0, 0);
     }
 
-  gsi_insert_after (&gsi, call, GSI_SAME_STMT);
+  gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
   gsi_remove (&gsi, true);
   return true;
 }
@@ -2388,13 +2400,29 @@ pass_optimize_bswap::execute (function *fun)
         {
          gimple src_stmt, cur_stmt = gsi_stmt (gsi);
          tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+         enum tree_code code;
          struct symbolic_number n;
          bool bswap;
 
-         if (!is_gimple_assign (cur_stmt)
-             || gimple_assign_rhs_code (cur_stmt) != BIT_IOR_EXPR)
+         if (!is_gimple_assign (cur_stmt))
            continue;
 
+         code = gimple_assign_rhs_code (cur_stmt);
+         switch (code)
+           {
+           case LROTATE_EXPR:
+           case RROTATE_EXPR:
+             if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
+                 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
+                    % BITS_PER_UNIT)
+               continue;
+             /* Fall through.  */
+           case BIT_IOR_EXPR:
+             break;
+           default:
+             continue;
+           }
+
          src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
 
          if (!src_stmt)

Ok for trunk?

> 
> From the performance side the pass could be re-structured to populate
> a lattice, thus work from def to use instead of the other way around.
> Which
> means we visit each stmt exactly once, compute its value symbolically
> and check it against a rotate.

That sounds nice but is an heavy change. IMHO this should be done only if it
can be shown that bswap can have significan impact on compilation time. In
any case, with the approaching end of stage1 this would be material for next
stage1.

Best regards,

Thomas



Reply via email to