On Tue, Jan 05, 2021 at 01:55:21PM +0100, Richard Biener wrote:
> > Note, I have no idea why the bswap code needs TODO_update_ssa if it changed
> > things, for the vuses it copies them from the surrounding vuses, which looks
> > correct to me.  Perhaps because it uses force_gimple_operand_gsi* in a few
> > spots in bswap_replace?  Confused...
> 
> .. that shouldn't cause updating SSA to be necessary.  Maybe it at some
> point did not update virtual operands appropriately.

Ok, I've committed the following version without the TODO_update_ssa, which
passed another bootstrap/regtest on x86_64-linux and i686-linux.

2021-01-05  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/96239
        * gimple-ssa-store-merging.c (maybe_optimize_vector_constructor): New
        function.
        (get_status_for_store_merging): Don't return BB_INVALID for blocks
        with potential bswap optimizable CONSTRUCTORs.
        (pass_store_merging::execute): Optimize vector CONSTRUCTORs with bswap
        if possible.

        * gcc.dg/tree-ssa/pr96239.c: New test.

--- gcc/gimple-ssa-store-merging.c.jj   2020-12-16 13:07:51.729733816 +0100
+++ gcc/gimple-ssa-store-merging.c      2020-12-16 16:02:06.238868137 +0100
@@ -1255,6 +1255,75 @@ bswap_replace (gimple_stmt_iterator gsi,
   return tgt;
 }
 
+/* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
+   using bswap optimizations.  CDI_DOMINATORS need to be
+   computed on entry.  Return true if it has been optimized and
+   TODO_update_ssa is needed.  */
+
+static bool
+maybe_optimize_vector_constructor (gimple *cur_stmt)
+{
+  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+  struct symbolic_number n;
+  bool bswap;
+
+  gcc_assert (is_gimple_assign (cur_stmt)
+             && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
+
+  tree rhs = gimple_assign_rhs1 (cur_stmt);
+  if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
+      || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
+      || gimple_assign_lhs (cur_stmt) == NULL_TREE)
+    return false;
+
+  HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
+  switch (sz)
+    {
+    case 16:
+      load_type = bswap_type = uint16_type_node;
+      break;
+    case 32:
+      if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+         && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
+       {
+         load_type = uint32_type_node;
+         fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+         bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+       }
+      else
+       return false;
+      break;
+    case 64:
+      if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
+         && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
+             || (word_mode == SImode
+                 && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+                 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
+       {
+         load_type = uint64_type_node;
+         fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+         bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+       }
+      else
+       return false;
+      break;
+    default:
+      return false;
+    }
+
+  gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
+  if (!ins_stmt || n.range != (unsigned HOST_WIDE_INT) sz)
+    return false;
+
+  if (bswap && !fndecl && n.range != 16)
+    return false;
+
+  memset (&nop_stats, 0, sizeof (nop_stats));
+  memset (&bswap_stats, 0, sizeof (bswap_stats));
+  return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
+                       bswap_type, load_type, &n, bswap) != NULL_TREE;
+}
+
 /* Find manual byte swap implementations as well as load in a given
    endianness. Byte swaps are turned into a bswap builtin invokation
    while endian loads are converted to bswap builtin invokation or
@@ -5126,6 +5195,7 @@ static enum basic_block_status
 get_status_for_store_merging (basic_block bb)
 {
   unsigned int num_statements = 0;
+  unsigned int num_constructors = 0;
   gimple_stmt_iterator gsi;
   edge e;
 
@@ -5138,9 +5208,27 @@ get_status_for_store_merging (basic_bloc
 
       if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
        break;
+
+      if (is_gimple_assign (stmt)
+         && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
+       {
+         tree rhs = gimple_assign_rhs1 (stmt);
+         if (VECTOR_TYPE_P (TREE_TYPE (rhs))
+             && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
+             && gimple_assign_lhs (stmt) != NULL_TREE)
+           {
+             HOST_WIDE_INT sz
+               = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
+             if (sz == 16 || sz == 32 || sz == 64)
+               {
+                 num_constructors = 1;
+                 break;
+               }
+           }
+       }
     }
 
-  if (num_statements == 0)
+  if (num_statements == 0 && num_constructors == 0)
     return BB_INVALID;
 
   if (cfun->can_throw_non_call_exceptions && cfun->eh
@@ -5149,7 +5237,7 @@ get_status_for_store_merging (basic_bloc
       && e->dest == bb->next_bb)
     return BB_EXTENDED_VALID;
 
-  return num_statements >= 2 ? BB_VALID : BB_INVALID;
+  return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
 }
 
 /* Entry point for the pass.  Go over each basic block recording chains of
@@ -5189,9 +5278,10 @@ pass_store_merging::execute (function *f
       if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
 
-      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
        {
          gimple *stmt = gsi_stmt (gsi);
+         gsi_next (&gsi);
 
          if (is_gimple_debug (stmt))
            continue;
@@ -5207,6 +5297,11 @@ pass_store_merging::execute (function *f
              continue;
            }
 
+         if (is_gimple_assign (stmt)
+             && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
+             && maybe_optimize_vector_constructor (stmt))
+           continue;
+
          if (store_valid_for_store_merging_p (stmt))
            changed |= process_store (stmt);
          else
--- gcc/testsuite/gcc.dg/tree-ssa/pr96239.c.jj  2020-12-16 16:10:30.013256862 
+0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr96239.c     2020-12-16 16:11:08.802822537 
+0100
@@ -0,0 +1,18 @@
+/* PR tree-optimization/96239 */
+/* { dg-do compile { target { ilp32 || lp64 } } } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump " r>> 8;" "optimized" { target bswap } } } */
+
+union U { unsigned char c[2]; unsigned short s; };
+
+unsigned short
+foo (unsigned short x)
+{
+  union U u;
+  u.s = x;
+  unsigned char v = u.c[0];
+  unsigned char w = u.c[1];
+  u.c[0] = w;
+  u.c[1] = v;
+  return u.s;
+}


        Jakub

Reply via email to