On Thu, Aug 7, 2014 at 12:55 PM, Thomas Preud'homme <thomas.preudho...@arm.com> wrote:
> Hi all,
>
> Currently, when an expression doing a manual load or bswap is detected, the
> bswap optimization replaces it with an actual load and/or bswap instruction
> without considering whether the intermediate values computed in the
> expression are reused later. If that is the case, the construction of these
> values has to be retained, and the sum of the load and/or bswap instruction
> and the instructions to construct those values might be more expensive than
> the initial fully manual expression. This patch aims at detecting such a case
> and cancelling the bswap optimization. In addition, it takes advantage of the
> infrastructure necessary for the detection to also clean up the stmts that
> become useless when the bswap optimization is performed.
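For illustration, the reuse pattern the patch guards against looks roughly like
this (a distilled form of the new optimize-bswapsi-3.c test added below; the
function is only a sketch, not part of the patch):

  #include <stdint.h>

  /* 'low' escapes through *low_out, so the stmts that build it must stay
     even if the whole expression is recognized as a little-endian load:
     emitting a load/bswap on top of them can make the code larger.  */
  uint32_t
  read_le32_reuse (const unsigned char *data, uint32_t *low_out)
  {
    uint32_t low = data[0] | (data[1] << 8);
    uint32_t ret = low | (data[2] << 16) | (data[3] << 24);
    *low_out = low;   /* extra use outside the load expression */
    return ret;
  }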
Comments below

> *** gcc/ChangeLog ***
>
> 2014-08-01  Thomas Preud'homme  <thomas.preudho...@arm.com>
>
> 	* tree-ssa-math-opts.c (struct usedtree): New.
> 	(find_bswap_or_nop_1): Change prototype to take a hashtable and a list
> 	of struct usedtree. Fill these respectively with visited stmts and
> 	trees (along with the stmts where they are defined) that would not be
> 	defined if the bswap optimization is applied. Adapt recursive calls to
> 	the prototype change.
> 	(find_bswap_or_nop): Adapt to find_bswap_or_nop_1 prototype change.
> 	Pass the hashtable and the list of struct usedtree from
> 	pass_optimize_bswap::execute ().
> 	(do_bswap_p): New.
> 	(bswap_replace): Fix typo in heading comment. Stop returning whether
> 	the bswap optimization was performed since this is now handled by
> 	do_bswap_p (). Move alignment check to do_bswap_p ().
> 	(free_usedtrees_list): New.
> 	(pass_optimize_bswap::execute): Add allocation and deallocation
> 	handling of the hashtable of browsed stmts. Free the list of struct
> 	usedtree at the end of each iteration using free_usedtrees_list () and
> 	the new bswap_check_end_iter label. Move some of the logic to perform
> 	the bswap optimization to do_bswap_p (). Set gsi after performing the
> 	bswap optimization and loop manually via the new
> 	bswap_check_start_iter label so that all stmts are checked for
> 	load/bswap even when cur_stmt is moved around by bswap_replace ().
>
> *** gcc/testsuite/ChangeLog ***
>
> 2014-08-01  Thomas Preud'homme  <thomas.preudho...@arm.com>
>
> 	* gcc.dg/optimize-bswapsi-2.c (read_le32_1): Add an intermediate
> 	variable in the mix to check that it is optimized when there is no
> 	use outside the expression computing a load/bswap.
> 	(read_be32_1): Likewise.
> 	* gcc.dg/optimize-bswapsi-3.c: New. Create read_le32_1 () and
> 	read_be32_1 () based on the identically named functions in
> 	gcc.dg/optimize-bswapsi-2.c but reuse the intermediate variable so
> 	as to check that the bswap optimization is not performed in these cases.
> > diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c > b/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c > index de6e697..df7856b 100644 > --- a/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c > +++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-2.c > @@ -14,7 +14,9 @@ struct uint32_st { > > uint32_t read_le32_1 (void) > { > - return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); > + uint32_t low = data[0] | (data[1] << 8); > + uint32_t ret = low | (data[2] << 16) | (data[3] << 24); > + return ret; > } > > uint32_t read_le32_2 (struct uint32_st data) > @@ -30,7 +32,9 @@ uint32_t read_le32_3 (unsigned char *data) > > uint32_t read_be32_1 (void) > { > - return data[3] | (data[2] << 8) | (data[1] << 16) | (data[0] << 24); > + uint32_t low = data[3] | (data[2] << 8); > + uint32_t ret = low | (data[1] << 16) | (data[0] << 24); > + return ret; > } > > uint32_t read_be32_2 (struct uint32_st data) > diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-3.c > b/gcc/testsuite/gcc.dg/optimize-bswapsi-3.c > new file mode 100644 > index 0000000..dc48d9d > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-3.c > @@ -0,0 +1,31 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target bswap32 } */ > +/* { dg-require-effective-target stdint_types } */ > +/* { dg-options "-O2 -fdump-tree-bswap" } */ > +/* { dg-additional-options "-march=z900" { target s390-*-* } } */ > + > +#include <stdint.h> > + > +extern unsigned char data[4]; > + > +/* No "bswap" optimization as low is reused */ > +uint32_t read_le32_1 (unsigned char *data, uint32_t *low_neg) > +{ > + uint32_t low = data[0] | (data[1] << 8); > + uint32_t ret = low | (data[2] << 16) | (data[3] << 24); > + *low_neg = low; > + return ret; > +} > + > +/* No "bswap" optimization as low is reused */ > +uint32_t read_be32_1 (unsigned char *data, uint32_t *low_neg) > +{ > + uint32_t low = data[3] | (data[2] << 8); > + uint32_t ret = low | (data[1] << 16) | (data[0] << 24); > + *low_neg = low; > + return ret; > +} > + > +/* { dg-final { scan-tree-dump-not "32 bit load in target endianness found > at" "bswap" } } */ > +/* { dg-final { scan-tree-dump-not "32 bit bswap implementation found at" > "bswap" } } */ > +/* { dg-final { cleanup-tree-dump "bswap" } } */ > diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c > index 705793b..9f71145 100644 > --- a/gcc/tree-ssa-math-opts.c > +++ b/gcc/tree-ssa-math-opts.c > @@ -1641,6 +1641,18 @@ struct symbolic_number { > #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \ > (uint64_t)0x01020304 << 32 | 0x05060708) > > +/* Structure to memorize all the intermediate trees created to compute a > bswap > + or a load along with the stmt where they are used. This allows to check > + whether some trees are reused outside the bswap or load performed and > avoid > + doing the optimization in that case since it might lead to more > computation. > + It also allows to remove all the stmts that create these trees in case the > + optimization is performed. */ > +struct usedtree { > + tree t; > + gimple stmt; > + struct usedtree *next; > +}; > + > /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic > number N. Return false if the requested operation is not permitted > on a symbolic number. */ > @@ -1803,19 +1815,26 @@ find_bswap_or_nop_load (gimple stmt, tree ref, struct > symbolic_number *n) > return true; > } > > -/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform > - the operation given by the rhs of STMT on the result. 
If the operation > - could successfully be executed the function returns a gimple stmt whose > - rhs's first tree is the expression of the source operand and NULL > - otherwise. */ > +/* find_bswap_or_nop_1 invokes itself recursively trying to perform the > + operation given by the rhs of STMT on the symbolic number represented by N > + until LIMIT recursions have been done or the variable that is the source > of > + the successive expressions has been found. The function also keeps track > of > + the intermediate trees defined in the expression with the stmt where they > + are defined (in USED_TREES) and records the stmt that were visited (in > + BROWSED_STMTS_HTAB). If the operation could successfully be executed the > + function returns a gimple stmt whose rhs's first tree is the expression of > + the source operand and NULL otherwise. */ > > static gimple > -find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit) > +find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit, > + htab_t browsed_stmts_htab, struct usedtree **used_trees) > { > + void **slot; > enum tree_code code; > tree rhs1, rhs2 = NULL; > gimple rhs1_stmt, rhs2_stmt, source_stmt1; > enum gimple_rhs_class rhs_class; > + struct usedtree *new_used; > > if (!limit || !is_gimple_assign (stmt)) > return NULL; > @@ -1823,7 +1842,20 @@ find_bswap_or_nop_1 (gimple stmt, struct > symbolic_number *n, int limit) > rhs1 = gimple_assign_rhs1 (stmt); > > if (find_bswap_or_nop_load (stmt, rhs1, n)) > - return stmt; > + { > + slot = htab_find_slot (browsed_stmts_htab, stmt, INSERT); > + gcc_assert (slot); > + if (!*slot) > + { > + *slot = stmt; > + new_used = (struct usedtree *) xmalloc (sizeof (*used_trees)); > + new_used->t = gimple_assign_lhs (stmt); > + new_used->stmt = stmt; > + new_used->next = *used_trees; > + *used_trees = new_used; Hmm. I am a little bit concerned about the malloc traffic generated here. So why not use a vec<usedtree>, get rid of the ->next pointer and use a hash_map <gimple, unsigned> to associate the stmt with an index into the vector? The whole scheme wouldn't exactly fly with the idea of eventually using a lattice to propagate the symbolic numbers, but well... > + } > + return stmt; > + } > > if (TREE_CODE (rhs1) != SSA_NAME) > return NULL; > @@ -1835,6 +1867,18 @@ find_bswap_or_nop_1 (gimple stmt, struct > symbolic_number *n, int limit) > if (rhs_class == GIMPLE_BINARY_RHS) > rhs2 = gimple_assign_rhs2 (stmt); > > + slot = htab_find_slot (browsed_stmts_htab, stmt, INSERT); > + gcc_assert (slot); > + if (!*slot) > + { > + *slot = stmt; > + new_used = (struct usedtree *) xmalloc (sizeof (*used_trees)); > + new_used->t = gimple_assign_lhs (stmt); > + new_used->stmt = stmt; > + new_used->next = *used_trees; > + *used_trees = new_used; > + } > + > /* Handle unary rhs and binary rhs with integer constants as second > operand. */ > > @@ -1851,12 +1895,14 @@ find_bswap_or_nop_1 (gimple stmt, struct > symbolic_number *n, int limit) > && code != CONVERT_EXPR) > return NULL; > > - source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1); > + source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1, > + browsed_stmts_htab, used_trees); > > /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and > we have to initialize the symbolic number. 
*/ > if (!source_stmt1) > { > + > if (gimple_assign_load_p (stmt) > || !init_symbolic_number (n, rhs1)) > return NULL; > @@ -1942,12 +1988,14 @@ find_bswap_or_nop_1 (gimple stmt, struct > symbolic_number *n, int limit) > switch (code) > { > case BIT_IOR_EXPR: > - source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1); > + source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1, > + browsed_stmts_htab, used_trees); > > if (!source_stmt1) > return NULL; > > - source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1); > + source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1, > + browsed_stmts_htab, used_trees); > > if (!source_stmt2) > return NULL; > @@ -2044,16 +2092,19 @@ find_bswap_or_nop_1 (gimple stmt, struct > symbolic_number *n, int limit) > return NULL; > } > > -/* Check if STMT completes a bswap implementation or a read in a given > - endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP > - accordingly. It also sets N to represent the kind of operations > - performed: size of the resulting expression and whether it works on > - a memory source, and if so alias-set and vuse. At last, the > - function returns a stmt whose rhs's first tree is the source > - expression. */ > +/* Check if STMT completes a bswap implementation or a load in a given > + endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP accordingly. > + It also sets N to represent the kind of operations performed: size of the > + resulting expression and whether it works on a memory source, and if so > + alias-set and vuse. All the intermediate trees defined in such an > + implementation are recorded along the stmt where they are defined (in > + USED_TREES) as well as the stmts that are part of that implementation (in > + BROWSED_STMTS_HTAB). At last, the function returns a stmt whose rhs's > first > + tree is the source expression. */ > > static gimple > -find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap) > +find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap, > + htab_t browsed_stmts_htab, struct usedtree **used_trees) > { > /* The number which the find_bswap_or_nop_1 result should match in order > to have a full byte swap. The number is shifted to the right > @@ -2071,7 +2122,8 @@ find_bswap_or_nop (gimple stmt, struct symbolic_number > *n, bool *bswap) > in libgcc, and for initial shift/and operation of the src operand. */ > limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt))); > limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit); > - source_stmt = find_bswap_or_nop_1 (stmt, n, limit); > + source_stmt = find_bswap_or_nop_1 (stmt, n, limit, browsed_stmts_htab, > + used_trees); > > if (!source_stmt) > return NULL; > @@ -2146,16 +2198,73 @@ public: > > }; // class pass_optimize_bswap > > +/* Tests whether the bswap optimization can be performed and is likely to > bring > + performance improvements. This decision is made based on: > + - whether the operation performed is a BSWAP or a simple load > + - whether the operation can be replaced by an instruction (FNDECL is non > + NULL) > + - the stmt reading/loading from source operand (SRC_STMT) > + - whether a the source is in memory (given by N->base_addr) > + - whether intermediate trees defined in the expression being optimized are > + reused elsewhere. 
This is determined from the list of tree defined along > + with the stmt where they are defined (in USED_TREES), a hashtable of the > + stmts that are part of the expression (in BROWSED_STMTS_HTAB) and the > + outermost stmt of the expression (in STMT). */ > + > +static bool > +do_bswap_p (gimple stmt, gimple src_stmt, tree fndecl, > + tree load_type ATTRIBUTE_UNUSED, struct symbolic_number *n, > + bool bswap, htab_t browsed_stmts_htab, struct usedtree > *used_trees) > +{ > + unsigned align ATTRIBUTE_UNUSED; > + struct usedtree *used; > + bool ret; > + > + if (bswap && !fndecl) > + return false; > + > + if (n->base_addr) > + { > + tree src = gimple_assign_rhs1 (src_stmt); > + align = get_object_alignment (src); > + if (bswap > + && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type)) > + && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align)) > + return false; > + } > + > + for (ret = true, used = used_trees; ret && used; used = used->next) > + { > + gimple using_stmt; > + imm_use_iterator imm_use; > + > + if (used->stmt == stmt) > + continue; > + > + FOR_EACH_IMM_USE_STMT(using_stmt, imm_use, used->t) > + { > + if (!is_gimple_debug (using_stmt) > + && !htab_find_slot (browsed_stmts_htab, using_stmt, NO_INSERT)) > + { > + ret = false; > + BREAK_FROM_IMM_USE_STMT (imm_use); > + } > + } > + } > + > + return ret; > +} > + > /* Perform the bswap optimization: replace the statement CUR_STMT at > GSI with a load of type, VUSE and set-alias as described by N if a > memory source is involved (N->base_addr is non null), followed by > the builtin bswap invocation in FNDECL if BSWAP is true. SRC_STMT > gives where should the replacement be made. It also gives the > - source on which CUR_STMT is operating via its rhs's first tree nad > + source on which CUR_STMT is operating via its rhs's first tree and > N->range gives the size of the expression involved for maintaining > some statistics. */ > > -static bool > +static void > bswap_replace (gimple cur_stmt, gimple_stmt_iterator gsi, gimple src_stmt, > tree fndecl, tree bswap_type, tree load_type, > struct symbolic_number *n, bool bswap) > @@ -2175,12 +2284,6 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator > gsi, gimple src_stmt, > gimple addr_stmt, load_stmt; > unsigned align; > > - align = get_object_alignment (src); > - if (bswap > - && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type)) > - && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align)) > - return false; > - > gsi_move_before (&gsi, &gsi_ins); > gsi = gsi_for_stmt (cur_stmt); > > @@ -2198,6 +2301,7 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator > gsi, gimple src_stmt, > } > > /* Perform the load. */ > + align = get_object_alignment (src); > aligned_load_type = load_type; > if (align < TYPE_ALIGN (load_type)) > aligned_load_type = build_aligned_type (load_type, align); > @@ -2243,7 +2347,7 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator > gsi, gimple src_stmt, > (int)n->range); > print_gimple_stmt (dump_file, cur_stmt, 0, 0); > } > - return true; > + return; > } > else > { > @@ -2300,7 +2404,32 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator > gsi, gimple src_stmt, > > gsi_insert_after (&gsi, call, GSI_SAME_STMT); > gsi_remove (&gsi, true); > - return true; > +} > + > +/* Free the struct usedtree elements from the list USED_TREES. If > REMOVE_STMTS > + is true, the stmts they reference (CUR_STMT excepted) are removed and the > + ssa names of their lhs is released from the function FUN. 
*/ > + > +static void > +free_usedtrees_list (struct usedtree *used_trees, bool remove_stmts, > + gimple cur_stmt, function *fun) > +{ > + gimple_stmt_iterator gsi_rm; > + struct usedtree *used; > + > + while ((used = used_trees)) > + { > + /* Do not mark stmt for removal if it's the replaced one. */ > + if (remove_stmts && used->stmt != cur_stmt) > + { > + tree lhs = gimple_assign_lhs (used->stmt); > + gsi_rm = gsi_for_stmt (used->stmt); > + gsi_remove (&gsi_rm, true); > + release_ssa_name_fn (fun, lhs); > + } > + used_trees = used->next; > + free (used); > + } At this point I'd rather leave DCE to DCE ... > } > > /* Find manual byte swap implementations as well as load in a given > @@ -2315,6 +2444,7 @@ pass_optimize_bswap::execute (function *fun) > bool bswap16_p, bswap32_p, bswap64_p; > bool changed = false; > tree bswap16_type = NULL_TREE, bswap32_type = NULL_TREE, bswap64_type = > NULL_TREE; > + htab_t browsed_stmts_htab; > > if (BITS_PER_UNIT != 8) > return 0; > @@ -2350,6 +2480,9 @@ pass_optimize_bswap::execute (function *fun) > memset (&nop_stats, 0, sizeof (nop_stats)); > memset (&bswap_stats, 0, sizeof (bswap_stats)); > > + browsed_stmts_htab = htab_create_alloc (16, htab_hash_pointer, > + htab_eq_pointer, 0, xcalloc, free); > + > FOR_EACH_BB_FN (bb, fun) > { > gimple_stmt_iterator gsi; > @@ -2360,19 +2493,29 @@ pass_optimize_bswap::execute (function *fun) > patterns, the wider variant wouldn't be detected. */ > for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) > { > - gimple src_stmt, cur_stmt = gsi_stmt (gsi); > + gimple_stmt_iterator gsi_cont; > + gimple src_stmt, cur_stmt; > tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type; > struct symbolic_number n; > - bool bswap; > + struct usedtree *used_trees; > + bool bswap, rm_stmts; > + > +bswap_check_start_iter: Ick - do we really have to use gotos here? Can you refactor this a bit to avoid it? I think the overall idea is sound and if you have time to adjust according to my comments that would be nice. Sorry for the very late review. Thanks, Richard. > + rm_stmts = false; > + cur_stmt = gsi_stmt (gsi); > > if (!is_gimple_assign (cur_stmt) > || gimple_assign_rhs_code (cur_stmt) != BIT_IOR_EXPR) > continue; > > - src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap); > + htab_empty (browsed_stmts_htab); > + used_trees = NULL; > + > + src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap, > + browsed_stmts_htab, &used_trees); > > if (!src_stmt) > - continue; > + goto bswap_check_end_iter; > > switch (n.range) > { > @@ -2401,15 +2544,37 @@ pass_optimize_bswap::execute (function *fun) > } > break; > default: > - continue; > + goto bswap_check_end_iter; > } > > - if (bswap && !fndecl) > - continue; > - > - if (bswap_replace (cur_stmt, gsi, src_stmt, fndecl, bswap_type, > - load_type, &n, bswap)) > - changed = true; > + if (!do_bswap_p (cur_stmt, src_stmt, fndecl, load_type, &n, bswap, > + browsed_stmts_htab, used_trees)) > + goto bswap_check_end_iter; > + > + changed = true; > + gsi_cont = gsi_for_stmt (cur_stmt); > + gsi_next (&gsi_cont); > + bswap_replace (cur_stmt, gsi, src_stmt, fndecl, bswap_type, > + load_type, &n, bswap); > + rm_stmts = true; > + > +bswap_check_end_iter: > + free_usedtrees_list (used_trees, rm_stmts, cur_stmt, fun); > + > + /* cur_stmt may have been moved in bswap_replace (in case of a > + load) and/or replaced (in case of bswap). Ideally, we want the > + next iteration to check the first stmt that was before cur_stmt > + initially that is not an unused stmt. 
However it is easier and > + simpler to allow the next iteration to check the changed stmt. > */ > + if (rm_stmts) > + { > + if (gsi_end_p (gsi_cont)) > + gsi_cont = gsi_last_bb (bb); > + else > + gsi_prev (&gsi_cont); > + gsi = gsi_cont; > + goto bswap_check_start_iter; > + } > } > } > > Tested via bootstrap on x86_64-linux-gnu and no regression of the > testsuite (new tests passing) as well as through running the testsuite > compiled by arm-none-eabi-gcc cross compiler under qemu emulating > Cortex M3 without any regression (new tests passing). > > Ok for trunk? > > Best regards, > > Thomas > >
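To make the vec + hash_map suggestion in the review above concrete, here is a
minimal sketch of what the bookkeeping could look like. It is illustrative
only: the names (used_trees, stmt_index, record_used_tree) are hypothetical,
and it assumes the hash_map template mentioned in the review is available; it
shows the shape of the idea, not a drop-in replacement for the patch.

  struct usedtree
  {
    tree t;
    gimple stmt;
  };

  /* One growable array of intermediate defs instead of a malloc'd linked
     list, plus a map from stmt to its index in that array instead of a
     separate pointer htab.  */
  static vec<usedtree> used_trees;
  static hash_map<gimple, unsigned> *stmt_index;

  /* Record STMT and its lhs the first time it is visited.  Returns true
     if STMT had already been recorded.  */

  static bool
  record_used_tree (gimple stmt)
  {
    if (stmt_index->get (stmt))
      return true;
    stmt_index->put (stmt, used_trees.length ());
    usedtree u;
    u.t = gimple_assign_lhs (stmt);
    u.stmt = stmt;
    used_trees.safe_push (u);
    return false;
  }

  /* At the end of each attempted match, instead of walking a ->next chain
     and freeing nodes:  */
  /*   used_trees.truncate (0);  stmt_index->empty ();  */

Keeping the defs in one vector would also turn the removal loop in
free_usedtrees_list into a plain FOR_EACH_VEC_ELT walk and would avoid the
per-node malloc traffic the review is concerned about.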