Christophe Lyon wrote:
> Hi,
> 
> The attached patch catches C constructs:
> (A << 8) | (A >> 8)
> where A is unsigned 16 bits
> and maps them to builtin_bswap16(A) which can provide more efficient
> implementations on some targets.
> 
> The construct above is equivalent to the default bswap16 implementation.
> 
> I have added a testcase for ARM, and have found no regression with
> qemu-arm on arm-none-linux-gnueabi.
> 
> OK?
> 
> Christophe
> 
> 2012-09-19  Christophe Lyon <christophe.l...@linaro.org>
> 
>       gcc/
>       * fold-const.c (fold_binary_loc): Call builtin_bswap16 when the
>       equivalent construct is detected.
> 
>       gcc/testsuite/
>       * gcc.target/arm/builtin-bswap-2.c: New testcase.
> 
> diff --git a/gcc/fold-const.c b/gcc/fold-const.c
> index 2bf5179..0ff7e8b 100644
> --- a/gcc/fold-const.c
> +++ b/gcc/fold-const.c
> @@ -10326,6 +10326,99 @@ fold_binary_loc (location_t loc,
>         }
>        }
>  
> +      /* Catch bswap16 construct: (A << 8) | (A >> 8) where A is
> +         unsigned 16 bits.
> +         This has been expanded into:
> +      (ior:SI (lshift:SI (nop:SI A:HI) 8)
> +              (nop:SI (rshift:HI A:HI 8)))
> +      */

This seems overly complicated on 16-bit platforms, where (A << 8) | (A >> 8)
is simply a 16-bit rotate by 8, i.e. (rotate:HI A 8).

Does your patch make sure that (A << 8) | (A >> 8) is still mapped to ROTATE
on these targets?

Johann



> +      {
> +     enum tree_code code0, code1;
> +     tree my_arg0 = arg0;
> +     tree my_arg1= arg1;
> +     tree rtype;
> +
> +     code0 = TREE_CODE (arg0);
> +     code1 = TREE_CODE (arg1);
> +     if (code1 == NOP_EXPR)
> +       {
> +         my_arg1 = TREE_OPERAND (arg1, 0);
> +         code1 = TREE_CODE (my_arg1);
> +       }
> +     else if (code0 == NOP_EXPR)
> +       {
> +         my_arg0 = TREE_OPERAND (arg0, 0);
> +         code0 = TREE_CODE (my_arg0);
> +       }
> +
> +     /* Handle (A << C1) + (A >> C1).  */
> +     if ((code1 == RSHIFT_EXPR && code0 == LSHIFT_EXPR)
> +          && (TREE_CODE (TREE_OPERAND (my_arg0, 0)) == NOP_EXPR)
> +          && operand_equal_p (TREE_OPERAND (TREE_OPERAND (my_arg0, 0), 0),
> +                              TREE_OPERAND (my_arg1, 0), 0)
> +          && (rtype = TREE_TYPE (TREE_OPERAND (my_arg1, 0)),
> +              TYPE_UNSIGNED (rtype)))
> +       {
> +         tree tree01, tree11;
> +         enum tree_code code01, code11;
> +
> +         tree01 = TREE_OPERAND (my_arg0, 1);
> +         tree11 = TREE_OPERAND (my_arg1, 1);
> +         STRIP_NOPS (tree01);
> +         STRIP_NOPS (tree11);
> +         code01 = TREE_CODE (tree01);
> +         code11 = TREE_CODE (tree11);
> +
> +         /* Check that shift amount is 8, and input 16 bits wide.  */
> +         if (code01 == INTEGER_CST
> +             && code11 == INTEGER_CST
> +             && TREE_INT_CST_HIGH (tree01) == 0
> +             && TREE_INT_CST_HIGH (tree11) == 0
> +             && TREE_INT_CST_LOW (tree01) == TREE_INT_CST_LOW (tree11)
> +             && TREE_INT_CST_LOW (tree01) == 8
> +             && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (my_arg1, 0))) == 16)
> +           {
> +             tree bswapfn = builtin_decl_explicit (BUILT_IN_BSWAP16);
> +             return build_call_expr_loc (loc, bswapfn, 1,
> +                                         TREE_OPERAND (my_arg1, 0));
> +           }
> +       }
> +
> +     /* Handle (A >> C1) + (A << C1).  */
> +     else if ((code0 == RSHIFT_EXPR && code1 == LSHIFT_EXPR)
> +              && (TREE_CODE (TREE_OPERAND (my_arg1, 0)) == NOP_EXPR)
> +              && operand_equal_p (TREE_OPERAND (TREE_OPERAND (my_arg1, 0),
> +                                                0),
> +                                  TREE_OPERAND (my_arg0, 0), 0)
> +              && (rtype = TREE_TYPE (TREE_OPERAND (my_arg0, 0)),
> +                  TYPE_UNSIGNED (rtype)))
> +       {
> +         tree tree01, tree11;
> +         enum tree_code code01, code11;
> +
> +         tree01 = TREE_OPERAND (my_arg0, 1);
> +         tree11 = TREE_OPERAND (my_arg1, 1);
> +         STRIP_NOPS (tree01);
> +         STRIP_NOPS (tree11);
> +         code01 = TREE_CODE (tree01);
> +         code11 = TREE_CODE (tree11);
> +
> +         /* Check that shift amount is 8, and input 16 bits wide.  */
> +         if (code01 == INTEGER_CST
> +             && code11 == INTEGER_CST
> +             && TREE_INT_CST_HIGH (tree01) == 0
> +             && TREE_INT_CST_HIGH (tree11) == 0
> +             && TREE_INT_CST_LOW (tree01) == TREE_INT_CST_LOW (tree11)
> +             && TREE_INT_CST_LOW (tree01) == 8
> +             && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (my_arg0, 0))) == 16)
> +           {
> +             tree bswapfn = builtin_decl_explicit (BUILT_IN_BSWAP16);
> +             return build_call_expr_loc (loc, bswapfn, 1,
> +                                         TREE_OPERAND (my_arg0, 0));
> +           }
> +       }
> +      }
> +
>      associate:
>        /* In most languages, can't associate operations on floats through
>        parentheses.  Rather than remember where the parentheses were, we
> diff --git a/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c b/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c
> new file mode 100644
> index 0000000..93dbb35
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-require-effective-target arm_arch_v6_ok } */
> +/* { dg-add-options arm_arch_v6 } */
> +/* { dg-final { scan-assembler-not "orr\[ \t\]" } } */
> +
> +unsigned short swapu16_1 (unsigned short x)
> +{
> +  return (x << 8) | (x >> 8);
> +}
> +
> +unsigned short swapu16_2 (unsigned short x)
> +{
> +  return (x >> 8) | (x << 8);
> +}
> +
> +unsigned int swapu32_1 (unsigned int x)
> +{
> +  return (x << 16) | (x >> 16);
> +}
> +
> +unsigned int swapu32_2 (unsigned int x)
> +{
> +  return (x >> 16) | (x << 16);
> +}


Reply via email to