Christophe Lyon wrote: > Hi, > > The attached patch catches C constructs: > (A << 8) | (A >> 8) > where A is unsigned 16 bits > and maps them to builtin_bswap16(A) which can provide more efficient > implementations on some targets. > > The construct above is equivalent to the default bswap16 implementation. > > I have added a testcase for ARM, and have found no regression with > qemu-arm on arm-none-linux-gnueabi. > > OK? > > Christophe > > 2012-09-19 Christophe Lyon <christophe.l...@linaro.org> > > gcc/ > * fold-const.c (fold_binary_loc): call builtin_bswap16 when the > equivalent construct is detected. > > gcc/testsuite/ > * gcc.target/arm/builtin-bswap-2.c: New testcase. > > diff --git a/gcc/fold-const.c b/gcc/fold-const.c > index 2bf5179..0ff7e8b 100644 > --- a/gcc/fold-const.c > +++ b/gcc/fold-const.c > @@ -10326,6 +10326,99 @@ fold_binary_loc (location_t loc, > } > } > > + /* Catch bswap16 construct: (A << 8) | (A >> 8) where A is > + unsigned 16 bits. > + This has been expanded into: > + (ior:SI (lshift:SI (nop:SI A:HI) 8) > + (nop:SI (rshift:HI A:HI 8))) > + */
This seems overly complicated on 16-bit platforms where (A << 8) | (A >> 8) is the same as rotate:HI (A 8). Does your patch make sure that (A << 8) | (A >> 8) is still mapped to ROTATE on these targets? Johann > + { > + enum tree_code code0, code1; > + tree my_arg0 = arg0; > + tree my_arg1= arg1; > + tree rtype; > + > + code0 = TREE_CODE (arg0); > + code1 = TREE_CODE (arg1); > + if (code1 == NOP_EXPR) > + { > + my_arg1 = TREE_OPERAND (arg1, 0); > + code1 = TREE_CODE (my_arg1); > + } > + else if (code0 == NOP_EXPR) > + { > + my_arg0 = TREE_OPERAND (arg0, 0); > + code0 = TREE_CODE (my_arg0); > + } > + > + /* Handle (A << C1) + (A >> C1). */ > + if ((code1 == RSHIFT_EXPR && code0 == LSHIFT_EXPR) > + && (TREE_CODE (TREE_OPERAND (my_arg0, 0)) == NOP_EXPR) > + && operand_equal_p (TREE_OPERAND (TREE_OPERAND (my_arg0, 0), 0), > + TREE_OPERAND (my_arg1, 0), 0) > + && (rtype = TREE_TYPE (TREE_OPERAND (my_arg1, 0)), > + TYPE_UNSIGNED (rtype))) > + { > + tree tree01, tree11; > + enum tree_code code01, code11; > + > + tree01 = TREE_OPERAND (my_arg0, 1); > + tree11 = TREE_OPERAND (my_arg1, 1); > + STRIP_NOPS (tree01); > + STRIP_NOPS (tree11); > + code01 = TREE_CODE (tree01); > + code11 = TREE_CODE (tree11); > + > + /* Check that shift amount is 8, and input 16 bits wide. */ > + if (code01 == INTEGER_CST > + && code11 == INTEGER_CST > + && TREE_INT_CST_HIGH (tree01) == 0 > + && TREE_INT_CST_HIGH (tree11) == 0 > + && TREE_INT_CST_LOW (tree01) == TREE_INT_CST_LOW (tree11) > + && TREE_INT_CST_LOW (tree01) == 8 > + && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (my_arg1, 0))) == 16) > + { > + tree bswapfn = builtin_decl_explicit (BUILT_IN_BSWAP16); > + return build_call_expr_loc (loc, bswapfn, 1, > + TREE_OPERAND (my_arg1, 0)); > + } > + } > + > + /* Handle (A >> C1) + (A << C1). */ > + else if ((code0 == RSHIFT_EXPR && code1 == LSHIFT_EXPR) > + && (TREE_CODE (TREE_OPERAND (my_arg1, 0)) == NOP_EXPR) > + && operand_equal_p (TREE_OPERAND (TREE_OPERAND (my_arg1, 0), > + 0), > + TREE_OPERAND (my_arg0, 0), 0) > + && (rtype = TREE_TYPE (TREE_OPERAND (my_arg0, 0)), > + TYPE_UNSIGNED (rtype))) > + { > + tree tree01, tree11; > + enum tree_code code01, code11; > + > + tree01 = TREE_OPERAND (my_arg0, 1); > + tree11 = TREE_OPERAND (my_arg1, 1); > + STRIP_NOPS (tree01); > + STRIP_NOPS (tree11); > + code01 = TREE_CODE (tree01); > + code11 = TREE_CODE (tree11); > + > + /* Check that shift amount is 8, and input 16 bits wide. */ > + if (code01 == INTEGER_CST > + && code11 == INTEGER_CST > + && TREE_INT_CST_HIGH (tree01) == 0 > + && TREE_INT_CST_HIGH (tree11) == 0 > + && TREE_INT_CST_LOW (tree01) == TREE_INT_CST_LOW (tree11) > + && TREE_INT_CST_LOW (tree01) == 8 > + && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (my_arg0, 0))) == 16) > + { > + tree bswapfn = builtin_decl_explicit (BUILT_IN_BSWAP16); > + return build_call_expr_loc (loc, bswapfn, 1, > + TREE_OPERAND (my_arg0, 0)); > + } > + } > + } > + > associate: > /* In most languages, can't associate operations on floats through > parentheses. Rather than remember where the parentheses were, we > diff --git a/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c > b/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c > new file mode 100644 > index 0000000..93dbb35 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/builtin-bswap-2.c > @@ -0,0 +1,25 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-require-effective-target arm_arch_v6_ok } */ > +/* { dg-add-options arm_arch_v6 } */ > +/* { dg-final { scan-assembler-not "orr\[ \t\]" } } */ > + > +unsigned short swapu16_1 (unsigned short x) > +{ > + return (x << 8) | (x >> 8); > +} > + > +unsigned short swapu16_2 (unsigned short x) > +{ > + return (x >> 8) | (x << 8); > +} > + > +unsigned int swapu32_1 (unsigned int x) > +{ > + return (x << 16) | (x >> 16); > +} > + > +unsigned int swapu32_2 (unsigned int x) > +{ > + return (x >> 16) | (x << 16); > +}