On Mon, Jun 29, 2026 at 4:17 PM Abhishek Kaushik
<[email protected]> wrote:
>
> For a widening conversion of the result of a signed multiply by a
> positive power of two, rewrite the sequence to widen the multiplicand
> before multiplying.  This exposes the form that AArch64 can emit as
> SBFIZ and avoids a separate sign extension.  The rewrite is valid
> because overflow in the original signed multiply is undefined.
>
> gcc/
>         * config/aarch64/aarch64.cc (aarch64_try_widen_mult_by_pow2): New
>         function.
>         (aarch64_instruction_selection): Call it for conversion assignments.
>
> gcc/testsuite/
>         * gcc.target/aarch64/sbfiz-widen-mult-1.c: New test.
> ---
>  gcc/config/aarch64/aarch64.cc                 | 83 +++++++++++++++++++
>  .../gcc.target/aarch64/sbfiz-widen-mult-1.c   | 13 +++
>  2 files changed, 96 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sbfiz-widen-mult-1.c
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 124e6dc37cc..3e825d88957 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -2229,6 +2229,85 @@ aarch64_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
>    return nops == 3 ? ops[2] : ops[0];
>  }
>
> +/* Try to widen a signed, overflow-undefined multiply by a power of two before
> +   converting it to a wider integral type.
> +
> +   This helps AArch64 instruction selection expose a form that can be emitted
> +   as SBFIZ, avoiding an otherwise separate sign-extension around the
> +   shift/bitfield operation.
> +
> +   For example, rewrite:
> +
> +     _2 = _1 * 2;
> +     _3 = (long int) _2;
> +
> +   into:
> +
> +     _6 = (long int) _1;
> +     _3 = _6 * 2;
> +
> +   This is valid because overflow in the original narrow signed multiply is
> +   undefined.  For all defined executions, widening the multiplicand before the
> +   multiply produces the same value as multiplying in the narrow type and then
> +   converting the result.
> +
> +   The original narrow multiply is removed immediately.  There is no DCE pass
> +   after AArch64 instruction selection, so leaving it behind would keep dead
> +   multiplications in the final optimized GIMPLE dump.  */
> +static bool
> +aarch64_try_widen_mult_by_pow2 (const gassign *convert,
> +                               gimple_stmt_iterator *gsi)
> +{
> +  tree type = TREE_TYPE (gimple_assign_lhs (convert));
> +  tree inner = gimple_assign_rhs1 (convert);
> +  tree inner_type = TREE_TYPE (inner);

I think we want to check that both types satisfy type_has_mode_precision_p
here, or maybe just check that the precision is `<= BITS_PER_WORD`.
This is so that you don't introduce a multiply in TImode.
For example:
```
__int128_t extend_64to128_by2  (long long x)  { return x * 2; }
__int128_t extend_64to128_by2_  (long long x)
{
    __int128_t t = x;
    return t * 2;
}
```
Here the first form is better than the second, so the rewrite should not
be applied when the wider type is 128 bits.

> +
> +  if (!INTEGRAL_TYPE_P (type)
> +      || !INTEGRAL_TYPE_P (inner_type)
> +      || !TYPE_OVERFLOW_UNDEFINED (inner_type)
> +      || TYPE_PRECISION (type) <= TYPE_PRECISION (inner_type)
> +      || TREE_CODE (inner) != SSA_NAME
> +      || !has_single_use (inner))
> +    return false;
> +
> +  gimple *stmt = SSA_NAME_DEF_STMT (inner);
> +  if (!is_gimple_assign (stmt)
> +      || gimple_assign_rhs_code (stmt) != MULT_EXPR)
> +    return false;
> +
> +  tree multiplicand = gimple_assign_rhs1 (stmt);
> +  tree pow2const = gimple_assign_rhs2 (stmt);
> +  if (!integer_pow2p (pow2const)
> +      || tree_int_cst_sgn (pow2const) <= 0)
> +    return false;
> +
> +  gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt);
> +
> +  tree widened_multiplicand = make_ssa_name (type);
> +  gassign *convert_stmt
> +    = gimple_build_assign (widened_multiplicand, NOP_EXPR, multiplicand);
> +
> +  gsi_insert_before (gsi, convert_stmt, GSI_SAME_STMT);

Maybe use gimple_convert here instead of building the NOP_EXPR assignment
by hand?

> +
> +  tree widened_pow2const = fold_convert (type, pow2const);
> +
> +  tree mul_lhs = gimple_assign_lhs (convert);
> +  gassign *mul_stmt
> +    = gimple_build_assign (mul_lhs, MULT_EXPR,
> +                          widened_multiplicand,
> +                          widened_pow2const);
> +
> +  gsi_replace (gsi, mul_stmt, true);
> +
> +  /* INNER was used only by CONVERT, which we just replaced.  The defining
> +     multiply is therefore dead, so remove it.  */
> +  gcc_checking_assert (has_zero_uses (inner));
> +  gsi_remove (&stmt_gsi, true);
> +  release_defs (stmt);
> +
> +  return true;
> +}
> +
>  /* Implement TARGET_INSTRUCTION_SELECTION.  The target hook is used to
>     change generic sequences to a form AArch64 has an easier time expanding
>     instructions for.  It's not supposed to be used for generic rewriting that
> @@ -2243,6 +2322,10 @@ aarch64_instruction_selection (function * /* fun */, gimple_stmt_iterator *gsi)
>    if (!assign)
>      return false;
>
> +  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))
> +      && aarch64_try_widen_mult_by_pow2 (assign, gsi))
> +    return true;
> +
>    /* Convert
>         p == q ? s1 : s2;
>       to
> diff --git a/gcc/testsuite/gcc.target/aarch64/sbfiz-widen-mult-1.c b/gcc/testsuite/gcc.target/aarch64/sbfiz-widen-mult-1.c
> new file mode 100644
> index 00000000000..fa664eaa75e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sbfiz-widen-mult-1.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdint.h>
> +
> +int32_t extend_8to32_by2  (int8_t x)  { return x * 2; }
> +int32_t extend_16to32_by2 (int16_t x) { return x * 2; }
> +int64_t extend_8to64_by2  (int8_t x)  { return x * 2; }
> +int64_t extend_16to64_by2 (int16_t x) { return x * 2; }
> +int64_t extend_32to64_by2 (int32_t x) { return 2 * x; }
> +
> +/* { dg-final { scan-assembler-times "\\tsbfiz\\t" 5 } } */
> +/* { dg-final { scan-assembler-not "\\tsxtw\\t" } } */

I think we should use check-function-bodies instead of
scan-assembler-times here.

> --
> 2.43.0
>

Reply via email to