Re: [PATCH] expansion: Further improve double-word modulo, division and divmod [PR97459]

Richard Biener Tue, 01 Dec 2020 23:52:25 -0800

On Tue, 1 Dec 2020, Jakub Jelinek wrote:

> Hi!
> 
> The following patch implements what Thomas wrote about, in particular
> that we can handle also double-word divison by the constants for which
> the earlier patch optimized modulo (if it would be otherwise a library
> call) and that we can also easily handle such constants shifted to the left.
> Unfortunately, seems CSE isn't able to optimize away the two almost
> identical sequences (one to compute remainder, one to compute quotient),
> probably because of the ADD_OVERFLOW introduced jumps, so the patch also
> adjusts expand_DIVMOD.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


OK.

Richard.

> 2020-12-01  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR rtl-optimization/97459
>       * optabs.h (expand_doubleword_divmod): Declare.
>       * optabs.c (expand_doubleword_divmod): New function.
>       (expand_binop): Use it.
>       * internal-fn.c (expand_DIVMOD): Likewise.
> 
>       * gcc.target/i386/pr97282.c (foo): Use 123456 divisor instead of
>       10.
>       * gcc.dg/pr97459-1.c (TESTS): Add tests for 10, 12 and
>       6144.
>       * gcc.dg/pr97459-2.c (TESTS): Likewise.
>       * gcc.dg/pr97459-3.c: New test.
>       * gcc.dg/pr97459-4.c: New test.
>       * gcc.dg/pr97459-5.c: New test.
>       * gcc.dg/pr97459-6.c: New test.
> 
> --- gcc/optabs.h.jj   2020-12-01 13:19:12.831127718 +0100
> +++ gcc/optabs.h      2020-12-01 17:40:08.783137431 +0100
> @@ -183,6 +183,8 @@ extern bool force_expand_binop (machine_
>                               enum optab_methods);
>  extern rtx expand_vector_broadcast (machine_mode, rtx);
>  
> +extern rtx expand_doubleword_divmod (machine_mode, rtx, rtx, rtx *, bool);
> +
>  /* Generate code for a simple binary or unary operation.  "Simple" in
>     this case means "can be unambiguously described by a (mode, code)
>     pair and mapped to a single optab."  */
> --- gcc/optabs.c.jj   2020-12-01 16:25:01.567733333 +0100
> +++ gcc/optabs.c      2020-12-01 18:11:12.878230572 +0100
> @@ -1118,6 +1118,99 @@ expand_doubleword_mod (machine_mode mode
>      }
>    return NULL_RTX;
>  }
> +
> +/* Similarly to the above function, but compute both quotient and remainder.
> +   Quotient can be computed from the remainder as:
> +   rem = op0 % op1;  // Handled using expand_doubleword_mod
> +   quot = (op0 - rem) * inv; // inv is multiplicative inverse of op1 modulo
> +                          // 2 * BITS_PER_WORD
> +
> +   We can also handle cases where op1 is a multiple of power of two constant
> +   and constant handled by expand_doubleword_mod.
> +   op11 = 1 << __builtin_ctz (op1);
> +   op12 = op1 / op11;
> +   rem1 = op0 % op12;  // Handled using expand_doubleword_mod
> +   quot1 = (op0 - rem1) * inv; // inv is multiplicative inverse of op12 
> modulo
> +                            // 2 * BITS_PER_WORD
> +   rem = (quot1 % op11) * op12 + rem1;
> +   quot = quot1 / op11;  */
> +
> +rtx
> +expand_doubleword_divmod (machine_mode mode, rtx op0, rtx op1, rtx *rem,
> +                       bool unsignedp)
> +{
> +  *rem = NULL_RTX;
> +
> +  /* Negative dividend should have been optimized into positive,
> +     similarly modulo by 1 and modulo by power of two is optimized
> +     differently too.  */
> +  if (INTVAL (op1) <= 1 || pow2p_hwi (INTVAL (op1)))
> +    return NULL_RTX;
> +
> +  rtx op11 = const1_rtx;
> +  rtx op12 = op1;
> +  if ((INTVAL (op1) & 1) == 0)
> +    {
> +      int bit = ctz_hwi (INTVAL (op1));
> +      op11 = GEN_INT (HOST_WIDE_INT_1 << bit);
> +      op12 = GEN_INT (INTVAL (op1) >> bit);
> +    }
> +
> +  rtx rem1 = expand_doubleword_mod (mode, op0, op12, unsignedp);
> +  if (rem1 == NULL_RTX)
> +    return NULL_RTX;
> +
> +  int prec = 2 * BITS_PER_WORD;
> +  wide_int a = wide_int::from (INTVAL (op12), prec + 1, UNSIGNED);
> +  wide_int b = wi::shifted_mask (prec, 1, false, prec + 1);
> +  wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED);
> +  rtx inv = immed_wide_int_const (m, mode);
> +
> +  rtx_insn *last = get_last_insn ();
> +  rtx quot1 = expand_simple_binop (mode, MINUS, op0, rem1,
> +                                NULL_RTX, unsignedp, OPTAB_DIRECT);
> +  if (quot1 == NULL_RTX)
> +    return NULL_RTX;
> +
> +  quot1 = expand_simple_binop (mode, MULT, quot1, inv,
> +                            NULL_RTX, unsignedp, OPTAB_DIRECT);
> +  if (quot1 == NULL_RTX)
> +    return NULL_RTX;
> +
> +  if (op11 != const1_rtx)
> +    {
> +      rtx rem2 = expand_divmod (1, TRUNC_MOD_EXPR, mode, quot1, op11,
> +                             NULL_RTX, unsignedp);
> +      if (rem2 == NULL_RTX)
> +     return NULL_RTX;
> +
> +      rem2 = expand_simple_binop (mode, MULT, rem2, op12, NULL_RTX,
> +                               unsignedp, OPTAB_DIRECT);
> +      if (rem2 == NULL_RTX)
> +     return NULL_RTX;
> +
> +      rem2 = expand_simple_binop (mode, PLUS, rem2, rem1, NULL_RTX,
> +                               unsignedp, OPTAB_DIRECT);
> +      if (rem2 == NULL_RTX)
> +     return NULL_RTX;
> +
> +      rtx quot2 = expand_divmod (0, TRUNC_DIV_EXPR, mode, quot1, op11,
> +                              NULL_RTX, unsignedp);
> +      if (quot2 == NULL_RTX)
> +     return NULL_RTX;
> +
> +      rem1 = rem2;
> +      quot1 = quot2;
> +    }
> +
> +  /* Punt if we need any library calls.  */
> +  for (; last; last = NEXT_INSN (last))
> +    if (CALL_P (last))
> +      return NULL_RTX;
> +
> +  *rem = rem1;
> +  return quot1;
> +}
>  
>  /* Wrapper around expand_binop which takes an rtx code to specify
>     the operation to perform, not an optab pointer.  All other
> @@ -1999,7 +2092,10 @@ expand_binop (machine_mode mode, optab b
>      }
>  
>    /* Attempt to synthetize double word modulo by constant divisor.  */
> -  if ((binoptab == umod_optab || binoptab == smod_optab)
> +  if ((binoptab == umod_optab
> +       || binoptab == smod_optab
> +       || binoptab == udiv_optab
> +       || binoptab == sdiv_optab)
>        && optimize
>        && CONST_INT_P (op1)
>        && is_int_mode (mode, &int_mode)
> @@ -2008,21 +2104,33 @@ expand_binop (machine_mode mode, optab b
>        && optab_handler (add_optab, word_mode) != CODE_FOR_nothing
>        && optimize_insn_for_speed_p ())
>      {
> -      rtx remainder = expand_doubleword_mod (int_mode, op0, op1,
> -                                          binoptab == umod_optab);
> -      if (remainder != NULL_RTX)
> +      rtx res = NULL_RTX;
> +      if ((binoptab == umod_optab || binoptab == smod_optab)
> +       && (INTVAL (op1) & 1) == 0)
> +     res = expand_doubleword_mod (int_mode, op0, op1,
> +                                  binoptab == umod_optab);
> +      else
> +     {
> +       rtx quot = expand_doubleword_divmod (int_mode, op0, op1, &res,
> +                                            binoptab == umod_optab
> +                                            || binoptab == udiv_optab);
> +       if (quot == NULL_RTX)
> +         res = NULL_RTX;
> +       else if (binoptab == udiv_optab || binoptab == sdiv_optab)
> +         res = quot;
> +     }
> +      if (res != NULL_RTX)
>       {
>         if (optab_handler (mov_optab, int_mode) != CODE_FOR_nothing)
>           {
> -           rtx_insn *move = emit_move_insn (target ? target : remainder,
> -                                            remainder);
> -           set_dst_reg_note (move,
> -                             REG_EQUAL,
> -                             gen_rtx_fmt_ee (UMOD, int_mode,
> -                                             copy_rtx (op0), op1),
> -                             target ? target : remainder);
> +           rtx_insn *move = emit_move_insn (target ? target : res,
> +                                            res);
> +           set_dst_reg_note (move, REG_EQUAL,
> +                             gen_rtx_fmt_ee (optab_to_code (binoptab),
> +                                             int_mode, copy_rtx (op0), op1),
> +                             target ? target : res);
>           }
> -       return remainder;
> +       return res;
>       }
>        else
>       delete_insns_since (last);
> --- gcc/internal-fn.c.jj      2020-11-30 10:55:33.134963320 +0100
> +++ gcc/internal-fn.c 2020-12-01 18:18:20.964436077 +0100
> @@ -3230,27 +3230,68 @@ expand_DIVMOD (internal_fn, gcall *call_
>        the division and modulo and if it emits any library calls or any
>        {,U}{DIV,MOD} rtxes throw it away and use a divmod optab or
>        divmod libcall.  */
> -      struct separate_ops ops;
> -      ops.code = TRUNC_DIV_EXPR;
> -      ops.type = type;
> -      ops.op0 = make_tree (ops.type, op0);
> -      ops.op1 = arg1;
> -      ops.op2 = NULL_TREE;
> -      ops.location = gimple_location (call_stmt);
> -      start_sequence ();
> -      quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
> -      if (contains_call_div_mod (get_insns ()))
> -     quotient = NULL_RTX;
> -      else
> +      scalar_int_mode int_mode;
> +      if (remainder == NULL_RTX
> +       && optimize
> +       && CONST_INT_P (op1)
> +       && !pow2p_hwi (INTVAL (op1))
> +       && is_int_mode (TYPE_MODE (type), &int_mode)
> +       && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
> +       && optab_handler (and_optab, word_mode) != CODE_FOR_nothing
> +       && optab_handler (add_optab, word_mode) != CODE_FOR_nothing
> +       && optimize_insn_for_speed_p ())
>       {
> -       ops.code = TRUNC_MOD_EXPR;
> -       remainder = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
> +       rtx_insn *last = get_last_insn ();
> +       remainder = NULL_RTX;
> +       quotient = expand_doubleword_divmod (int_mode, op0, op1, &remainder,
> +                                            TYPE_UNSIGNED (type));
> +       if (quotient != NULL_RTX)
> +         {
> +           if (optab_handler (mov_optab, int_mode) != CODE_FOR_nothing)
> +             {
> +               rtx_insn *move = emit_move_insn (quotient, quotient);
> +               set_dst_reg_note (move, REG_EQUAL,
> +                                 gen_rtx_fmt_ee (TYPE_UNSIGNED (type)
> +                                                 ? UDIV : DIV, int_mode,
> +                                                 copy_rtx (op0), op1),
> +                                 quotient);
> +               move = emit_move_insn (remainder, remainder);
> +               set_dst_reg_note (move, REG_EQUAL,
> +                                 gen_rtx_fmt_ee (TYPE_UNSIGNED (type)
> +                                                 ? UMOD : MOD, int_mode,
> +                                                 copy_rtx (op0), op1),
> +                                 quotient);
> +             }
> +         }
> +       else
> +         delete_insns_since (last);
> +     }
> +
> +      if (remainder == NULL_RTX)
> +     {
> +       struct separate_ops ops;
> +       ops.code = TRUNC_DIV_EXPR;
> +       ops.type = type;
> +       ops.op0 = make_tree (ops.type, op0);
> +       ops.op1 = arg1;
> +       ops.op2 = NULL_TREE;
> +       ops.location = gimple_location (call_stmt);
> +       start_sequence ();
> +       quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
>         if (contains_call_div_mod (get_insns ()))
> -         remainder = NULL_RTX;
> +         quotient = NULL_RTX;
> +       else
> +         {
> +           ops.code = TRUNC_MOD_EXPR;
> +           remainder = expand_expr_real_2 (&ops, NULL_RTX, mode,
> +                                           EXPAND_NORMAL);
> +           if (contains_call_div_mod (get_insns ()))
> +             remainder = NULL_RTX;
> +         }
> +       if (remainder)
> +         insns = get_insns ();
> +       end_sequence ();
>       }
> -      if (remainder)
> -     insns = get_insns ();
> -      end_sequence ();
>      }
>  
>    if (remainder)
> --- gcc/testsuite/gcc.target/i386/pr97282.c.jj        2020-10-06 
> 10:32:14.769771587 +0200
> +++ gcc/testsuite/gcc.target/i386/pr97282.c   2020-12-01 21:52:57.901708048 
> +0100
> @@ -18,8 +18,8 @@ foo (T x)
>    unsigned long ret = 0;
>    while (x > 0)
>      {
> -      ret = ret + x % 10;
> -      x = x / 10;
> +      ret = ret + x % 123456;
> +      x = x / 123456;
>      }
>    return ret;
>  }
> --- gcc/testsuite/gcc.dg/pr97459-1.c.jj       2020-11-30 10:55:33.135963309 
> +0100
> +++ gcc/testsuite/gcc.dg/pr97459-1.c  2020-12-01 18:23:25.243031163 +0100
> @@ -24,7 +24,7 @@ T __attribute__((noipa)) foo (T x, T n)
>  #define C3(n) C2(n##0) C2(n##4) C2(n##9)
>  #define C4(n) C3(n##0) C3(n##3) C3(n##7)
>  #endif
> -#define TESTS C4(1)
> +#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
>  
>  TESTS
>  
> --- gcc/testsuite/gcc.dg/pr97459-2.c.jj       2020-11-30 10:55:33.136963298 
> +0100
> +++ gcc/testsuite/gcc.dg/pr97459-2.c  2020-12-01 18:23:51.423738268 +0100
> @@ -26,7 +26,7 @@ T __attribute__((noipa)) foo (T x, T n)
>  #define C3(n) C2(n##0) C2(n##4) C2(n##9)
>  #define C4(n) C3(n##0) C3(n##3) C3(n##7)
>  #endif
> -#define TESTS C4(1)
> +#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
>  
>  TESTS
>  
> --- gcc/testsuite/gcc.dg/pr97459-3.c.jj       2020-12-01 18:25:38.452540896 
> +0100
> +++ gcc/testsuite/gcc.dg/pr97459-3.c  2020-12-01 18:26:00.662292428 +0100
> @@ -0,0 +1,54 @@
> +/* PR rtl-optimization/97459 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */
> +
> +#ifdef __SIZEOF_INT128__
> +typedef __uint128_t T;
> +#else
> +typedef unsigned long long T;
> +#endif
> +
> +T __attribute__((noipa)) foo (T x, T n) { return x / n; }
> +#define C(n) T __attribute__((noipa)) foo##n (T x) { return x / (n - 10000); 
> }
> +
> +#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9)
> +#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \
> +           C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9)
> +#ifdef EXPENSIVE
> +#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \
> +           C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9)
> +#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \
> +           C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9)
> +#else
> +#define C3(n) C2(n##0) C2(n##4) C2(n##9)
> +#define C4(n) C3(n##0) C3(n##3) C3(n##7)
> +#endif
> +#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
> +
> +TESTS
> +
> +struct S { T x; T (*foo) (T); };
> +
> +#undef C
> +#define C(n) { n - 10000, foo##n },
> +
> +struct S tests[] = {
> +TESTS
> +  { 0, 0 }
> +};
> +
> +int
> +main ()
> +{
> +  int i, j, k;
> +  for (k = 0; tests[k].x; k++)
> +    for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++)
> +      for (j = -5; j <= 5; j++)
> +     {
> +       T x = ((T) 1 << i) + j;
> +       if (foo (x, tests[k].x) != tests[k].foo (x))
> +         __builtin_abort ();
> +     }
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr97459-4.c.jj       2020-12-01 18:26:10.272184915 
> +0100
> +++ gcc/testsuite/gcc.dg/pr97459-4.c  2020-12-01 18:26:27.345993905 +0100
> @@ -0,0 +1,57 @@
> +/* PR rtl-optimization/97459 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */
> +
> +#ifdef __SIZEOF_INT128__
> +typedef __int128_t T;
> +typedef __uint128_t U;
> +#else
> +typedef long long T;
> +typedef unsigned long long U;
> +#endif
> +
> +T __attribute__((noipa)) foo (T x, T n) { return x / n; }
> +#define C(n) T __attribute__((noipa)) foo##n (T x) { return x / (n - 10000); 
> }
> +
> +#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9)
> +#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \
> +           C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9)
> +#ifdef EXPENSIVE
> +#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \
> +           C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9)
> +#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \
> +           C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9)
> +#else
> +#define C3(n) C2(n##0) C2(n##4) C2(n##9)
> +#define C4(n) C3(n##0) C3(n##3) C3(n##7)
> +#endif
> +#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
> +
> +TESTS
> +
> +struct S { T x; T (*foo) (T); };
> +
> +#undef C
> +#define C(n) { n - 10000, foo##n },
> +
> +struct S tests[] = {
> +TESTS
> +  { 0, 0 }
> +};
> +
> +int
> +main ()
> +{
> +  int i, j, k;
> +  for (k = 0; tests[k].x; k++)
> +    for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++)
> +      for (j = -5; j <= 5; j++)
> +     {
> +       U x = ((U) 1 << i) + j;
> +       if (foo ((T) x, tests[k].x) != tests[k].foo ((T) x)
> +           || foo ((T) -x, tests[k].x) != tests[k].foo ((T) -x))
> +         __builtin_abort ();
> +     }
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr97459-5.c.jj       2020-12-01 18:27:03.386590701 
> +0100
> +++ gcc/testsuite/gcc.dg/pr97459-5.c  2020-12-01 18:28:30.324618095 +0100
> @@ -0,0 +1,56 @@
> +/* PR rtl-optimization/97459 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */
> +
> +#ifdef __SIZEOF_INT128__
> +typedef __uint128_t T;
> +#else
> +typedef unsigned long long T;
> +#endif
> +
> +T __attribute__((noipa)) foo (T x, T n, T *r) { *r = x % n; return x / n; }
> +#define C(n) T __attribute__((noipa)) foo##n (T x, T *r) { *r = x % (n - 
> 10000); return x / (n - 10000); }
> +
> +#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9)
> +#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \
> +           C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9)
> +#ifdef EXPENSIVE
> +#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \
> +           C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9)
> +#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \
> +           C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9)
> +#else
> +#define C3(n) C2(n##0) C2(n##4) C2(n##9)
> +#define C4(n) C3(n##0) C3(n##3) C3(n##7)
> +#endif
> +#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
> +
> +TESTS
> +
> +struct S { T x; T (*foo) (T, T *); };
> +
> +#undef C
> +#define C(n) { n - 10000, foo##n },
> +
> +struct S tests[] = {
> +TESTS
> +  { 0, 0 }
> +};
> +
> +int
> +main ()
> +{
> +  int i, j, k;
> +  for (k = 0; tests[k].x; k++)
> +    for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++)
> +      for (j = -5; j <= 5; j++)
> +     {
> +       T x = ((T) 1 << i) + j;
> +       T r1, r2;
> +       if (foo (x, tests[k].x, &r1) != tests[k].foo (x, &r2)
> +           || r1 != r2)
> +         __builtin_abort ();
> +     }
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr97459-6.c.jj       2020-12-01 18:28:55.452336978 
> +0100
> +++ gcc/testsuite/gcc.dg/pr97459-6.c  2020-12-01 18:31:50.667376785 +0100
> @@ -0,0 +1,62 @@
> +/* PR rtl-optimization/97459 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */
> +
> +#ifdef __SIZEOF_INT128__
> +typedef __int128_t T;
> +typedef __uint128_t U;
> +#else
> +typedef long long T;
> +typedef unsigned long long U;
> +#endif
> +
> +T __attribute__((noipa)) foo (T x, T n, T *r) { *r = x % n; return x / n; }
> +#define C(n) T __attribute__((noipa)) foo##n (T x, T *r) { *r = x % (n - 
> 10000); return x / (n - 10000); }
> +
> +#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9)
> +#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \
> +           C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9)
> +#ifdef EXPENSIVE
> +#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \
> +           C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9)
> +#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \
> +           C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9)
> +#else
> +#define C3(n) C2(n##0) C2(n##4) C2(n##9)
> +#define C4(n) C3(n##0) C3(n##3) C3(n##7)
> +#endif
> +#define TESTS C4(1) C1(10010) C1(10012) C1(16144)
> +
> +TESTS
> +
> +struct S { T x; T (*foo) (T, T *); };
> +
> +#undef C
> +#define C(n) { n - 10000, foo##n },
> +
> +struct S tests[] = {
> +TESTS
> +  { 0, 0 }
> +};
> +
> +int
> +main ()
> +{
> +  int i, j, k;
> +  for (k = 0; tests[k].x; k++)
> +    for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++)
> +      for (j = -5; j <= 5; j++)
> +     {
> +       U x = ((U) 1 << i) + j;
> +       T r1 = 0, r2 = 0;
> +       if (foo ((T) x, tests[k].x, &r1) != tests[k].foo ((T) x, &r2)
> +           || r1 != r2)
> +         __builtin_abort ();
> +       r1 = 0; r2 = 0;
> +       if (foo ((T) -x, tests[k].x, &r1) != tests[k].foo ((T) -x, &r2)
> +           || r1 != r2)
> +         __builtin_abort ();
> +     }
> +  return 0;
> +}
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [PATCH] expansion: Further improve double-word modulo, division and divmod [PR97459]

Reply via email to