------- Comment #3 from vda dot linux at googlemail dot com 2006-07-19 16:24 ------- I instrumented choose_multiplier(). When called like this: choose_multiplier(d=1577682821,n=32,precision=32), it returns *post_shift_ptr=31,multiplier=5846151023
whereas optimal one is *post_shift_ptr=27,multiplier=365384439 In the code: "////" comments show variable values if d=1577682821. The instrumented version is below. Too bad there are no comments how the code works, what is mlow and mhigh. Note that the good value, if shifted left 4 times, 365384439<<4, is equal to 5846151024, which is mhigh+1! With mhigh=5846151024 "Reduce to lowest terms" part of code would reduce mhigh/mlow pair exactly to correct value of 365384439, I think... Isn't this a reason why choose_multiplier() doesn't consider 365384439 to be a valid multiplier? Maybe mhigh calcucation is off-by-one? unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr) { HOST_WIDE_INT mhigh_hi, mlow_hi; unsigned HOST_WIDE_INT mhigh_lo, mlow_lo; int lgup, post_shift; int pow, pow2; unsigned HOST_WIDE_INT nl, dummy1; HOST_WIDE_INT nh, dummy2; static int divdebug = -1; if (divdebug == -1) divdebug = NULL!=getenv("DIV_DEBUG"); divdebug && printf("choose_multiplier(d=%llu,n=%i,precision=%i)\n",(unsigned long long)d,n,precision); //// d=1577682821,n=32,precision=32 /* lgup = ceil(log2(divisor)); */ lgup = ceil_log2 (d); //// 31 gcc_assert (lgup <= n); pow = n + lgup; //// 63 pow2 = n + lgup - precision; //// 31 /* We could handle this with some effort, but this case is much better handled directly with a scc insn, so rely on caller using that. */ gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT); /* mlow = 2^(N + lgup)/d */ if (pow >= HOST_BITS_PER_WIDE_INT) //// yes { nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT); //// 1<<31 nl = 0; } else { nh = 0; nl = (unsigned HOST_WIDE_INT) 1 << pow; } //// 1<<63 / d: mlow=5846151022 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0, &mlow_lo, &mlow_hi, &dummy1, &dummy2); divdebug && printf("choose_multiplier: mlow=%llu\n",((unsigned long long)mlow_hi<<32)+mlow_lo); /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */ if (pow2 >= HOST_BITS_PER_WIDE_INT) //// no nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT); else nl |= (unsigned HOST_WIDE_INT) 1 << pow2; //// 1<<31 //// (1<<63+1<<31) / d: mhigh=5846151023 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0, &mhigh_lo, &mhigh_hi, &dummy1, &dummy2); divdebug && printf("choose_multiplier: mhigh=%llu\n",((unsigned long long)mhigh_hi<<32)+mhigh_lo); gcc_assert (!mhigh_hi || nh - d < d); gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1); /* Assert that mlow < mhigh. */ gcc_assert (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)); /* If precision == N, then mlow, mhigh exceed 2^N (but they do not exceed 2^(N+1)). */ /* Reduce to lowest terms. */ for (post_shift = lgup; post_shift > 0; post_shift--) { unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1); unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1); if (ml_lo >= mh_lo) //// if mlow/2 == mhigh/2 - bail out break; mlow_hi = 0; mlow_lo = ml_lo; mhigh_hi = 0; mhigh_lo = mh_lo; } *post_shift_ptr = post_shift; *lgup_ptr = lgup; if (n < HOST_BITS_PER_WIDE_INT) { unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1; *multiplier_ptr = GEN_INT (mhigh_lo & mask); divdebug && printf("choose_multiplier: *post_shift_ptr=%i,*lgup_ptr=%i,multiplier=%llu\n",*post_shift_ptr,*lgup_ptr,(unsigned long long)(mhigh_lo & mask) + ((unsigned long long)(mhigh_lo >= mask) << 32)); return mhigh_lo >= mask; } else { *multiplier_ptr = GEN_INT (mhigh_lo); divdebug && printf("choose_multiplier: *post_shift_ptr=%i,*lgup_ptr=%i,multiplier=%llu\n",*post_shift_ptr,*lgup_ptr,(unsigned long long)(mhigh_lo) + ((unsigned long long)(mhigh_hi) << 32)); return mhigh_hi; } } -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28417