Looks good to me. It might be useful to add a short comment above the disabled `if (0)` branch (the powf-based version) noting that it is disabled because it is a bit slower, for the benefit of the next person who reads the code.
Reviewed-by: Zack Rusin <[email protected]> ----- Original Message ----- > From: Roland Scheidegger <[email protected]> > > Should be much faster, seems to work in softpipe. > While here (also it's now disabled) fix up the pow factor - the former value > is what is in GL core it is however not actually accurate to fp32 standard > (as it is 1.0/2.4), and if someone would do all the accurate math there's no > reason to waste 8 mantissa bits or so... > > v2: use real table generating function instead of just printing the values > (might take a bit longer as it does calculations on some 3+ million floats > but much more descriptive obviously). > Also fix up another pow factor (this time in the python code) - wondering > where the couple one bit errors came from :-(. > --- > src/gallium/auxiliary/util/u_format_srgb.h | 55 > +++++++++++++++++++++----- > src/gallium/auxiliary/util/u_format_srgb.py | 57 > ++++++++++++++++++++++++++- > 2 files changed, 101 insertions(+), 11 deletions(-) > > diff --git a/src/gallium/auxiliary/util/u_format_srgb.h > b/src/gallium/auxiliary/util/u_format_srgb.h > index 82ed957..f3e1b20 100644 > --- a/src/gallium/auxiliary/util/u_format_srgb.h > +++ b/src/gallium/auxiliary/util/u_format_srgb.h > @@ -39,6 +39,7 @@ > > > #include "pipe/p_compiler.h" > +#include "u_pack_color.h" > #include "u_math.h" > > > @@ -51,23 +52,57 @@ util_format_srgb_to_linear_8unorm_table[256]; > extern const uint8_t > util_format_linear_to_srgb_8unorm_table[256]; > > +extern const unsigned > +util_format_linear_to_srgb_helper_table[104]; > + > > /** > * Convert a unclamped linear float to srgb value in the [0,255]. > - * XXX this hasn't been tested (render to srgb surface). > - * XXX this needs optimization. 
> */ > static INLINE uint8_t > util_format_linear_float_to_srgb_8unorm(float x) > { > - if (x >= 1.0f) > - return 255; > - else if (x >= 0.0031308f) > - return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f); > - else if (x > 0.0f) > - return float_to_ubyte(12.92f * x); > - else > - return 0; > + if (0) { > + if (x >= 1.0f) > + return 255; > + else if (x >= 0.0031308f) > + return float_to_ubyte(1.055f * powf(x, 0.41666666f) - 0.055f); > + else if (x > 0.0f) > + return float_to_ubyte(12.92f * x); > + else > + return 0; > + } > + else { > + /* > + * This is taken from https://gist.github.com/rygorous/2203834 > + * Use LUT and do linear interpolation. > + */ > + union fi almostone, minval, f; > + unsigned tab, bias, scale, t; > + > + almostone.ui = 0x3f7fffff; > + minval.ui = (127-13) << 23; > + > + /* > + * Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, > respectively. > + * The tests are carefully written so that NaNs map to 0, same as in > the > + * reference implementation. 
> + */ > + if (!(x > minval.f)) > + x = minval.f; > + if (x > almostone.f) > + x = almostone.f; > + > + /* Do the table lookup and unpack bias, scale */ > + f.f = x; > + tab = util_format_linear_to_srgb_helper_table[(f.ui - minval.ui) >> > 20]; > + bias = (tab >> 16) << 9; > + scale = tab & 0xffff; > + > + /* Grab next-highest mantissa bits and perform linear interpolation */ > + t = (f.ui >> 12) & 0xff; > + return (uint8_t) ((bias + scale*t) >> 16); > + } > } > > > diff --git a/src/gallium/auxiliary/util/u_format_srgb.py > b/src/gallium/auxiliary/util/u_format_srgb.py > index cd63ae7..c6c02f0 100644 > --- a/src/gallium/auxiliary/util/u_format_srgb.py > +++ b/src/gallium/auxiliary/util/u_format_srgb.py > @@ -40,6 +40,7 @@ CopyRight = ''' > > > import math > +import struct > > > def srgb_to_linear(x): > @@ -51,10 +52,11 @@ def srgb_to_linear(x): > > def linear_to_srgb(x): > if x >= 0.0031308: > - return 1.055 * math.pow(x, 0.41666) - 0.055 > + return 1.055 * math.pow(x, 0.41666666) - 0.055 > else: > return 12.92 * x > > + > def generate_srgb_tables(): > print 'const float' > print 'util_format_srgb_8unorm_to_linear_float_table[256] = {' > @@ -84,6 +86,59 @@ def generate_srgb_tables(): > print '};' > print > > +# calculate the table interpolation values used in float linear to unorm8 > srgb > + numexp = 13 > + mantissa_msb = 3 > +# stepshift is just used to only use every x-th float to make things faster, > +# 5 is largest value which still gives exact same table as 0 > + stepshift = 5 > + nbuckets = numexp << mantissa_msb > + bucketsize = (1 << (23 - mantissa_msb)) >> stepshift > + mantshift = 12 > + valtable = [] > + sum_aa = float(bucketsize) > + sum_ab = 0.0 > + sum_bb = 0.0 > + for i in range(0, bucketsize): > + j = (i << stepshift) >> mantshift > + sum_ab += j > + sum_bb += j*j > + inv_det = 1.0 / (sum_aa * sum_bb - sum_ab * sum_ab) > + > + for bucket in range(0, nbuckets): > + start = ((127 - numexp) << 23) + bucket*(bucketsize << stepshift) > + sum_a = 0.0 > 
+ sum_b = 0.0 > + > + for i in range(0, bucketsize): > + j = (i << stepshift) >> mantshift > + fint = start + (i << stepshift) > + ffloat = struct.unpack('f', struct.pack('I', fint))[0] > + val = linear_to_srgb(ffloat) * 255.0 + 0.5 > + sum_a += val > + sum_b += j*val > + > + solved_a = inv_det * (sum_bb*sum_a - sum_ab*sum_b) > + solved_b = inv_det * (sum_aa*sum_b - sum_ab*sum_a) > + > + scaled_a = solved_a * 65536.0 / 512.0 > + scaled_b = solved_b * 65536.0 > + > + int_a = int(scaled_a + 0.5) > + int_b = int(scaled_b + 0.5) > + > + valtable.append((int_a << 16) + int_b) > + > + print 'const unsigned' > + print 'util_format_linear_to_srgb_helper_table[104] = {' > + > + for j in range(0, nbuckets, 4): > + print ' ', > + for i in range(j, j + 4): > + print '0x%08x,' % (valtable[i],), > + print > + print '};' > + print > > def main(): > print '/* This file is autogenerated by u_format_srgb.py. Do not edit > directly. */' > -- > 1.7.9.5 > _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
