On 07/21/2014 01:56 PM, Dylan Baker wrote: > On Monday, July 14, 2014 03:48:58 PM Ian Romanick wrote: >> From: Ian Romanick <[email protected]> >> >> The use of the Bloom filter rejects the vast majority of strings that >> are not in the table before expensive comparisons are performed. This >> Bloom filter uses two hashes. One is explicit (calculate_hash in the >> code), and the other is implicit. The implicit hash is just the length >> of the name, and the filter is the switch-statement in >> get_static_name_do_not_call that rejects name with lengths that are not >> in the table. >> >> No change Valgrind massif results for a trimmed apitrace of dota2. >> >> NOTE: This patch could probably get squashed into the previous patch. I >> kept it separate because I thought it would make things easier to >> review. >> >> Signed-off-by: Ian Romanick <[email protected]> >> --- >> src/glsl/common_variable_names.py | 113 >> +++++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 >> deletion(-) >> >> diff --git a/src/glsl/common_variable_names.py >> b/src/glsl/common_variable_names.py index 7435a12..ee5571c 100644 >> --- a/src/glsl/common_variable_names.py >> +++ b/src/glsl/common_variable_names.py >> @@ -119,6 +119,46 @@ name_really_is_not_in_static_names(const char *name) >> } >> #endif /* DEBUG */ >> >> +static uint32_t >> +calculate_hash(const char *str) >> +{ >> + uint32_t hash = 5381; >> + >> + while (*str != '\\0') { >> + hash = (hash * 33) + *str; >> + str++; >> + } >> + >> + return hash; >> +} >> + >> +/** >> + * Check the Bloom filter for the name >> + */ >> +static bool >> +name_is_in_Bloom_filter(const char *name) >> +{ >> + static const uint32_t filter[${len(filter)}] = { >> + % for i in range(0,len(filter), 4): >> + ${"{:#010x}".format(filter[i+0])}, ${"{:#010x}".format(filter[i+1])}, >> ${"{:#010x}".format(filter[i+2])}, ${"{:#010x}".format(filter[i+3])}, + >> % endfor >> + }; >> + >> + const uint32_t h = calculate_hash(name); >> + >> + if (h < ${min(all_hash)}) >> + return false; >> + >> + if (h > ${max(all_hash)}) >> + return false; >> + >> + const unsigned bit = (h - ${min(all_hash)}) % ${len(filter) * 32}; >> + const unsigned i = bit / 32; >> + const unsigned j = bit % 32; >> + >> + return (filter[i] & (1U << j)) != 0; >> +} >> + >> /** >> * Search the static name table for the specified name >> * >> @@ -129,6 +169,11 @@ name_really_is_not_in_static_names(const char *name) >> static const char * >> get_static_name_do_not_call(const char *name) >> { >> + /* Do the trivial rejection test before anything. >> + */ >> + if (!name_is_in_Bloom_filter(name)) >> + return NULL; >> + >> const unsigned len = strlen(name); >> const ${index_type} *table = NULL; >> unsigned table_size = 0; >> @@ -188,6 +233,14 @@ get_static_name(const char *name) >> } >> """ >> >> +def calculate_hash(str): >> + hash = 5381 >> + >> + for c in str: >> + hash = ((hash * 33) + ord(c)) & 0x0ffffffff >> + >> + return hash >> + >> common_names.sort() >> >> # Partiation the list of names into sublists of names with the same length >> @@ -213,8 +266,66 @@ elif base < (1 << 16): >> else: >> index_type = "uint32_t" >> >> +all_hash = [] >> +for name in common_names: >> + all_hash.append(calculate_hash(name)) > > simplify this to: > all_hash = [calculate_hash(n) for n in common_names]
I learned about list comprehensions right after doing all this code. :)
>> +
>> +# Experimentally determined that 8,192 bits is sufficient for this dataset.
>> +# This was determined by:
>> +#
>> +# 1. Modify the generated code to log a message on entry to
>> get_static_name. +#
>> +# 2. Modify the generated code to log a message when
>> name_is_in_Bloom_filter +# returns true.
>> +#
>> +# 3. Modifiy the generated code to log a message each time a name is
>> rejected +# due to its length. This is the 'default' case in the
>> switch-statement. +# This is an implicit hash that is part of the Bloom
>> filter.
>> +#
>> +# 4. Modify the generated code to log a message each time a name has
>> actually +# been tested (using strcmp) and was not in the table. This
>> means logging at +# the end of get_static_name_do_not_call and inside the
>> switch-statement where +# "lonely" names are handled.
>> +#
>> +# 5. Run a ton of shaders through (e.g., shader-db) and collect the output.
>> +# Count the number of times each message occurred.
>> +#
>> +# Two hash functions were tried. The current one and Murmur2. Exhaustive
>> +# testing over shader-db produced the following results. There were a
>> total +# of 6,749,837 calls to get_static_name in each run. The # in the
>> column +# header refers messages mentioned in the list above.
>> +#
>> +# Bits Current hash Murmur2
>> +# #2 / #3 / #4 #2 / #3 / #4
>> +# 128 5249223 / 262597 / 4826172 763090 / 285531 / 317105
>> +# 256 4955982 / 162087 / 4633441 508512 / 152491 / 195567
>> +# 512 334736 / 111152 / 63130 381691 / 98277 / 122960
>> +# 1024 236521 / 43809 / 32258 326657 / 69346 / 96857
>> +# 2048 174275 / 1533 / 12288 258885 / 49537 / 48894
>> +# 4096 171153 / 341 / 10358 185632 / 682 / 24496
>> +# 8192 161649 / 264 / 931 163035 / 142 / 2439
>> +# 16384 160782 / 0 / 328 162764 / 15 / 2295
>> +#
>> +# Past 512 bits, the current hash was clearly superior to Murmur2. 8,192
>> bits +# seems to be a sweet spot of a very small table size (1KiB) and a
>> less than +# 1% false-positive rate (931/161649).
>> +
>> +filter_bits = 8192
>> +m = min(all_hash)
>> +filter = []
>> +for i in range(0, filter_bits / 32):
>> + filter.append(0)
>
> there's a couple of things about this:
> 1) range() returns a list, if you're iterating use xrange() instead
Ah. Good tip.
> 2) be carful of division in python if you're using ints and floats, python2 /
> is floor division for two ints, but is true division if any input is a float,
> you can use 'from __future__ import division' to get seperate operators for
> floor and true division. (// is floor and / is true in that case)
filter_bits and 32 should be ints, so... will it just do the right
thing? Is (filter_bits >> 5) more safe?
> 3) this is a silly function, all you need is:
> filter = range(0, filter_bits / 32)
But that will be different... that's the same as
filter = [0, 1, 2, 3, ...]
but I want
filter = [0, 0, 0, 0, ...]
So...
filter = [0 for i in xrange(0, filter_bits / 32)]
should do the trick?
>> +
>> +for h in all_hash:
>> + idx = ((h - m) % filter_bits) / 32
>> + bit = ((h - m) % filter_bits) % 32
>> +
>> + filter[idx] = filter[idx] | (1 << bit)
>> +
>> from mako.template import Template
>> print Template(template).render(location_dict = location_dict,
>> sized_lists = sized_lists,
>> common_names = common_names,
>> - index_type = index_type)
>> + index_type = index_type,
>> + filter = filter,
>> + all_hash = all_hash)
>
> Like in the last patch, lose the spaces around the '='
signature.asc
Description: OpenPGP digital signature
_______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
