Hi Michael, thanks for the suggestions about code formatting. I formatted the whole fast_list.h file in the Netbeans editor and uploaded a new revision of the patch.
Jan On Tue, Oct 18, 2016 at 1:42 PM, Michael Schellenberger Costa <[email protected]> wrote: > Hi Jan, > > On 18.10.2016 00:07, Jan Ziak wrote: >> >> This patch replaces the ir_variable_refcount_entry's linked-list >> with an array-list. >> >> The array-list has local storage which does not require ANY additional >> allocations if the list has small number of elements. The size of this >> storage is configurable for each variable. >> >> Benchmark results for "./run -1 shaders" from shader-db[1]: >> >> - The total number of executed instructions goes down from 64.184 to >> 63.797 >> giga-instructions when Mesa is compiled with "gcc -O0 ..." >> - In the call tree starting at function do_dead_code(): >> - the number of calls to malloc() is reduced by about 10% >> - the number of calls to free() is reduced by about 30% >> >> [1] git://anongit.freedesktop.org/mesa/shader-db >> >> Signed-off-by: Jan Ziak (http://atom-symbol.net) >> <[email protected]> >> --- >> src/compiler/glsl/ir_variable_refcount.cpp | 14 +-- >> src/compiler/glsl/ir_variable_refcount.h | 8 +- >> src/compiler/glsl/opt_dead_code.cpp | 19 ++-- >> src/util/fast_list.h | 167 >> +++++++++++++++++++++++++++++ >> 4 files changed, 176 insertions(+), 32 deletions(-) >> >> diff --git a/src/compiler/glsl/ir_variable_refcount.cpp >> b/src/compiler/glsl/ir_variable_refcount.cpp >> index 8306be1..94d6edc 100644 >> --- a/src/compiler/glsl/ir_variable_refcount.cpp >> +++ b/src/compiler/glsl/ir_variable_refcount.cpp >> @@ -46,15 +46,6 @@ static void >> free_entry(struct hash_entry *entry) >> { >> ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *) >> entry->data; >> - >> - /* Free assignment list */ >> - exec_node *n; >> - while ((n = ivre->assign_list.pop_head()) != NULL) { >> - struct assignment_entry *assignment_entry = >> - exec_node_data(struct assignment_entry, n, link); >> - free(assignment_entry); >> - } >> - >> delete ivre; >> } >> @@ -142,10 +133,7 @@ >> ir_variable_refcount_visitor::visit_leave(ir_assignment *ir) >> */ >> assert(entry->referenced_count >= entry->assigned_count); >> if (entry->referenced_count == entry->assigned_count) { >> - struct assignment_entry *assignment_entry = >> - (struct assignment_entry *)calloc(1, >> sizeof(*assignment_entry)); >> - assignment_entry->assign = ir; >> - entry->assign_list.push_head(&assignment_entry->link); >> + entry->assign_list.add(ir); >> } >> } >> diff --git a/src/compiler/glsl/ir_variable_refcount.h >> b/src/compiler/glsl/ir_variable_refcount.h >> index 08a11c0..c3ec5fe 100644 >> --- a/src/compiler/glsl/ir_variable_refcount.h >> +++ b/src/compiler/glsl/ir_variable_refcount.h >> @@ -32,11 +32,7 @@ >> #include "ir.h" >> #include "ir_visitor.h" >> #include "compiler/glsl_types.h" >> - >> -struct assignment_entry { >> - exec_node link; >> - ir_assignment *assign; >> -}; >> +#include "util/fast_list.h" >> class ir_variable_refcount_entry >> { >> @@ -50,7 +46,7 @@ public: >> * This is intended to be used for dead code optimisation and may >> * not be a complete list. >> */ >> - exec_list assign_list; >> + arraylist<ir_assignment*,4> assign_list; >> /** Number of times the variable is referenced, including >> assignments. */ >> unsigned referenced_count; >> diff --git a/src/compiler/glsl/opt_dead_code.cpp >> b/src/compiler/glsl/opt_dead_code.cpp >> index 75e668a..06e8c3d 100644 >> --- a/src/compiler/glsl/opt_dead_code.cpp >> +++ b/src/compiler/glsl/opt_dead_code.cpp >> @@ -52,7 +52,7 @@ do_dead_code(exec_list *instructions, bool >> uniform_locations_assigned) >> struct hash_entry *e; >> hash_table_foreach(v.ht, e) { >> - ir_variable_refcount_entry *entry = (ir_variable_refcount_entry >> *)e->data; >> + ir_variable_refcount_entry *const entry = >> (ir_variable_refcount_entry *)e->data; >> /* Since each assignment is a reference, the refereneced count >> must be >> * greater than or equal to the assignment count. If they are >> equal, >> @@ -89,7 +89,7 @@ do_dead_code(exec_list *instructions, bool >> uniform_locations_assigned) >> if (entry->var->data.always_active_io) >> continue; >> - if (!entry->assign_list.is_empty()) { >> + if (!entry->assign_list.empty()) { >> /* Remove all the dead assignments to the variable we found. >> * Don't do so if it's a shader or function output, though. >> */ >> @@ -98,26 +98,19 @@ do_dead_code(exec_list *instructions, bool >> uniform_locations_assigned) >> entry->var->data.mode != ir_var_shader_out && >> entry->var->data.mode != ir_var_shader_storage) { >> - while (!entry->assign_list.is_empty()) { >> - struct assignment_entry *assignment_entry = >> - exec_node_data(struct assignment_entry, >> - entry->assign_list.get_head_raw(), >> link); >> - >> - assignment_entry->assign->remove(); >> - >> + for(ir_assignment *assign : entry->assign_list) { > > The original code separates control flow instructions as for or while with a > space before the brace, aka "for (...". This applies for all the code. > >> + assign->remove(); >> if (debug) { >> printf("Removed assignment to %s@%p\n", >> entry->var->name, (void *) entry->var); >> } >> - >> - assignment_entry->link.remove(); >> - free(assignment_entry); >> } >> + entry->assign_list.clear(); >> progress = true; >> } >> } >> - if (entry->assign_list.is_empty()) { >> + if (entry->assign_list.empty()) { >> /* If there are no assignments or references to the variable >> left, >> * then we can remove its declaration. >> */ >> diff --git a/src/util/fast_list.h b/src/util/fast_list.h >> new file mode 100644 >> index 0000000..4abd965 >> --- /dev/null >> +++ b/src/util/fast_list.h >> @@ -0,0 +1,167 @@ >> +/* >> + * Copyright © 2016 Jan Ziak >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining >> a >> + * copy of this software and associated documentation files (the >> "Software"), >> + * to deal in the Software without restriction, including without >> limitation >> + * the rights to use, copy, modify, merge, publish, distribute, >> sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the >> next >> + * paragraph) shall be included in all copies or substantial portions of >> the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >> MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT >> SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >> OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> ARISING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >> DEALINGS >> + * IN THE SOFTWARE. >> + * >> + * Authors: >> + * Jan Ziak (http://atom-symbol.net) <[email protected]> >> + */ >> + >> +/** >> + * @file fast_list.h >> + * >> + * High-performance C++ list implementation. >> + */ >> + >> +#ifndef _UTIL_FAST_LIST_H >> +#define _UTIL_FAST_LIST_H >> + >> +#include <new> >> +#include <stdlib.h> >> +#include <string.h> >> + >> +/** >> + * A high-performance array list. >> + * >> + * The list doesn't support elements whose behavior depends on the memory >> address of the element. >> + * >> + * @param T List element type. >> + * >> + * @param N The number of statically allocated elements to achieve the >> highest overall performance. >> + * N is typically 0, 2, 4 or 8 depending on the runtime usage >> pattern of a particular list >> + * variable in the source code. >> + */ >> +template<typename T, size_t N> >> +struct arraylist >> +{ >> + size_t len; // Length >> + size_t cap; // Capacity (size of memory allocated to 'elem') >> + T *elem; // List elements >> + uint8_t local[N*sizeof(T)]; // Fast local preallocated storage for >> lists with length 0..N >> + >> + arraylist() { >> + len = 0; >> + cap = N; >> + elem = (N ? (T*)local : NULL); > > You might want to use nullptr if you go for C++11 anyway (As i assume from > the range based loops above) >> >> + } >> + >> + ~arraylist() { >> + if(!__has_trivial_destructor(T)) { >> + for(size_t i=0; i<len; i++) { > > Please separate operators by whitespace aka "i < len" >> >> + elem[i].~T(); >> + } >> + } >> + if((N && elem != (T*)local) && elem) { >> + free(elem); >> + } >> + } >> + >> + arraylist(const arraylist<T,N> &a) = delete; >> + void operator=(const arraylist<T,N> &a) = delete; >> + >> + bool empty() const { >> + return len==0; > > same >> >> + } >> + >> + void clear() { >> + if(!__has_trivial_destructor(T)) { >> + for(size_t i=0; i<len; i++) { > > same > >> + elem[i].~T(); >> + } >> + } >> + len = 0; >> + } >> + >> + arraylist<T,N>& add(const T &e) { >> + if(len == cap) { >> + enlarge1(); >> + } >> + new (&elem[len++]) T(e); >> + return *this; >> + } >> + >> + T& operator[](size_t i) { return elem[i]; } >> + const T& operator[](size_t i) const { return elem[i]; } >> + >> + /* >> + * C++ iterators: >> + */ >> + >> + struct const_iterator_t { >> + const T *const elem; >> + size_t i; >> + const_iterator_t(const T *_elem, size_t _i) : elem(_elem), i(_i) {} >> + const T& operator*() const { return elem[i]; } >> + void operator++() { i++; } >> + bool operator!=(const const_iterator_t &b) const { return i < b.i; >> } >> + }; >> + struct iterator_t { >> + T *const elem; >> + size_t i; >> + iterator_t(T *_elem, size_t _i) : elem(_elem), i(_i) {} >> + T& operator*() const { return elem[i]; } >> + void operator++() { i++; } >> + bool operator!=(const iterator_t &b) const { return i < b.i; } >> + }; >> + >> + const_iterator_t const_begin() const { return const_iterator_t(elem, >> 0); } >> + const_iterator_t const_end() const { return const_iterator_t(elem, >> len); } > > The stl knows those as cbegin() and cend(), maybe stick with the standard > names? > --Michael >> >> + >> + const_iterator_t begin() const { return const_iterator_t(elem, 0); } >> + const_iterator_t end() const { return const_iterator_t(elem, len); } >> + >> + iterator_t begin() { return iterator_t(elem, 0); } >> + iterator_t end() { return iterator_t(elem, len); } >> + >> + /* >> + * Methods for std::vector<T> compatibility: >> + */ >> + >> + arraylist<T,N>& push_back(const T &e) { return add(e); } >> + >> +private: >> + void enlarge1() { >> + if(N) { >> + cap *= 2; >> + if(elem == (T*)local) { >> + elem = (T*)malloc(cap*sizeof(T)); >> + if(!elem) { >> + abort(); >> + } >> + memcpy(elem, local, len*sizeof(T)); >> + } >> + else { >> + elem = (T*)realloc(elem, cap*sizeof(T)); >> + if(!elem) { >> + abort(); >> + } >> + } >> + } >> + else { >> + cap = (cap ? 2*cap : 4); >> + elem = (T*)realloc(elem, cap*sizeof(T)); >> + if(!elem) { >> + abort(); >> + } >> + } >> + } >> +}; >> + >> +#endif /* _UTIL_FAST_LIST_H */ >> \ No newline at end of file >> _______________________________________________ >> mesa-dev mailing list >> [email protected] >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
