On Thu, 1 Aug 2019, Richard Biener wrote:
> So - were you able to allocate some cycles for this? I think
> we've settled with doing the implementation qsort_r-style while
> implementing the wrapping with fancy C++ templates.
Yes. It works, but not quite as perfectly as I'd hoped. I found two issues:
1. The fancy wrapper needs C++11; in C++98, it can't be applied to static
comparators, so in that case we need option-1-like wrapping.
I noticed thanks to the fact we build stage1 with -std=gnu++98.
2. Sometimes we fail to inline the comparator into the wrapper; I imagine it
happens when inlining hits an internal limit (even though on its own such
inlining is clearly beneficial). In particular a couple comparators in
IRA/LRA suffer from this.
Two questions for you:
1. You indicated a dislike for using templates to "duplicate" functionality in
sort.cc to implement both classic qsort and qsort_r. What are the main
concerns, specifically? I find it a reasonable approach.
2. In your patch, you're using a union to copy between a void * and a function
pointer, saying C++ disallows a cast. A simple C-style cast worked well
enough for me (as used in the patch). Am I missing something?
The patch is below, I'll add comments and resubmit properly if you want to see
it on trunk. Thanks!
diff --git a/gcc/system.h b/gcc/system.h
index d04f8fd3360..2cd5475b32c 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -1200,13 +1200,30 @@ helper_const_non_const_cast (const char *p)
/* GCC qsort API-compatible functions: except in release-checking compilers,
redirect 4-argument qsort calls to gcc_qsort; keep 1-argument invocations
corresponding to vec::qsort (cmp): they use C qsort internally anyway. */
-void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *));
-void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *));
-void gcc_stablesort (void *, size_t, size_t,
- int (*)(const void *, const void *));
+typedef int sort_cmp_fn(const void *, const void *, void *);
+void gcc_sort (void *, size_t, size_t, sort_cmp_fn *, void *);
+void gcc_sort_chk (void *, size_t, size_t, sort_cmp_fn *, void *);
+void gcc_stablesort (void *, size_t, size_t, sort_cmp_fn *, void *);
+#ifdef __cplusplus
+#if __cplusplus >= 201103L
+template<int cmp (const void *, const void *)>
+int cmp2to3 (const void *a, const void *b, void *)
+{
+ return cmp (a, b);
+}
+#define qsort_4(base, n, sz, cmp) gcc_sort (base, n, sz, cmp2to3<cmp>, NULL)
+#define qsort_1(cmp) sort (cmp2to3<cmp>, NULL)
+#else
+int cmp2to3 (const void *, const void *, void *);
+#define qsort_4(base, n, sz, cmp) gcc_sort (base, n, sz, cmp2to3, (void *)cmp)
+#define qsort_1(cmp) sort (cmp2to3, (void *)cmp)
+#endif
#define PP_5th(a1, a2, a3, a4, a5, ...) a5
#undef qsort
-#define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0)
(__VA_ARGS__)
+#define qsort(...) PP_5th (__VA_ARGS__, qsort_4, 3, 2, qsort_1, 0)
(__VA_ARGS__)
+#else
+#pragma GCC poison qsort
+#endif
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)
diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c
index 0ac39140c6c..e1be383880a 100644
--- a/gcc/bb-reorder.c
+++ b/gcc/bb-reorder.c
@@ -2360,7 +2360,7 @@ reorder_basic_blocks_software_trace_cache (void)
/* Order edges by execution frequency, higher first. */
static int
-edge_order (const void *ve1, const void *ve2)
+edge_order (const void *ve1, const void *ve2, void *)
{
edge e1 = *(const edge *) ve1;
edge e2 = *(const edge *) ve2;
@@ -2423,7 +2423,7 @@ reorder_basic_blocks_simple (void)
all edges are equally desirable. */
if (optimize_function_for_speed_p (cfun))
- gcc_stablesort (edges, n, sizeof *edges, edge_order);
+ gcc_stablesort (edges, n, sizeof *edges, edge_order, NULL);
/* Now decide which of those edges to make fallthrough edges. We set
BB_VISITED if a block already has a fallthrough successor assigned
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 4ad1f658708..e411eadc039 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -971,11 +971,11 @@ get_loop_body_in_dom_order (const class loop *loop)
basic_block *
get_loop_body_in_custom_order (const class loop *loop,
- int (*bb_comparator) (const void *, const void
*))
+ sort_cmp_fn *bb_comparator)
{
basic_block *bbs = get_loop_body (loop);
- qsort (bbs, loop->num_nodes, sizeof (basic_block), bb_comparator);
+ gcc_sort (bbs, loop->num_nodes, sizeof (basic_block), bb_comparator, NULL);
return bbs;
}
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 0b0154ffd7b..7b7fa5bc6f7 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -375,7 +375,7 @@ extern unsigned get_loop_body_with_size (const class loop
*, basic_block *,
extern basic_block *get_loop_body_in_dom_order (const class loop *);
extern basic_block *get_loop_body_in_bfs_order (const class loop *);
extern basic_block *get_loop_body_in_custom_order (const class loop *,
- int (*) (const void *, const void *));
+ sort_cmp_fn *);
extern vec<edge> get_loop_exit_edges (const class loop *);
extern edge single_exit (const class loop *);
diff --git a/gcc/ggc-common.c b/gcc/ggc-common.c
index 0968d9769fa..2bf0fdd5e27 100644
--- a/gcc/ggc-common.c
+++ b/gcc/ggc-common.c
@@ -925,7 +925,7 @@ public:
/* Compare wrapper used by qsort method. */
static int
- compare (const void *first, const void *second)
+ compare (const void *first, const void *second, void *)
{
const mem_pair_t f = *(const mem_pair_t *)first;
const mem_pair_t s = *(const mem_pair_t *)second;
@@ -935,7 +935,7 @@ public:
/* Compare rows in final GGC summary dump. */
static int
- compare_final (const void *first, const void *second)
+ compare_final (const void *first, const void *second, void *)
{
typedef std::pair<mem_location *, ggc_usage *> mem_pair_t;
diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index 99236994d64..ad2d7c433ce 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -2251,7 +2251,7 @@ add_allocno_to_bucket (ira_allocno_t a, ira_allocno_t
*bucket_ptr)
one. As the result of such assignment order, the second allocno
has a better chance to get the best hard register. */
static int
-bucket_allocno_compare_func (const void *v1p, const void *v2p)
+bucket_allocno_compare_func (const void *v1p, const void *v2p, void * = NULL)
{
ira_allocno_t a1 = *(const ira_allocno_t *) v1p;
ira_allocno_t a2 = *(const ira_allocno_t *) v2p;
@@ -2296,8 +2296,7 @@ bucket_allocno_compare_func (const void *v1p, const void
*v2p)
/* Sort bucket *BUCKET_PTR and return the result through
BUCKET_PTR. */
static void
-sort_bucket (ira_allocno_t *bucket_ptr,
- int (*compare_func) (const void *, const void *))
+sort_bucket (ira_allocno_t *bucket_ptr, sort_cmp_fn *compare_func)
{
ira_allocno_t a, head;
int n;
@@ -2308,7 +2307,7 @@ sort_bucket (ira_allocno_t *bucket_ptr,
sorted_allocnos[n++] = a;
if (n <= 1)
return;
- qsort (sorted_allocnos, n, sizeof (ira_allocno_t), compare_func);
+ gcc_sort (sorted_allocnos, n, sizeof (ira_allocno_t), compare_func, NULL);
head = NULL;
for (n--; n >= 0; n--)
{
@@ -2580,7 +2579,7 @@ allocno_spill_priority_compare (ira_allocno_t a1,
ira_allocno_t a2)
/* Used for sorting allocnos for spilling. */
static int
-allocno_spill_sort_compare (const void *v1p, const void *v2p)
+allocno_spill_sort_compare (const void *v1p, const void *v2p, void *)
{
ira_allocno_t p1 = *(const ira_allocno_t *) v1p;
ira_allocno_t p2 = *(const ira_allocno_t *) v2p;
diff --git a/gcc/mem-stats.h b/gcc/mem-stats.h
index 9ceb9ccc55b..0a62603fd33 100644
--- a/gcc/mem-stats.h
+++ b/gcc/mem-stats.h
@@ -188,7 +188,7 @@ public:
/* Compare wrapper used by qsort method. */
static int
- compare (const void *first, const void *second)
+ compare (const void *first, const void *second, void *)
{
typedef std::pair<mem_location *, mem_usage *> mem_pair_t;
@@ -362,13 +362,12 @@ public:
the number of elements in the list. If we want to process custom order,
CMP comparator can be provided. */
mem_list_t *get_list (mem_alloc_origin origin, unsigned *length,
- int (*cmp) (const void *first,
- const void *second) = NULL);
+ sort_cmp_fn *cmp = NULL);
/* Dump all tracked instances of type ORIGIN. If we want to process custom
order, CMP comparator can be provided. */
void dump (mem_alloc_origin origin,
- int (*cmp) (const void *first, const void *second) = NULL);
+ sort_cmp_fn *cmp = NULL);
/* Reverse object map used for every object allocation mapping. */
reverse_object_map_t *m_reverse_object_map;
@@ -594,8 +593,7 @@ template <class T>
inline
typename mem_alloc_description<T>::mem_list_t *
mem_alloc_description<T>::get_list (mem_alloc_origin origin, unsigned *length,
- int (*cmp) (const void *first,
- const void *second))
+ sort_cmp_fn *cmp)
{
/* vec data structure is not used because all vectors generate memory
allocation info a it would create a cycle. */
@@ -608,7 +606,7 @@ mem_alloc_description<T>::get_list (mem_alloc_origin
origin, unsigned *length,
if ((*it).first->m_origin == origin)
list[i++] = std::pair<mem_location*, T*> (*it);
- qsort (list, i, element_size, cmp == NULL ? T::compare : cmp);
+ gcc_sort (list, i, element_size, cmp == NULL ? T::compare : cmp, NULL);
*length = i;
return list;
@@ -638,8 +636,7 @@ mem_alloc_description<T>::get_sum (mem_alloc_origin origin)
template <class T>
inline void
mem_alloc_description<T>::dump (mem_alloc_origin origin,
- int (*cmp) (const void *first,
- const void *second))
+ sort_cmp_fn *cmp)
{
unsigned length;
diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
index bb8016bb530..4d6e5f3a2c0 100644
--- a/gcc/sel-sched-ir.c
+++ b/gcc/sel-sched-ir.c
@@ -5982,7 +5982,7 @@ sel_create_new_region (void)
/* If X has a smaller topological sort number than Y, returns -1;
if greater, returns 1. */
static int
-bb_top_order_comparator (const void *x, const void *y)
+bb_top_order_comparator (const void *x, const void *y, void *)
{
basic_block bb1 = *(const basic_block *) x;
basic_block bb2 = *(const basic_block *) y;
diff --git a/gcc/sort.cc b/gcc/sort.cc
index 3e9c032c462..efd0fe75478 100644
--- a/gcc/sort.cc
+++ b/gcc/sort.cc
@@ -45,13 +45,11 @@ along with GCC; see the file COPYING3. If not see
#define noinline
#endif
-/* C-style qsort comparator function type. */
-typedef int cmp_fn (const void *, const void *);
-
/* Structure holding read-mostly (read-only in netsort) context. */
struct sort_ctx
{
- cmp_fn *cmp; // pointer to comparator
+ void *data; // comparator user data
+ sort_cmp_fn *cmp; // pointer to comparator
char *out; // output buffer
size_t n; // number of elements
size_t size; // element size
@@ -128,10 +126,10 @@ do { \
This is noinline to avoid code growth and confine invocation
to a single call site, assisting indirect branch prediction. */
noinline static intptr_t
-cmp1 (char *e0, char *e1, cmp_fn *cmp)
+cmp1 (char *e0, char *e1, void *data, sort_cmp_fn *cmp)
{
intptr_t x = (intptr_t)e0 ^ (intptr_t)e1;
- return x & (cmp (e0, e1) >> 31);
+ return x & (cmp (e0, e1, data) >> 31);
}
/* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT.
@@ -141,7 +139,7 @@ netsort (char *in, sort_ctx *c)
{
#define CMP(e0, e1) \
do { \
- intptr_t x = cmp1 (e1, e0, c->cmp); \
+ intptr_t x = cmp1 (e1, e0, c->data, c->cmp); \
e0 = (char *)((intptr_t)e0 ^ x); \
e1 = (char *)((intptr_t)e1 ^ x); \
} while (0)
@@ -194,7 +192,7 @@ mergesort (char *in, sort_ctx *c, size_t n, char *out, char
*tmp)
/* Merge sorted halves given by L, R to [OUT, END). */
#define MERGE_ELTSIZE(SIZE) \
do { \
- intptr_t mr = c->cmp (r, l) >> 31; \
+ intptr_t mr = c->cmp (r, l, c->data) >> 31; \
intptr_t lr = (intptr_t)l ^ (intptr_t)r; \
lr = (intptr_t)l ^ (lr & mr); \
out = (char *)memcpy (out, (char *)lr, SIZE); \
@@ -204,7 +202,7 @@ do { \
l += ~mr & SIZE; \
} while (r != end)
- if (likely (c->cmp(r, l + (r - out) - c->size) < 0))
+ if (likely (c->cmp(r, l + (r - out) - c->size, c->data) < 0))
{
char *end = out + n * c->size;
if (sizeof (size_t) == 8 && likely (c->size == 8))
@@ -218,7 +216,7 @@ do { \
}
void
-gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
+gcc_sort (void *vbase, size_t n, size_t size, sort_cmp_fn *cmp, void *data)
{
if (n < 2)
return;
@@ -227,7 +225,7 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
if (stable)
nlim = 3, size = ~size;
char *base = (char *)vbase;
- sort_ctx c = {cmp, base, n, size, nlim};
+ sort_ctx c = {data, cmp, base, n, size, nlim};
long long scratch[32];
size_t bufsz = (n / 2) * size;
void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz);
@@ -235,12 +233,19 @@ gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn
*cmp)
if (buf != scratch)
free (buf);
#if CHECKING_P
- qsort_chk (vbase, n, size, cmp);
+ gcc_sort_chk (vbase, n, size, cmp, data);
#endif
}
void
-gcc_stablesort (void *vbase, size_t n, size_t size, cmp_fn *cmp)
+gcc_stablesort (void *base, size_t n, size_t size, sort_cmp_fn *cmp, void
*data)
{
- gcc_qsort (vbase, n, ~size, cmp);
+ gcc_sort (base, n, ~size, cmp, data);
}
+
+#if __cplusplus < 201103L
+int cmp2to3(const void *a, const void *b, void *c)
+{
+ return ((int (*)(const void *, const void *))c)(a, b);
+}
+#endif
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 81784866ad1..fcc9cd6fe04 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -486,7 +486,7 @@ static int bb_top_order_index_size;
if greater, returns 1. */
static int
-bb_top_order_cmp (const void *x, const void *y)
+bb_top_order_cmp (const void *x, const void *y, void *)
{
basic_block bb1 = *(const basic_block *) x;
basic_block bb2 = *(const basic_block *) y;
@@ -2606,7 +2606,7 @@ version_for_distribution_p (vec<struct partition *>
*partitions,
/* Compare base offset of builtin mem* partitions P1 and P2. */
static int
-offset_cmp (const void *vp1, const void *vp2)
+offset_cmp (const void *vp1, const void *vp2, void *)
{
struct partition *p1 = *(struct partition *const *) vp1;
struct partition *p2 = *(struct partition *const *) vp2;
@@ -2665,7 +2665,7 @@ fuse_memset_builtins (vec<struct partition *> *partitions)
/* Stable sort is required in order to avoid breaking dependence. */
gcc_stablesort (&(*partitions)[i], j - i, sizeof (*partitions)[i],
- offset_cmp);
+ offset_cmp, NULL);
/* Continue with next partition. */
i = j;
}
diff --git a/gcc/vec.c b/gcc/vec.c
index cbd2db010f9..43ae799fcbd 100644
--- a/gcc/vec.c
+++ b/gcc/vec.c
@@ -192,21 +192,23 @@ dump_vec_loc_statistics (void)
ATTRIBUTE_NORETURN ATTRIBUTE_COLD
static void
qsort_chk_error (const void *p1, const void *p2, const void *p3,
- int (*cmp) (const void *, const void *))
+ sort_cmp_fn *cmp, void *data)
{
if (!p3)
{
- int r1 = cmp (p1, p2), r2 = cmp (p2, p1);
- error ("qsort comparator not anti-commutative: %d, %d", r1, r2);
+ int r1 = cmp (p1, p2, data), r2 = cmp (p2, p1, data);
+ error ("qsort comparator not anti-symmetric: %d, %d", r1, r2);
}
else if (p1 == p2)
{
- int r = cmp (p1, p3);
+ int r = cmp (p1, p3, data);
error ("qsort comparator non-negative on sorted output: %d", r);
}
else
{
- int r1 = cmp (p1, p2), r2 = cmp (p2, p3), r3 = cmp (p1, p3);
+ int r1 = cmp (p1, p2, data);
+ int r2 = cmp (p2, p3, data);
+ int r3 = cmp (p1, p3, data);
error ("qsort comparator not transitive: %d, %d, %d", r1, r2, r3);
}
internal_error ("qsort checking failed");
@@ -215,8 +217,7 @@ qsort_chk_error (const void *p1, const void *p2, const void
*p3,
/* Verify anti-symmetry and transitivity for comparator CMP on sorted array
of N SIZE-sized elements pointed to by BASE. */
void
-qsort_chk (void *base, size_t n, size_t size,
- int (*cmp)(const void *, const void *))
+gcc_sort_chk (void *base, size_t n, size_t size, sort_cmp_fn *cmp, void *data)
{
#if 0
#define LIM(n) (n)
@@ -225,9 +226,9 @@ qsort_chk (void *base, size_t n, size_t size,
#define LIM(n) ((n) <= 16 ? (n) : 12 + floor_log2 (n))
#endif
#define ELT(i) ((const char *) base + (i) * size)
-#define CMP(i, j) cmp (ELT (i), ELT (j))
-#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp)
-#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp)
+#define CMP(i, j) cmp (ELT (i), ELT (j), data)
+#define ERR2(i, j) qsort_chk_error (ELT (i), ELT (j), NULL, cmp, data)
+#define ERR3(i, j, k) qsort_chk_error (ELT (i), ELT (j), ELT (k), cmp, data)
size_t i1, i2, i, j;
/* This outer loop iterates over maximum spans [i1, i2) such that
elements within each span compare equal to each other. */
diff --git a/gcc/vec.h b/gcc/vec.h
index 2dbf3074da0..86885486433 100644
--- a/gcc/vec.h
+++ b/gcc/vec.h
@@ -592,7 +592,7 @@ public:
void ordered_remove (unsigned);
void unordered_remove (unsigned);
void block_remove (unsigned, unsigned);
- void qsort (int (*) (const void *, const void *));
+ void sort (sort_cmp_fn *, void *);
T *bsearch (const void *key, int (*compar)(const void *, const void *));
unsigned lower_bound (T, bool (*)(const T &, const T &)) const;
bool contains (const T &search) const;
@@ -1108,10 +1108,10 @@ vec<T, A, vl_embed>::block_remove (unsigned ix,
unsigned len)
template<typename T, typename A>
inline void
-vec<T, A, vl_embed>::qsort (int (*cmp) (const void *, const void *))
+vec<T, A, vl_embed>::sort (sort_cmp_fn *cmp, void *data)
{
if (length () > 1)
- ::qsort (address (), length (), sizeof (T), cmp);
+ gcc_sort (address (), length (), sizeof (T), cmp, data);
}
@@ -1400,7 +1400,7 @@ public:
void ordered_remove (unsigned);
void unordered_remove (unsigned);
void block_remove (unsigned, unsigned);
- void qsort (int (*) (const void *, const void *));
+ void sort (sort_cmp_fn *, void *);
T *bsearch (const void *key, int (*compar)(const void *, const void *));
unsigned lower_bound (T, bool (*)(const T &, const T &)) const;
bool contains (const T &search) const;
@@ -1892,10 +1892,10 @@ vec<T, va_heap, vl_ptr>::block_remove (unsigned ix,
unsigned len)
template<typename T>
inline void
-vec<T, va_heap, vl_ptr>::qsort (int (*cmp) (const void *, const void *))
+vec<T, va_heap, vl_ptr>::sort (sort_cmp_fn *cmp, void *data)
{
if (m_vec)
- m_vec->qsort (cmp);
+ m_vec->sort (cmp, data);
}