> > IMO, not wasting memory should be priority #1 here, before simplicity
> > or multithread-safety. ('cvs' is not a multithreaded program.)
>
> Hmm, well, if CVS is the only user and we have a fix that satisfies a
> CVS maintainer, perhaps we can simply agree to disagree about this
> particular simplicity-vs-performance tradeoff.
"agree to disagree" is not an option for me, until we have seen the
actual figures.
Therefore I'm committing this change that
- adds a benchmark for the various implementations of pagealign_alloc,
- for this purpose, allows to change the implementation without
recompilation.
You can run the benchmark like this:
1)
$ ./gnulib-tool --create-testdir --dir=../testdir1 pagealign_alloc
2) In the testdir:
$ ./configure CPPFLAGS=-Wall
$ make
$ gltests/bench-pagealign_alloc b
$ gltests/bench-pagealign_alloc c
$ gltests/bench-pagealign_alloc d
$ gltests/bench-pagealign_alloc e
$ gltests/bench-pagealign_alloc f
where the implementation choice is:
b: malloc
c: mmap (if available)
d: posix_memalign (if available)
e: _aligned_malloc (if available)
f: VirtualAlloc (if available)
What I measure is:
b c d e f
glibc 176% 101% 175% --- ---
musl libc 198% 101% 197% --- ---
macOS 190% 100% 195% --- ---
FreeBSD 190% 100% 380% --- ---
NetBSD 185% 101% 379% --- ---
OpenBSD 209% 134% 105% --- ---
AIX 177% 101% 176% --- ---
Solaris 11.4 176% 101% 175% --- ---
Cygwin 181% 100% 181% --- ---
native Windows 184% --- --- 180% crash
Android 182% 101% 181% --- ---
This means:
* On all platforms, malloc() wastes a lot of memory: requesting 1 page
actually allocates 2 pages.
* On all platforms except OpenBSD, posix_memalign() wastes a lot of
memory: requesting 1 page actually allocates 2 pages, on FreeBSD and
NetBSD even 4 pages (!).
* On all platforms except OpenBSD and native Windows, the mmap based
approach is the winner: it hardly wastes any memory.
* On native Windows, _aligned_malloc works but is essentially as bad
as malloc.
* On native Windows, there's a problem with my use of VirtualAlloc.
If someone has a hint, that would be welcome.
I therefore propose to
1) change the default back from posix_memalign to mmap,
except on OpenBSD.
2) since the "list" is not going away, use a different implementation
for it: either prev/next fields (doubly-linked list) or a hash-set
container.
Bruno
>From b4866d2991941cbd16e19f1d751fbb8a8f13647c Mon Sep 17 00:00:00 2001
From: Bruno Haible <[email protected]>
Date: Thu, 11 Sep 2025 16:59:54 +0200
Subject: [PATCH] pagealign_alloc: Add benchmark for comparing different
implementations.
* lib/pagealign_alloc.h (pagealign_impl_t): New type.
(pagealign_impl): New declaration.
* lib/pagealign_alloc.c: On native Windows, include <malloc.h> and
<windows.h>.
(pagealign_impl): New variable.
(info_t): Change to a union type.
(get_default_impl): New function.
(pagealign_alloc, pagealign_free): Dispatch according to pagealign_impl.
* tests/bench-pagealign_alloc.c: New file.
* modules/pagealign_alloc-tests: New file.
---
ChangeLog | 14 +++
lib/pagealign_alloc.c | 217 ++++++++++++++++++++++++++--------
lib/pagealign_alloc.h | 15 +++
modules/pagealign_alloc-tests | 13 ++
tests/bench-pagealign_alloc.c | 146 +++++++++++++++++++++++
5 files changed, 357 insertions(+), 48 deletions(-)
create mode 100644 modules/pagealign_alloc-tests
create mode 100644 tests/bench-pagealign_alloc.c
diff --git a/ChangeLog b/ChangeLog
index 93901ebace..235fc24012 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2025-09-11 Bruno Haible <[email protected]>
+
+ pagealign_alloc: Add benchmark for comparing different implementations.
+ * lib/pagealign_alloc.h (pagealign_impl_t): New type.
+ (pagealign_impl): New declaration.
+ * lib/pagealign_alloc.c: On native Windows, include <malloc.h> and
+ <windows.h>.
+ (pagealign_impl): New variable.
+ (info_t): Change to a union type.
+ (get_default_impl): New function.
+ (pagealign_alloc, pagealign_free): Dispatch according to pagealign_impl.
+ * tests/bench-pagealign_alloc.c: New file.
+ * modules/pagealign_alloc-tests: New file.
+
2025-09-11 Bruno Haible <[email protected]>
pagealign_alloc, vma-prot tests: Fix use of HAVE_MMAP (regr. yesterday).
diff --git a/lib/pagealign_alloc.c b/lib/pagealign_alloc.c
index dc739b9509..28aca9baf5 100644
--- a/lib/pagealign_alloc.c
+++ b/lib/pagealign_alloc.c
@@ -19,6 +19,7 @@
#include <config.h>
+/* Specification. */
#include "pagealign_alloc.h"
#include <errno.h>
@@ -31,6 +32,11 @@
#if HAVE_SYS_MMAN_H
# include <sys/mman.h>
#endif
+#if defined _WIN32 && !defined __CYGWIN__
+# include <malloc.h>
+# define WIN32_LEAN_AND_MEAN /* avoid including junk */
+# include <windows.h>
+#endif
#include <error.h>
#include "xalloc.h"
@@ -46,16 +52,22 @@
#endif
-#if ! HAVE_POSIX_MEMALIGN
+/* Implementation of pagealign_alloc. */
+pagealign_impl_t pagealign_impl;
-# if HAVE_SYS_MMAN_H
-/* For each memory region, we store its size. */
-typedef size_t info_t;
-# else
-/* For each memory region, we store the original pointer returned by
- malloc(). */
-typedef void * info_t;
-# endif
+typedef union
+{
+ /* For PA_IMPL_MALLOC:
+ For each memory region, we store the original pointer returned by
+ malloc(). */
+ void *pointer;
+ /* For PA_IMPL_MMAP, PA_IMPL_VIRTUAL_ALLOC:
+ For each memory region, we store its size. */
+ size_t size;
+} info_t;
+
+
+/* For PA_IMPL_MALLOC, PA_IMPL_MMAP. */
/* A simple linked list of allocated memory regions. It is probably not the
most efficient way to store these, but anyway... */
@@ -70,7 +82,6 @@ struct memnode_s
/* The list of currently allocated memory regions. */
static memnode_t *memnode_table = NULL;
-
static void
new_memnode (void *aligned_ptr, info_t info)
{
@@ -81,7 +92,6 @@ new_memnode (void *aligned_ptr, info_t info)
memnode_table = new_node;
}
-
/* Dispose of the memnode containing a map for the ALIGNED_PTR in question
and return the content of the node's INFO field. */
static info_t
@@ -108,48 +118,118 @@ get_memnode (void *aligned_ptr)
return ret;
}
-#endif /* !HAVE_POSIX_MEMALIGN */
+
+/* Returns the default implementation. */
+static pagealign_impl_t
+get_default_impl (void)
+{
+#if HAVE_POSIX_MEMALIGN
+ return PA_IMPL_POSIX_MEMALIGN;
+#elif HAVE_SYS_MMAN_H
+ return PA_IMPL_MMAP;
+#else
+ return PA_IMPL_MALLOC;
+#endif
+}
void *
pagealign_alloc (size_t size)
{
void *ret;
-#if HAVE_POSIX_MEMALIGN
- /* Prefer posix_memalign to malloc and mmap,
- as it typically scales better when there are many allocations. */
- int status = posix_memalign (&ret, getpagesize (), size);
- if (status)
+ pagealign_impl_t impl;
+
+ impl = pagealign_impl;
+ if (impl == PA_IMPL_DEFAULT)
+ impl = get_default_impl ();
+
+ switch (impl)
{
- errno = status;
+ case PA_IMPL_MALLOC:
+ {
+ size_t pagesize = getpagesize ();
+ void *unaligned_ptr = malloc (size + pagesize - 1);
+ if (unaligned_ptr == NULL)
+ {
+ /* Set errno. We don't know whether malloc already set errno: some
+ implementations of malloc do, some don't. */
+ errno = ENOMEM;
+ return NULL;
+ }
+ ret = (char *) unaligned_ptr
+ + ((- (uintptr_t) unaligned_ptr) & (pagesize - 1));
+ info_t info;
+ info.pointer = unaligned_ptr;
+ new_memnode (ret, info);
+ }
+ break;
+
+ case PA_IMPL_MMAP:
+ #if HAVE_SYS_MMAN_H
+ ret = mmap (NULL, size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (ret == MAP_FAILED)
+ return NULL;
+ info_t info;
+ info.size = size;
+ new_memnode (ret, info);
+ break;
+ #else
+ errno = ENOSYS;
return NULL;
- }
-#elif HAVE_SYS_MMAN_H
- /* Prefer mmap to malloc, since the latter often wastes an entire
- memory page per call. */
- ret = mmap (NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE,
- -1, 0);
- if (ret == MAP_FAILED)
- return NULL;
- new_memnode (ret, size);
-#else /* !HAVE_SYS_MMAN_H && !HAVE_POSIX_MEMALIGN */
- size_t pagesize = getpagesize ();
- void *unaligned_ptr = malloc (size + pagesize - 1);
- if (unaligned_ptr == NULL)
- {
- /* Set errno. We don't know whether malloc already set errno: some
- implementations of malloc do, some don't. */
- errno = ENOMEM;
+ #endif
+
+ case PA_IMPL_POSIX_MEMALIGN:
+ #if HAVE_POSIX_MEMALIGN
+ {
+ int status = posix_memalign (&ret, getpagesize (), size);
+ if (status)
+ {
+ errno = status;
+ return NULL;
+ }
+ }
+ break;
+ #else
+ errno = ENOSYS;
+ return NULL;
+ #endif
+
+ case PA_IMPL_ALIGNED_MALLOC:
+ #if defined _WIN32 && !defined __CYGWIN__
+ /* Documentation:
+ <https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-malloc> */
+ return _aligned_malloc (size, getpagesize ());
+ #else
+ errno = ENOSYS;
return NULL;
+ #endif
+
+ case PA_IMPL_VIRTUAL_ALLOC:
+ #if defined _WIN32 && !defined __CYGWIN__
+ /* Documentation:
+ <https://learn.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualalloc> */
+ ret = VirtualAlloc (NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+ if (ret == NULL)
+ {
+ errno = ENOMEM;
+ return NULL;
+ }
+ info_t info;
+ info.size = size;
+ new_memnode (ret, info);
+ #else
+ errno = ENOSYS;
+ return NULL;
+ #endif
+
+ default:
+ abort ();
}
- ret = (char *) unaligned_ptr
- + ((- (uintptr_t) unaligned_ptr) & (pagesize - 1));
- new_memnode (ret, unaligned_ptr);
-#endif /* HAVE_SYS_MMAN_H && HAVE_POSIX_MEMALIGN */
+
return ret;
}
-
void *
pagealign_xalloc (size_t size)
{
@@ -165,12 +245,53 @@ pagealign_xalloc (size_t size)
void
pagealign_free (void *aligned_ptr)
{
-#if HAVE_POSIX_MEMALIGN
- free (aligned_ptr);
-#elif HAVE_SYS_MMAN_H
- if (munmap (aligned_ptr, get_memnode (aligned_ptr)) < 0)
- error (EXIT_FAILURE, errno, "Failed to unmap memory");
-#else
- free (get_memnode (aligned_ptr));
-#endif
+ pagealign_impl_t impl;
+
+ impl = pagealign_impl;
+ if (impl == PA_IMPL_DEFAULT)
+ impl = get_default_impl ();
+
+ switch (impl)
+ {
+ case PA_IMPL_MALLOC:
+ free (get_memnode (aligned_ptr).pointer);
+ break;
+
+ case PA_IMPL_MMAP:
+ #if HAVE_SYS_MMAN_H
+ if (munmap (aligned_ptr, get_memnode (aligned_ptr).size) < 0)
+ error (EXIT_FAILURE, errno, "Failed to unmap memory");
+ #else
+ abort ();
+ #endif
+
+ case PA_IMPL_POSIX_MEMALIGN:
+ #if HAVE_POSIX_MEMALIGN
+ free (aligned_ptr);
+ #else
+ abort ();
+ #endif
+
+ case PA_IMPL_ALIGNED_MALLOC:
+ #if defined _WIN32 && !defined __CYGWIN__
+ /* Documentation:
+ <https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-free> */
+ _aligned_free (aligned_ptr);
+ #else
+ abort ();
+ #endif
+
+ case PA_IMPL_VIRTUAL_ALLOC:
+ #if defined _WIN32 && !defined __CYGWIN__
+ /* Documentation:
+ <https://learn.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualfree> */
+ if (!VirtualFree (aligned_ptr, get_memnode (aligned_ptr).size, MEM_RELEASE))
+ error (EXIT_FAILURE, 0, "Failed to free memory");
+ #else
+ abort ();
+ #endif
+
+ default:
+ abort ();
+ }
}
diff --git a/lib/pagealign_alloc.h b/lib/pagealign_alloc.h
index 3134d4e460..e04d895b48 100644
--- a/lib/pagealign_alloc.h
+++ b/lib/pagealign_alloc.h
@@ -55,6 +55,21 @@ extern void *pagealign_xalloc (size_t size)
_GL_ATTRIBUTE_ALLOC_SIZE ((1)) _GL_ATTRIBUTE_RETURNS_NONNULL;
+/* Selects the implementation of pagealign_alloc.
+ If you set pagealign_impl, it must be before the first invocation of
+ pagealign_alloc or pagealign_xalloc. */
+typedef enum
+{
+ PA_IMPL_DEFAULT = 0, /* platform-dependent default */
+ PA_IMPL_MALLOC = 1, /* malloc */
+ PA_IMPL_MMAP = 2, /* mmap (if available) */
+ PA_IMPL_POSIX_MEMALIGN = 3, /* posix_memalign (if available) */
+ PA_IMPL_ALIGNED_MALLOC = 4, /* _aligned_malloc (if available) */
+ PA_IMPL_VIRTUAL_ALLOC = 5 /* VirtualAlloc (if available) */
+} pagealign_impl_t;
+extern pagealign_impl_t pagealign_impl;
+
+
#ifdef __cplusplus
}
#endif
diff --git a/modules/pagealign_alloc-tests b/modules/pagealign_alloc-tests
new file mode 100644
index 0000000000..bc9f16fc16
--- /dev/null
+++ b/modules/pagealign_alloc-tests
@@ -0,0 +1,13 @@
+Files:
+tests/bench-pagealign_alloc.c
+
+Depends-on:
+getpagesize
+get-rusage-as
+get-rusage-data
+
+configure.ac:
+
+Makefile.am:
+noinst_PROGRAMS += bench-pagealign_alloc
+bench_pagealign_alloc_CPPFLAGS = $(AM_CPPFLAGS) -DNDEBUG
diff --git a/tests/bench-pagealign_alloc.c b/tests/bench-pagealign_alloc.c
new file mode 100644
index 0000000000..bdba5bfecf
--- /dev/null
+++ b/tests/bench-pagealign_alloc.c
@@ -0,0 +1,146 @@
+/* Benchmarks for the pagealign_alloc module.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* This benchmark focuses on the amount of allocated memory, in order
+ to minimize wasted memory. */
+
+#include <config.h>
+
+/* Specification. */
+#include "pagealign_alloc.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "resource-ext.h"
+
+static size_t pagesize;
+
+/* Does a few pagealign_alloc invocations. */
+static int
+allocate_some_pages (void)
+{
+ int i;
+
+ for (i = 1; i <= 9; i++)
+ if (pagealign_alloc (pagesize) == NULL)
+ return -1;
+ for (i = 1; i <= 2; i++)
+ if (pagealign_alloc (2 * pagesize) == NULL)
+ return -1;
+ for (i = 1; i <= 1; i++)
+ if (pagealign_alloc (3 * pagesize) == NULL)
+ return -1;
+
+ return 0;
+}
+
+/* Returns the total size requested by allocate_some_pages (). */
+static size_t
+size_of_some_pages (void)
+{
+ return 16 * pagesize;
+}
+
+static void
+show_stats (intptr_t initial_as, intptr_t initial_data, int count)
+{
+ if (count == 0)
+ printf ("Allocation failed.\n");
+ else
+ {
+ intptr_t requested = count * size_of_some_pages ();
+ intptr_t as_increase = get_rusage_as () - initial_as;
+ intptr_t data_increase = get_rusage_data () - initial_data;
+ printf ("count = %5d: requested: %6ld pages, allocated AS: %6ld pages (%.2f%%), allocated DATA: %6ld pages (%.2f%%)\n",
+ count,
+ (long int) (requested / pagesize),
+ (long int) (as_increase / pagesize), 100 * (double) as_increase / (double) requested,
+ (long int) (data_increase / pagesize), 100 * (double) data_increase / (double) requested);
+ }
+}
+
+static void
+do_test (void)
+{
+ intptr_t initial_as = get_rusage_as ();
+ intptr_t initial_data = get_rusage_data ();
+ int count;
+ for (count = 0; count <= 10000; count++)
+ {
+ if (count == 10 || count == 30 || count == 90 || count == 270
+ || count == 810 || count == 2430 || count == 7290)
+ show_stats (initial_as, initial_data, count);
+
+ if (allocate_some_pages () < 0)
+ break;
+ }
+ show_stats (initial_as, initial_data, count);
+}
+
+int
+main (int argc, char *argv[])
+{
+ if (argc != 2)
+ {
+ fprintf (stderr, "Usage: %s TESTS\n", argv[0]);
+ fprintf (stderr, "Example: %s abcdef\n", argv[0]);
+ exit (1);
+ }
+
+ const char *tests = argv[1];
+
+ pagesize = getpagesize ();
+
+ /* Execute each test. */
+ size_t i;
+ for (i = 0; i < strlen (tests); i++)
+ {
+ char test = tests[i];
+
+ switch (test)
+ {
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ pagealign_impl = test - 'a';
+ do_test ();
+ break;
+ default:
+ /* Ignore. */
+ ;
+ }
+ }
+
+ return 0;
+}
+
+/* Results:
+
+ b c d e f
+
+glibc 176% 101% 175% --- ---
+musl libc 198% 101% 197% --- ---
+macOS 190% 100% 195% --- ---
+FreeBSD 190% 100% 380% --- ---
+NetBSD 185% 101% 379% --- ---
+OpenBSD 209% 134% 105% --- ---
+AIX 177% 101% 176% --- ---
+Solaris 11.4 176% 101% 175% --- ---
+Cygwin 181% 100% 181% --- ---
+native Windows 184% --- --- 180% crash
+Android 182% 101% 181% --- ---
+*/
--
2.50.1