On Wed, Jul 31, 2024 at 04:02:22PM +0200, Richard Biener wrote:
> The following improves release_pages when using the madvise path
> to sort the freelist to get more page entries contiguous and possibly
> release them.  This populates the unused prev pointer so the reclaim
> can then easily unlink from the freelist without re-ordering it.
> The paths not having madvise do not keep the memory allocated, so
> I left them untouched.
>
> Re-bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> I've CCed people messing with release_pages; This doesn't really
> address PR114563 but I thought I post this patch anyway - the
> actual issue we run into for the PR is the linear search of
> G.free_pages when that list becomes large but a requested allocation
> cannot be served from it.
>
> 	PR middle-end/114563
> 	* ggc-page.cc (page_sort): New qsort comparator.
> 	(release_pages): Sort the free_pages list entries after their
> 	memory block virtual address to improve contiguous memory
> 	chunk release.
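For readers without the patch in front of them: going only by the
ChangeLog above, I assume page_sort is essentially an address comparator
over the free_pages entries, roughly like the sketch below (an
illustration on my part, not the actual code from Richard's patch):

  /* Hypothetical sketch only: a qsort comparator ordering page_entry
     pointers by the virtual address of their backing memory, so that
     physically adjacent pages end up next to each other on the
     freelist.  */
  static int
  page_sort (const void *a_, const void *b_)
  {
    const page_entry *a = *(const page_entry *const *) a_;
    const page_entry *b = *(const page_entry *const *) b_;
    if (a->page < b->page)
      return -1;
    if (a->page > b->page)
      return 1;
    return 0;
  }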
I saw this in a profile some time ago and tried to address it with a
slightly different patch.  Instead of doing a full sort it uses an array
to keep multiple free lists, one per page entry size.  But I couldn't
measure any speedups in non-checking builds later.  My feeling is that
the array is probably more efficient.  I guess we should compare both
approaches on that PR.

diff --git a/gcc/ggc-page.cc b/gcc/ggc-page.cc
index 4245f843a29f..af1627b002c6 100644
--- a/gcc/ggc-page.cc
+++ b/gcc/ggc-page.cc
@@ -234,6 +234,8 @@ static struct
 }
 inverse_table[NUM_ORDERS];
 
+struct free_list;
+
 /* A page_entry records the status of an allocation page.  This
    structure is dynamically sized to fit the bitmap in_use_p.  */
 struct page_entry
@@ -251,6 +253,9 @@ struct page_entry
      of the host system page size.)  */
   size_t bytes;
 
+  /* Free list of this page size.  */
+  struct free_list *free_list;
+
   /* The address at which the memory is allocated.  */
   char *page;
 
@@ -368,6 +373,15 @@ struct free_object
 };
 #endif
 
+constexpr int num_free_list = 8;
+
+/* A free_list for pages with BYTES size.  */
+struct free_list
+{
+  size_t bytes;
+  page_entry *free_pages;
+};
+
 /* The rest of the global variables.  */
 static struct ggc_globals
 {
@@ -412,8 +426,8 @@ static struct ggc_globals
   int dev_zero_fd;
 #endif
 
-  /* A cache of free system pages.  */
-  page_entry *free_pages;
+  /* A cache of free system pages.  Entry 0 is fallback.  */
+  struct free_list free_lists[num_free_list];
 
 #ifdef USING_MALLOC_PAGE_GROUPS
   page_group *page_groups;
@@ -754,6 +768,26 @@ clear_page_group_in_use (page_group *group, char *page)
 }
 #endif
 
+/* Find a free list for ENTRY_SIZE.  */
+
+static inline struct free_list *
+find_free_list (size_t entry_size)
+{
+  int i;
+  for (i = 1; i < num_free_list; i++)
+    {
+      if (G.free_lists[i].bytes == entry_size)
+        return &G.free_lists[i];
+      if (G.free_lists[i].bytes == 0)
+        {
+          G.free_lists[i].bytes = entry_size;
+          return &G.free_lists[i];
+        }
+    }
+  /* Fallback.  */
+  return &G.free_lists[0];
+}
+
 /* Allocate a new page for allocating objects of size 2^ORDER, and return
    an entry for it.  The entry is not added to the appropriate page_table
    list.  */
@@ -770,6 +804,7 @@ alloc_page (unsigned order)
 #ifdef USING_MALLOC_PAGE_GROUPS
   page_group *group;
 #endif
+  struct free_list *free_list;
 
   num_objects = OBJECTS_PER_PAGE (order);
   bitmap_size = BITMAP_SIZE (num_objects + 1);
@@ -782,8 +817,10 @@ alloc_page (unsigned order)
   entry = NULL;
   page = NULL;
 
+  free_list = find_free_list (entry_size);
+
   /* Check the list of free pages for one we can use.  */
-  for (pp = &G.free_pages, p = *pp; p; pp = &p->next, p = *pp)
+  for (pp = &free_list->free_pages, p = *pp; p; pp = &p->next, p = *pp)
     if (p->bytes == entry_size)
       break;
 
@@ -816,7 +853,7 @@ alloc_page (unsigned order)
       /* We want just one page.  Allocate a bunch of them and put the
         extras on the freelist.  (Can only do this optimization with
         mmap for backing store.)  */
-      struct page_entry *e, *f = G.free_pages;
+      struct page_entry *e, *f = free_list->free_pages;
       int i, entries = GGC_QUIRE_SIZE;
 
       page = alloc_anon (NULL, G.pagesize * GGC_QUIRE_SIZE, false);
@@ -833,12 +870,13 @@ alloc_page (unsigned order)
          e = XCNEWVAR (struct page_entry, page_entry_size);
          e->order = order;
          e->bytes = G.pagesize;
+         e->free_list = free_list;
          e->page = page + (i << G.lg_pagesize);
          e->next = f;
          f = e;
        }
 
-      G.free_pages = f;
+      free_list->free_pages = f;
     }
   else
     page = alloc_anon (NULL, entry_size, true);
@@ -904,12 +942,13 @@ alloc_page (unsigned order)
          e = XCNEWVAR (struct page_entry, page_entry_size);
          e->order = order;
          e->bytes = G.pagesize;
+         e->free_list = free_list;
          e->page = a;
          e->group = group;
          e->next = f;
          f = e;
        }
-      G.free_pages = f;
+      free_list->free_pages = f;
     }
   }
 #endif
@@ -918,6 +957,7 @@
     entry = XCNEWVAR (struct page_entry, page_entry_size);
 
   entry->bytes = entry_size;
+  entry->free_list = free_list;
   entry->page = page;
   entry->context_depth = G.context_depth;
   entry->order = order;
@@ -1006,14 +1046,15 @@ free_page (page_entry *entry)
 
   adjust_depth ();
 
-  entry->next = G.free_pages;
-  G.free_pages = entry;
+  struct free_list *free_list = entry->free_list;
+  entry->next = free_list->free_pages;
+  free_list->free_pages = entry;
 }
 
-/* Release the free page cache to the system.  */
+/* Release the free page cache for FREE_LIST to the system.  */
 
 static void
-release_pages (void)
+do_release_pages (struct free_list *free_list)
 {
   size_t n1 = 0;
   size_t n2 = 0;
@@ -1031,7 +1072,7 @@ release_pages (void)
      This does not always work because the free_pages list is only
      approximately sorted. */
 
-  p = G.free_pages;
+  p = free_list->free_pages;
   prev = NULL;
   while (p)
     {
@@ -1060,7 +1101,7 @@ release_pages (void)
          if (prev)
            prev->next = p;
          else
-           G.free_pages = p;
+           free_list->free_pages = p;
          G.bytes_mapped -= mapped_len;
          n1 += len;
          continue;
@@ -1071,7 +1112,7 @@ release_pages (void)
   /* Now give back the fragmented pages to the OS, but keep the address
      space to reuse it next time. */
 
-  for (p = G.free_pages; p; )
+  for (p = free_list->free_pages; p; )
     {
       if (p->discarded)
         {
@@ -1108,7 +1149,7 @@ release_pages (void)
   size_t len;
 
   /* Gather up adjacent pages so they are unmapped together.  */
-  p = G.free_pages;
+  p = free_list->free_pages;
 
   while (p)
     {
@@ -1131,14 +1172,14 @@ release_pages (void)
       G.bytes_mapped -= len;
     }
 
-  G.free_pages = NULL;
+  free_list->free_pages = NULL;
 #endif
 #ifdef USING_MALLOC_PAGE_GROUPS
   page_entry **pp, *p;
   page_group **gp, *g;
 
   /* Remove all pages from free page groups from the list.  */
-  pp = &G.free_pages;
+  pp = &free_list->free_pages;
   while ((p = *pp) != NULL)
     if (p->group->in_use == 0)
       {
@@ -1172,6 +1213,15 @@ release_pages (void)
     }
 }
 
+/* Release the free page cache to the system.  */
+
+static void
+release_pages ()
+{
+  for (int i = 0; i < num_free_list; i++)
+    do_release_pages (&G.free_lists[i]);
+}
+
 /* This table provides a fast way to determine ceil(log_2(size)) for
    allocation requests.  The minimum allocation size is eight bytes.  */
 #define NUM_SIZE_LOOKUP 512
@@ -1774,9 +1824,10 @@ init_ggc (void)
 
     /* We have a good page, might as well hold onto it...  */
     e = XCNEW (struct page_entry);
     e->bytes = G.pagesize;
+    e->free_list = find_free_list (G.pagesize);
     e->page = p;
-    e->next = G.free_pages;
-    G.free_pages = e;
+    e->next = e->free_list->free_pages;
+    e->free_list->free_pages = e;
   }
 #endif
@@ -2650,6 +2701,7 @@ ggc_pch_read (FILE *f, void *addr)
                                            - sizeof (long)
                                            + BITMAP_SIZE (num_objs + 1)));
       entry->bytes = bytes;
+      entry->free_list = find_free_list (bytes);
       entry->page = offs;
       entry->context_depth = 0;
       offs += bytes;
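
In case it helps to see the idea in isolation: below is a stripped-down,
self-contained sketch of the same bucketing scheme (names simplified and
invented by me, only the shape of find_free_list is taken from the patch):

#include <cstddef>
#include <cstdio>

/* Illustration only, not gcc code: a minimal model of size-bucketed
   free lists.  Bucket 0 is the mixed-size fallback that still needs a
   linear scan; buckets 1..N-1 each serve exactly one entry size.  */
struct bucket
{
  size_t bytes;   /* entry size served by this bucket, 0 = unused */
};

static const int num_buckets = 8;
static bucket buckets[num_buckets];

/* Same shape as find_free_list in the patch: reuse a matching bucket,
   claim an unused one for a new size, or fall back to bucket 0.  */
static bucket *
find_bucket (size_t entry_size)
{
  for (int i = 1; i < num_buckets; i++)
    {
      if (buckets[i].bytes == entry_size)
        return &buckets[i];
      if (buckets[i].bytes == 0)
        {
          buckets[i].bytes = entry_size;
          return &buckets[i];
        }
    }
  return &buckets[0];
}

int
main ()
{
  /* 4K and 64K pages land in distinct buckets, so freeing and then
     reallocating one size never walks over pages of the other.  */
  printf ("4K  -> bucket %d\n", (int) (find_bucket (4096) - buckets));
  printf ("64K -> bucket %d\n", (int) (find_bucket (65536) - buckets));
  printf ("4K  -> bucket %d\n", (int) (find_bucket (4096) - buckets));
  return 0;
}

Compiled standalone this prints buckets 1, 2, 1, i.e. repeated requests
for the same page size keep hitting the same short list instead of a
single long mixed one.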