On Wed, Jul 31, 2024 at 5:37 PM Andi Kleen <a...@linux.intel.com> wrote:
>
> On Wed, Jul 31, 2024 at 04:02:22PM +0200, Richard Biener wrote:
> > The following improves release_pages when using the madvise path
> > to sort the freelist to get more page entries contiguous and possibly
> > release them.  This populates the unused prev pointer so the reclaim
> > can then easily unlink from the freelist without re-ordering it.
> > The paths not having madvise do not keep the memory allocated, so
> > I left them untouched.
> >
> > Re-bootstrap and regtest running on x86_64-unknown-linux-gnu.
> >
> > I've CCed people messing with release_pages; this doesn't really
> > address PR114563 but I thought I'd post this patch anyway - the
> > actual issue we run into for the PR is the linear search of
> > G.free_pages when that list becomes large but a requested allocation
> > cannot be served from it.
> >
> >     PR middle-end/114563
> >     * ggc-page.cc (page_sort): New qsort comparator.
> >     (release_pages): Sort the free_pages list entries after their
> >     memory block virtual address to improve contiguous memory
> >     chunk release.
>
> I saw this in a profile some time ago and tried it with a slightly
> different patch.  Instead of a full sort it uses an array to keep
> multiple free lists.  But I couldn't find any speed ups in non-checking
> builds later.
>
> My feeling is that an array is probably more efficient.
>
> I guess we should compare both on that PR.
>
>
> diff --git a/gcc/ggc-page.cc b/gcc/ggc-page.cc
> index 4245f843a29f..af1627b002c6 100644
> --- a/gcc/ggc-page.cc
> +++ b/gcc/ggc-page.cc
> @@ -234,6 +234,8 @@ static struct
>    }
>  inverse_table[NUM_ORDERS];
> 
> +struct free_list;
> +
>  /* A page_entry records the status of an allocation page.  This
>     structure is dynamically sized to fit the bitmap in_use_p.  */
>  struct page_entry
> @@ -251,6 +253,9 @@ struct page_entry
>       of the host system page size.)  */
>    size_t bytes;
> 
> +  /* Free list of this page size.  */
> +  struct free_list *free_list;
> +
? this seems misplaced

>    /* The address at which the memory is allocated.  */
>    char *page;
> 
> @@ -368,6 +373,15 @@ struct free_object
>  };
>  #endif
> 
> +constexpr int num_free_list = 8;
> +
> +/* A free_list for pages with BYTES size.  */
> +struct free_list
> +{
> +  size_t bytes;
> +  page_entry *free_pages;
> +};
> +
>  /* The rest of the global variables.  */
>  static struct ggc_globals
>  {
> @@ -412,8 +426,8 @@ static struct ggc_globals
>    int dev_zero_fd;
>  #endif
> 
> -  /* A cache of free system pages.  */
> -  page_entry *free_pages;
> +  /* A cache of free system pages.  Entry 0 is fallback.  */
> +  struct free_list free_lists[num_free_list];

I also thought of this, but ...

> 
>  #ifdef USING_MALLOC_PAGE_GROUPS
>    page_group *page_groups;
> @@ -754,6 +768,26 @@ clear_page_group_in_use (page_group *group, char *page)
>  }
>  #endif
> 
> +/* Find a free list for ENTRY_SIZE.  */
> +
> +static inline struct free_list *
> +find_free_list (size_t entry_size)
> +{
> +  int i;
> +  for (i = 1; i < num_free_list; i++)
> +    {
> +      if (G.free_lists[i].bytes == entry_size)
> +        return &G.free_lists[i];
> +      if (G.free_lists[i].bytes == 0)
> +        {
> +          G.free_lists[i].bytes = entry_size;
> +          return &G.free_lists[i];
> +        }
> +    }
> +  /* Fallback.  */
> +  return &G.free_lists[0];
> +}
> +
>  /* Allocate a new page for allocating objects of size 2^ORDER,
>     and return an entry for it.  The entry is not added to the
>     appropriate page_table list.  */
> @@ -770,6 +804,7 @@ alloc_page (unsigned order)
>  #ifdef USING_MALLOC_PAGE_GROUPS
>    page_group *group;
>  #endif
> +  struct free_list *free_list;
> 
>    num_objects = OBJECTS_PER_PAGE (order);
>    bitmap_size = BITMAP_SIZE (num_objects + 1);
> @@ -782,8 +817,10 @@ alloc_page (unsigned order)
>    entry = NULL;
>    page = NULL;
> 
> +  free_list = find_free_list (entry_size);

... this made me not consider it.  Looking closer I think 'entry_size'
only depends on 'order' and thus a direct mapping would work here.

> +
>    /* Check the list of free pages for one we can use.  */
> -  for (pp = &G.free_pages, p = *pp; p; pp = &p->next, p = *pp)
> +  for (pp = &free_list->free_pages, p = *pp; p; pp = &p->next, p = *pp)
>      if (p->bytes == entry_size)

and this search should then go; if this cannot become O(1), adding any
complexity elsewhere is not worth it.  I also saw that we only use
"pages" for low-order allocations but malloc for higher orders.  With
allocation in QUIRE_SIZE granularity (2MB, to get hugepages) it seems it
would be interesting to change that and not immediately split up the 2MB
into page-size chunks.  In the PR you will also find a patch attached
that adds a skip-list to the single free_list; this improved the
testcase, but picking from it is still not O(1).

>        break;
> 
> @@ -816,7 +853,7 @@ alloc_page (unsigned order)
>        /* We want just one page.  Allocate a bunch of them and put the
>           extras on the freelist.  (Can only do this optimization with
>           mmap for backing store.)  */
> -      struct page_entry *e, *f = G.free_pages;
> +      struct page_entry *e, *f = free_list->free_pages;
>        int i, entries = GGC_QUIRE_SIZE;
> 
>        page = alloc_anon (NULL, G.pagesize * GGC_QUIRE_SIZE, false);
> @@ -833,12 +870,13 @@ alloc_page (unsigned order)
>            e = XCNEWVAR (struct page_entry, page_entry_size);
>            e->order = order;
>            e->bytes = G.pagesize;
> +          e->free_list = free_list;
>            e->page = page + (i << G.lg_pagesize);
>            e->next = f;
>            f = e;
>          }
> 
> -      G.free_pages = f;
> +      free_list->free_pages = f;
>      }
>    else
>      page = alloc_anon (NULL, entry_size, true);
> @@ -904,12 +942,13 @@ alloc_page (unsigned order)
>            e = XCNEWVAR (struct page_entry, page_entry_size);
>            e->order = order;
>            e->bytes = G.pagesize;
> +          e->free_list = free_list;
>            e->page = a;
>            e->group = group;
>            e->next = f;
>            f = e;
>          }
> -      G.free_pages = f;
> +      free_list->free_pages = f;
>      }
>    }
>  #endif
> @@ -918,6 +957,7 @@ alloc_page (unsigned order)
>    entry = XCNEWVAR (struct page_entry, page_entry_size);
> 
>    entry->bytes = entry_size;
> +  entry->free_list = free_list;
>    entry->page = page;
>    entry->context_depth = G.context_depth;
>    entry->order = order;
> @@ -1006,14 +1046,15 @@ free_page (page_entry *entry)
> 
>    adjust_depth ();
> 
> -  entry->next = G.free_pages;
> -  G.free_pages = entry;
> +  struct free_list *free_list = entry->free_list;
> +  entry->next = free_list->free_pages;
> +  free_list->free_pages = entry;
>  }
> 
> -/* Release the free page cache to the system.  */
> +/* Release the free page cache for FREE_LIST to the system.  */
> 
>  static void
> -release_pages (void)
> +do_release_pages (struct free_list *free_list)
>  {
>    size_t n1 = 0;
>    size_t n2 = 0;
> @@ -1031,7 +1072,7 @@ release_pages (void)
>       This does not always work because the free_pages list is only
>       approximately sorted.  */
> 
> -  p = G.free_pages;
> +  p = free_list->free_pages;
>    prev = NULL;
>    while (p)
>      {
> @@ -1060,7 +1101,7 @@ release_pages (void)
>            if (prev)
>              prev->next = p;
>            else
> -            G.free_pages = p;
> +            free_list->free_pages = p;
>            G.bytes_mapped -= mapped_len;
>            n1 += len;
>            continue;
> @@ -1071,7 +1112,7 @@ release_pages (void)
>    /* Now give back the fragmented pages to the OS, but keep the address
>       space to reuse it next time.  */
> 
> -  for (p = G.free_pages; p; )
> +  for (p = free_list->free_pages; p; )
>      {
>        if (p->discarded)
>          {
> @@ -1108,7 +1149,7 @@ release_pages (void)
>    size_t len;
> 
>    /* Gather up adjacent pages so they are unmapped together.  */
> -  p = G.free_pages;
> +  p = free_list->free_pages;
> 
>    while (p)
>      {
> @@ -1131,14 +1172,14 @@ release_pages (void)
>        G.bytes_mapped -= len;
>      }
> 
> -  G.free_pages = NULL;
> +  free_list->free_pages = NULL;
>  #endif
>  #ifdef USING_MALLOC_PAGE_GROUPS
>    page_entry **pp, *p;
>    page_group **gp, *g;
> 
>    /* Remove all pages from free page groups from the list.  */
> -  pp = &G.free_pages;
> +  pp = &free_list->free_pages;
>    while ((p = *pp) != NULL)
>      if (p->group->in_use == 0)
>        {
> @@ -1172,6 +1213,15 @@ release_pages (void)
>      }
>  }
> 
> +/* Release the free page cache to the system.  */
> +
> +static void
> +release_pages ()
> +{
> +  for (int i = 0; i < num_free_list; i++)
> +    do_release_pages (&G.free_lists[i]);
> +}
> +
>  /* This table provides a fast way to determine ceil(log_2(size)) for
>     allocation requests.  The minimum allocation size is eight bytes.  */
>  #define NUM_SIZE_LOOKUP 512
> @@ -1774,9 +1824,10 @@ init_ggc (void)
>        /* We have a good page, might as well hold onto it...  */
>        e = XCNEW (struct page_entry);
>        e->bytes = G.pagesize;
> +      e->free_list = find_free_list (G.pagesize);
>        e->page = p;
> -      e->next = G.free_pages;
> -      G.free_pages = e;
> +      e->next = e->free_list->free_pages;
> +      e->free_list->free_pages = e;
>      }
>  #endif
> 
> @@ -2650,6 +2701,7 @@ ggc_pch_read (FILE *f, void *addr)
>                                             - sizeof (long)
>                                             + BITMAP_SIZE (num_objs + 1)));
>        entry->bytes = bytes;
> +      entry->free_list = find_free_list (bytes);
>        entry->page = offs;
>        entry->context_depth = 0;
>        offs += bytes;
> 
> 
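To make the direct-mapping remark above concrete, here is a rough
standalone sketch (simplified types, not the actual ggc-page.cc
structures; NUM_ORDERS, PAGE_BYTES and the page_entry layout are
placeholders).  Indexing the free lists by order removes both the
'bytes' key and the linear scan in find_free_list, so pushing and
popping a cached page is O(1):

/* Sketch: one free list per allocation order, looked up by direct
   indexing instead of scanning an array of (bytes, list) pairs.  */

#include <cstddef>
#include <cstdio>

constexpr unsigned NUM_ORDERS = 16;   /* assumed bound on orders */
constexpr size_t PAGE_BYTES = 4096;   /* assumed host page size */

struct page_entry
{
  page_entry *next;
  size_t bytes;
  unsigned order;
};

/* One list head per order; no fallback slot is needed because the
   order already determines the entry size.  */
static page_entry *free_lists[NUM_ORDERS];

static inline page_entry **
find_free_list (unsigned order)
{
  return &free_lists[order];   /* O(1), no search */
}

static page_entry *
pop_free_page (unsigned order)
{
  page_entry **head = find_free_list (order);
  page_entry *p = *head;
  if (p)
    *head = p->next;   /* every entry here already has the right size */
  return p;
}

static void
push_free_page (page_entry *e)
{
  page_entry **head = find_free_list (e->order);
  e->next = *head;
  *head = e;
}

int
main ()
{
  page_entry e = { nullptr, PAGE_BYTES, 3 };
  push_free_page (&e);
  printf ("reused page of %zu bytes: %s\n", e.bytes,
          pop_free_page (3) == &e ? "yes" : "no");
  return 0;
}

In ggc-page.cc itself the details differ, but the point is the lookup
cost: keyed by size it is a scan, keyed by order it is a constant-time
array access.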
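For comparison, the sorting approach described at the top roughly
amounts to the following, again as a simplified standalone sketch rather
than the actual page_sort/release_pages change: collect the free_pages
chain, sort the entries by their page address and relink them, so that
physically adjacent pages end up next to each other on the list.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

struct page_entry
{
  page_entry *next;
  char *page;    /* start address of the backing memory */
  size_t bytes;
};

/* Collect the chain into a vector, sort by address, relink.  */
static page_entry *
sort_free_pages (page_entry *head)
{
  std::vector<page_entry *> v;
  for (page_entry *p = head; p; p = p->next)
    v.push_back (p);
  std::sort (v.begin (), v.end (),
             [] (const page_entry *a, const page_entry *b)
             { return a->page < b->page; });
  page_entry *new_head = nullptr, **tail = &new_head;
  for (page_entry *p : v)
    {
      *tail = p;
      tail = &p->next;
    }
  *tail = nullptr;
  return new_head;
}

int
main ()
{
  static char mem[3 * 4096];
  page_entry a = { nullptr, mem + 2 * 4096, 4096 };
  page_entry b = { &a, mem, 4096 };
  page_entry c = { &b, mem + 4096, 4096 };   /* list: c -> b -> a */
  page_entry *sorted = sort_free_pages (&c);
  printf ("first entry is at offset %td\n", sorted->page - mem);   /* 0 */
  return 0;
}

With the entries in ascending address order, neighbouring page-size
chunks can be coalesced and handed back with a single madvise call on
that path instead of one call per page.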