[llvm] [clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

2023-12-22 Thread Giulio Eulisse via cfe-commits

ktf wrote:

 I was carried away by the heavy ion run and other priorities. i can try to
have a look again after Christmas. Feel free to close if you prefer.

Ciao,
Giulio

On Fri, Dec 22 2023 at 9:32 AM, Vassil Vassilev ***@***.***>
wrote:

> @ktf  what is the fate of this PR?
>
> —
> Reply to this email directly, view it on GitHub
> ,
> or unsubscribe
> 
> .
> You are receiving this because you were mentioned.Message ID:
> ***@***.***>
>


https://github.com/llvm/llvm-project/pull/67960
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From afedf8a208a2517b19d50fd89361ea242a03f802 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   8 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 305 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 +++
 8 files changed, 619 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..c52b6121db76d89 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,13 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - std::count(TypesLoaded.materialisedBegin(),
+  TypesLoaded.materialisedEnd(),
+  QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::c

[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits


@@ -7944,9 +7944,13 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - std::count(TypesLoaded.materialisedBegin(),

ktf wrote:

No, materialised simply means the page holding it was allocated. One still 
needs to look for non default initialised elements as before.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,301 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  uintptr_t Allocator = 0;
+
+  constexpr static T *invalidPage() { return reinterpret_cast(SIZE_MAX); }
+
+public:
+  // Default constructor. We build our own allocator.
+  PagedVector()
+  : Allocator(reinterpret_cast(new BumpPtrAllocator) | 0x1) {}
+  PagedVector(BumpPtrAllocator *A)
+  : Allocator(reinterpret_cast(A)) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator & 0x1) {
+  delete getAllocator();
+}
+  }
+
+  // Get the allocator.
+  BumpPtrAllocator *getAllocator() const {
+return reinterpret_cast(Allocator & ~0x1);
+  }
+  // Lookup an element at position Index.
+  T &operator[](std::size_t Index) const { return at(Index); }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &at(std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+auto *&PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == invalidPage()) {
+  PagePtr = getAllocator()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PAGE_SIZE; ++I) {
+new (PagePtr + I) T();
+  }
+}
+// Dereference the element in the page.
+return *((Index % PAGE_SIZE) + PagePtr);
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the expand method without allocating more pages.
+  std::size_t capacity() const { return PageToDataIdx.size() * PAGE_SIZE; }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // expand method.
+  std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void expand(std::size_t NewSize) {
+// You cannot shrink the vector, otherwise
+// one would have to invalidate contents which is expensive and
+// while giving the false hope that the resize is cheap.
+if (NewSize <= Size) {
+  return;
+}
+// If the capacity is enough, just update the size and continue
+// with the currently allocated pages.
+if (NewSize <= capacity()) {
+  Size = NewSize;
+  return;
+}
+// The number of pages to allocate. The Remainder is calculated
+// for the case in which the NewSize is not a multiple of PAGE_SIZE.
+// In that case we need one more page.
+auto Pages = NewSize / PAGE_SIZE;
+auto Remainder = NewSize % PAGE_SIZ

[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,301 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  uintptr_t Allocator = 0;
+
+  constexpr static T *invalidPage() { return reinterpret_cast(SIZE_MAX); }
+
+public:
+  // Default constructor. We build our own allocator.
+  PagedVector()
+  : Allocator(reinterpret_cast(new BumpPtrAllocator) | 0x1) {}
+  PagedVector(BumpPtrAllocator *A)
+  : Allocator(reinterpret_cast(A)) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator & 0x1) {
+  delete getAllocator();
+}
+  }
+
+  // Get the allocator.
+  BumpPtrAllocator *getAllocator() const {
+return reinterpret_cast(Allocator & ~0x1);
+  }
+  // Lookup an element at position Index.
+  T &operator[](std::size_t Index) const { return at(Index); }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &at(std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+auto *&PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == invalidPage()) {
+  PagePtr = getAllocator()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PAGE_SIZE; ++I) {
+new (PagePtr + I) T();
+  }
+}
+// Dereference the element in the page.
+return *((Index % PAGE_SIZE) + PagePtr);
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the expand method without allocating more pages.
+  std::size_t capacity() const { return PageToDataIdx.size() * PAGE_SIZE; }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // expand method.
+  std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void expand(std::size_t NewSize) {

ktf wrote:

Yes, now that I use the BumpPtrAllocator it becomes somewhat easy to support 
shrinking. Updated with the associated code.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From e99bda447b2de2d356539028db7503bdc2e93273 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (De

[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits


@@ -7944,9 +7944,13 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - std::count(TypesLoaded.materialisedBegin(),

ktf wrote:

Now it does.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (De

[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-18 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;

ktf wrote:

No, indeed, I have not checked. Sorry for not replying earlier. I will try to 
have some benchmark as soon as I finish some other unrelated unrelated tests.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 1/2] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(),

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 1/3] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(),

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 1/4] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(),

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,265 @@
+//===- llvm/unittest/ADT/PagedVectorTest.cpp 
--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// PagedVector unit tests.
+//
+//===--===//
+
+#include "llvm/ADT/PagedVector.h"
+#include "gtest/gtest.h"
+#include 
+
+namespace llvm {
+TEST(PagedVectorTest, EmptyTest) {
+  PagedVector V;
+  EXPECT_EQ(V.empty(), true);
+  EXPECT_EQ(V.size(), 0ULL);
+  EXPECT_EQ(V.capacity(), 0ULL);
+  EXPECT_EQ(V.materialisedBegin().getIndex(), 0ULL);
+  EXPECT_EQ(V.materialisedEnd().getIndex(), 0ULL);
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 0LL);
+}
+
+TEST(PagedVectorTest, ExpandTest) {
+  PagedVector V;
+  V.resize(2);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 2ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  EXPECT_EQ(V.materialisedBegin().getIndex(), 2ULL);
+  EXPECT_EQ(V.materialisedEnd().getIndex(), 2ULL);
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 0LL);
+}
+
+TEST(PagedVectorTest, FullPageFillingTest) {
+  PagedVector V;
+  V.resize(10);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 10ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  for (int I = 0; I < 10; ++I) {
+V[I] = I;
+  }
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 10ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  EXPECT_EQ(V.materialisedBegin().getIndex(), 0ULL);
+  EXPECT_EQ(V.materialisedEnd().getIndex(), 10ULL);
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 10LL);
+  for (int I = 0; I < 10; ++I) {
+EXPECT_EQ(V[I], I);
+  }
+}
+
+TEST(PagedVectorTest, HalfPageFillingTest) {
+  PagedVector V;
+  V.resize(5);
+  EXPECT_EQ(V.empty(), false);
+  EXPECT_EQ(V.size(), 5ULL);
+  EXPECT_EQ(V.capacity(), 10ULL);
+  for (int I = 0; I < 5; ++I) {
+V[I] = I;
+  }
+  EXPECT_EQ(std::distance(V.materialisedBegin(), V.materialisedEnd()), 5LL);
+  for (int I = 0; I < 5; ++I) {
+EXPECT_EQ(V[I], I);
+  }

ktf wrote:

It would actually assert because the index is out of bound. I will add an 
appropriate EXPECT_DEATH(...)

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 1/5] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(),

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)
+  delete Allocator.getPointer();
+  }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &operator[](std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == InvalidPage) {
+  T *NewPagePtr = Allocator.getPointer()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PAGE_SIZE; ++I)
+new (NewPagePtr + I) T();
+
+  PagePtr = reinterpret_cast(NewPagePtr);
+}
+// Dereference the element in the page.
+return *((Index % PAGE_SIZE) + reinterpret_cast(PagePtr));
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the resize method without allocating more pages.
+  [[nodiscard]] std::size_t capacity() const {
+return PageToDataIdx.size() * PAGE_SIZE;
+  }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // resize method.
+  [[nodiscard]] std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void resize(std::size_t NewSize) {
+// Handle shrink case: delete the pages and update the size.
+if (NewSize < Size) {
+  std::size_t NewLastPage = (NewSize - 1) / PAGE_SIZE;
+  for (std::size_t I = NewLastPage + 1; I < PageToDataIdx.size(); ++I) {
+uintptr_t PagePtr = PageToDataIdx[I];
+if (PagePtr == InvalidPage)
+  continue;
+T *Page = reinterpret_cast(PagePtr);
+// We need to invoke the destructor on all the elements of the page.
+for (std::size_t J = 0; J < PAGE_SIZE; ++J)
+  Page[J].~T();
+Allocator.getPointer()->Deallocate(Page);
+  }
+  // Delete the extra ones in the new last page.
+  uintptr_t PagePtr = PageToDataIdx[NewLastPage];
+  if (PagePtr != InvalidPage) {
+T *Page = reinterpret_cast(PagePtr)

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)
+  delete Allocator.getPointer();
+  }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &operator[](std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == InvalidPage) {
+  T *NewPagePtr = Allocator.getPointer()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PAGE_SIZE; ++I)
+new (NewPagePtr + I) T();
+
+  PagePtr = reinterpret_cast(NewPagePtr);
+}
+// Dereference the element in the page.
+return *((Index % PAGE_SIZE) + reinterpret_cast(PagePtr));
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the resize method without allocating more pages.
+  [[nodiscard]] std::size_t capacity() const {
+return PageToDataIdx.size() * PAGE_SIZE;
+  }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // resize method.
+  [[nodiscard]] std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void resize(std::size_t NewSize) {
+// Handle shrink case: delete the pages and update the size.
+if (NewSize < Size) {
+  std::size_t NewLastPage = (NewSize - 1) / PAGE_SIZE;
+  for (std::size_t I = NewLastPage + 1; I < PageToDataIdx.size(); ++I) {
+uintptr_t PagePtr = PageToDataIdx[I];
+if (PagePtr == InvalidPage)
+  continue;
+T *Page = reinterpret_cast(PagePtr);
+// We need to invoke the destructor on all the elements of the page.
+for (std::size_t J = 0; J < PAGE_SIZE; ++J)
+  Page[J].~T();
+Allocator.getPointer()->Deallocate(Page);
+  }
+  // Delete the extra ones in the new last page.
+  uintptr_t PagePtr = PageToDataIdx[NewLastPage];
+  if (PagePtr != InvalidPage) {
+T *Page = reinterpret_cast(PagePtr)

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)
+  delete Allocator.getPointer();
+  }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &operator[](std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == InvalidPage) {
+  T *NewPagePtr = Allocator.getPointer()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PAGE_SIZE; ++I)
+new (NewPagePtr + I) T();
+
+  PagePtr = reinterpret_cast(NewPagePtr);
+}
+// Dereference the element in the page.
+return *((Index % PAGE_SIZE) + reinterpret_cast(PagePtr));
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the resize method without allocating more pages.
+  [[nodiscard]] std::size_t capacity() const {
+return PageToDataIdx.size() * PAGE_SIZE;
+  }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // resize method.
+  [[nodiscard]] std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void resize(std::size_t NewSize) {
+// Handle shrink case: delete the pages and update the size.
+if (NewSize < Size) {
+  std::size_t NewLastPage = (NewSize - 1) / PAGE_SIZE;
+  for (std::size_t I = NewLastPage + 1; I < PageToDataIdx.size(); ++I) {
+uintptr_t PagePtr = PageToDataIdx[I];
+if (PagePtr == InvalidPage)
+  continue;
+T *Page = reinterpret_cast(PagePtr);
+// We need to invoke the destructor on all the elements of the page.
+for (std::size_t J = 0; J < PAGE_SIZE; ++J)
+  Page[J].~T();
+Allocator.getPointer()->Deallocate(Page);
+  }
+  // Delete the extra ones in the new last page.
+  uintptr_t PagePtr = PageToDataIdx[NewLastPage];
+  if (PagePtr != InvalidPage) {
+T *Page = reinterpret_cast(PagePtr)

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)
+  delete Allocator.getPointer();
+  }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &operator[](std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == InvalidPage) {
+  T *NewPagePtr = Allocator.getPointer()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PAGE_SIZE; ++I)
+new (NewPagePtr + I) T();
+
+  PagePtr = reinterpret_cast(NewPagePtr);
+}
+// Dereference the element in the page.
+return *((Index % PAGE_SIZE) + reinterpret_cast(PagePtr));
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the resize method without allocating more pages.
+  [[nodiscard]] std::size_t capacity() const {
+return PageToDataIdx.size() * PAGE_SIZE;
+  }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // resize method.
+  [[nodiscard]] std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void resize(std::size_t NewSize) {
+// Handle shrink case: delete the pages and update the size.
+if (NewSize < Size) {
+  std::size_t NewLastPage = (NewSize - 1) / PAGE_SIZE;
+  for (std::size_t I = NewLastPage + 1; I < PageToDataIdx.size(); ++I) {
+uintptr_t PagePtr = PageToDataIdx[I];
+if (PagePtr == InvalidPage)
+  continue;
+T *Page = reinterpret_cast(PagePtr);
+// We need to invoke the destructor on all the elements of the page.
+for (std::size_t J = 0; J < PAGE_SIZE; ++J)
+  Page[J].~T();
+Allocator.getPointer()->Deallocate(Page);
+  }
+  // Delete the extra ones in the new last page.
+  uintptr_t PagePtr = PageToDataIdx[NewLastPage];
+  if (PagePtr != InvalidPage) {
+T *Page = reinterpret_cast(PagePtr)

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 1/6] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(),

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/11] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/12] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

ktf wrote:

@zygoloid @kuhar thank you for the detailed comments. I am still reviewing some 
of them, in particular the one of @zygoloid  about the equality operator does 
not seem to work out of the box, so I need a bit more time thinking about it. I 
pushed a new version with the current state and resolved all the comments I 
think I have fixed.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/13] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/14] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(

[clang] Introduce paged vector (PR #66430)

2023-09-19 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)
+  delete Allocator.getPointer();
+  }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &operator[](std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == InvalidPage) {
+  T *NewPagePtr = Allocator.getPointer()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PAGE_SIZE; ++I)
+new (NewPagePtr + I) T();
+
+  PagePtr = reinterpret_cast(NewPagePtr);
+}
+// Dereference the element in the page.
+return *((Index % PAGE_SIZE) + reinterpret_cast(PagePtr));
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the resize method without allocating more pages.
+  [[nodiscard]] std::size_t capacity() const {
+return PageToDataIdx.size() * PAGE_SIZE;
+  }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // resize method.
+  [[nodiscard]] std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void resize(std::size_t NewSize) {
+// Handle shrink case: delete the pages and update the size.
+if (NewSize < Size) {
+  std::size_t NewLastPage = (NewSize - 1) / PAGE_SIZE;
+  for (std::size_t I = NewLastPage + 1; I < PageToDataIdx.size(); ++I) {
+uintptr_t PagePtr = PageToDataIdx[I];
+if (PagePtr == InvalidPage)
+  continue;
+T *Page = reinterpret_cast(PagePtr);
+// We need to invoke the destructor on all the elements of the page.
+for (std::size_t J = 0; J < PAGE_SIZE; ++J)
+  Page[J].~T();
+Allocator.getPointer()->Deallocate(Page);
+  }
+  // Delete the extra ones in the new last page.
+  uintptr_t PagePtr = PageToDataIdx[NewLastPage];
+  if (PagePtr != InvalidPage) {
+T *Page = reinterpret_cast(PagePtr)

[clang] Introduce paged vector (PR #66430)

2023-09-20 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/16] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(

[clang] Introduce paged vector (PR #66430)

2023-09-20 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-20 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)
+  delete Allocator.getPointer();
+  }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &operator[](std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == InvalidPage) {
+  T *NewPagePtr = Allocator.getPointer()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PAGE_SIZE; ++I)
+new (NewPagePtr + I) T();
+
+  PagePtr = reinterpret_cast(NewPagePtr);
+}
+// Dereference the element in the page.
+return *((Index % PAGE_SIZE) + reinterpret_cast(PagePtr));
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the resize method without allocating more pages.
+  [[nodiscard]] std::size_t capacity() const {
+return PageToDataIdx.size() * PAGE_SIZE;
+  }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // resize method.
+  [[nodiscard]] std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void resize(std::size_t NewSize) {
+// Handle shrink case: delete the pages and update the size.
+if (NewSize < Size) {
+  std::size_t NewLastPage = (NewSize - 1) / PAGE_SIZE;
+  for (std::size_t I = NewLastPage + 1; I < PageToDataIdx.size(); ++I) {
+uintptr_t PagePtr = PageToDataIdx[I];
+if (PagePtr == InvalidPage)
+  continue;
+T *Page = reinterpret_cast(PagePtr);
+// We need to invoke the destructor on all the elements of the page.
+for (std::size_t J = 0; J < PAGE_SIZE; ++J)
+  Page[J].~T();
+Allocator.getPointer()->Deallocate(Page);
+  }
+  // Delete the extra ones in the new last page.
+  uintptr_t PagePtr = PageToDataIdx[NewLastPage];
+  if (PagePtr != InvalidPage) {
+T *Page = reinterpret_cast(PagePtr)

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/17] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,302 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you have iterators it
+// probably means you are going to touch all the memory in any case, so better
+// use a std::vector in the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PageSize > 0, "PageSize must be greater than 0. Most likely "
+  "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator and mark it as such with 
`true` in the second pair element.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {
+assert(A != nullptr && "Allocator cannot be null");
+  }
+
+  ~PagedVector() {
+clear();
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)
+  delete Allocator.getPointer();
+  }
+
+  // Look up an element at position `Index`.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &operator[](std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PageSize < PageToDataIdx.size());
+uintptr_t &PagePtr = PageToDataIdx[Index / PageSize];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == InvalidPage) {
+  T *NewPagePtr = Allocator.getPointer()->template Allocate(PageSize);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PageSize; ++I)
+new (NewPagePtr + I) T();
+
+  PagePtr = reinterpret_cast(NewPagePtr);
+}
+// Dereference the element in the page.
+return *((Index % PageSize) + reinterpret_cast(PagePtr));
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the resize method without allocating more pages.
+  [[nodiscard]] std::size_t capacity() const {
+return PageToDataIdx.size() * PageSize;
+  }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // resize method.
+  [[nodiscard]] std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void resize(std::size_t NewSize) {
+// Handle shrink case: delete the pages and update the size.
+if (NewSize < Size) {
+  std::size_t NewLastPage = (NewSize - 1) / PageSize;

ktf wrote:

Fixed and test added.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,302 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you have iterators it
+// probably means you are going to touch all the memory in any case, so better
+// use a std::vector in the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PageSize > 0, "PageSize must be greater than 0. Most likely "
+  "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator and mark it as such with 
`true` in the second pair element.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {
+assert(A != nullptr && "Allocator cannot be null");
+  }
+
+  ~PagedVector() {
+clear();
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)

ktf wrote:

I am not sure I understand what you mean.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/18] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/19] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,302 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you have iterators it
+// probably means you are going to touch all the memory in any case, so better
+// use a std::vector in the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PageSize > 0, "PageSize must be greater than 0. Most likely "
+  "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator and mark it as such with 
`true` in the second pair element.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {
+assert(A != nullptr && "Allocator cannot be null");
+  }
+
+  ~PagedVector() {
+clear();
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)

ktf wrote:

Ah, ok, I guess that's what @vgvassilev just suggested and I committed. 
Resolving this.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/22] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5d2553755add3e7d2adb0b2119d6a0c355df1551 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH 01/23] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  32 ++
 llvm/include/llvm/ADT/PagedVector.h   | 322 ++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 265 ++
 8 files changed, 633 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 72369a9166375e46fe982a5330e29d7b0709c426 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  33 ++
 llvm/include/llvm/ADT/PagedVector.h   | 299 
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 329 ++
 8 files changed, 675 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)
+  delete Allocator.getPointer();
+  }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &operator[](std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == InvalidPage) {
+  T *NewPagePtr = Allocator.getPointer()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.

ktf wrote:

I have added some documentation at top level and added a test which makes sure 
the behavior is the expected one.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From c31b72ae57575d61559379509eff2bd60bc39a3b Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  33 ++
 llvm/include/llvm/ADT/PagedVector.h   | 302 
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 329 ++
 8 files changed, 678 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 44b01fb22b4a44a2de35299e7649626cea30852a Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  33 ++
 llvm/include/llvm/ADT/PagedVector.h   | 304 
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 329 ++
 8 files changed, 680 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)
+  delete Allocator.getPointer();
+  }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &operator[](std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == InvalidPage) {
+  T *NewPagePtr = Allocator.getPointer()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PAGE_SIZE; ++I)
+new (NewPagePtr + I) T();
+
+  PagePtr = reinterpret_cast(NewPagePtr);
+}
+// Dereference the element in the page.
+return *((Index % PAGE_SIZE) + reinterpret_cast(PagePtr));
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the resize method without allocating more pages.
+  [[nodiscard]] std::size_t capacity() const {
+return PageToDataIdx.size() * PAGE_SIZE;
+  }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // resize method.
+  [[nodiscard]] std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.

ktf wrote:

I have improved the comment.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From f85c72686613389671667ddd4997bec696f81c77 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  33 ++
 llvm/include/llvm/ADT/PagedVector.h   | 306 
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 329 ++
 8 files changed, 682 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,301 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  uintptr_t Allocator = 0;
+
+  constexpr static T *invalidPage() { return reinterpret_cast(SIZE_MAX); }
+
+public:
+  // Default constructor. We build our own allocator.
+  PagedVector()
+  : Allocator(reinterpret_cast(new BumpPtrAllocator) | 0x1) {}
+  PagedVector(BumpPtrAllocator *A)
+  : Allocator(reinterpret_cast(A)) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator & 0x1) {
+  delete getAllocator();
+}
+  }
+
+  // Get the allocator.
+  BumpPtrAllocator *getAllocator() const {
+return reinterpret_cast(Allocator & ~0x1);
+  }
+  // Lookup an element at position Index.
+  T &operator[](std::size_t Index) const { return at(Index); }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &at(std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+auto *&PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == invalidPage()) {
+  PagePtr = getAllocator()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PAGE_SIZE; ++I) {
+new (PagePtr + I) T();
+  }
+}
+// Dereference the element in the page.
+return *((Index % PAGE_SIZE) + PagePtr);
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the expand method without allocating more pages.
+  std::size_t capacity() const { return PageToDataIdx.size() * PAGE_SIZE; }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // expand method.
+  std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void expand(std::size_t NewSize) {
+// You cannot shrink the vector, otherwise
+// one would have to invalidate contents which is expensive and
+// while giving the false hope that the resize is cheap.
+if (NewSize <= Size) {
+  return;
+}
+// If the capacity is enough, just update the size and continue
+// with the currently allocated pages.
+if (NewSize <= capacity()) {
+  Size = NewSize;
+  return;
+}
+// The number of pages to allocate. The Remainder is calculated
+// for the case in which the NewSize is not a multiple of PAGE_SIZE.
+// In that case we need one more page.
+auto Pages = NewSize / PAGE_SIZE;
+auto Remainder = NewSize % PAGE_SIZ

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 5c92faeea71ae72725010c256a3e29aa3a7bcfca Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  33 ++
 llvm/include/llvm/ADT/PagedVector.h   | 306 
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 332 ++
 8 files changed, 685 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,84 @@
+//===- llvm/unittest/ADT/PagedVectorTest.cpp 
--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// PagedVector unit tests.
+//
+//===--===//
+
+#include "llvm/ADT/PagedVector.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+TEST(PagedVectorTest, FunctionalityTest) {

ktf wrote:

I think I have added EXPECT_DEATH for all the testable cases.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+
+  ~PagedVector() {

ktf wrote:

With the current patch, I do not see any reason why one should have move/copy 
construction/assignment. I would suggest to add this in a later commit, if at 
all needed.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 0585b814aa7cda8f6908d198b1d7f4d300c85951 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  33 ++
 llvm/include/llvm/ADT/PagedVector.h   | 308 
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 332 ++
 8 files changed, 687 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (

[clang] Introduce paged vector (PR #66430)

2023-09-21 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From f21af0d0dccdc23eebe3b4c3b13c072b9398cc18 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  33 ++
 llvm/include/llvm/ADT/PagedVector.h   | 308 
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 342 ++
 8 files changed, 697 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (

[clang] Introduce paged vector (PR #66430)

2023-09-22 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From dd75b18a8497381f478a85f1bf7b622583c5ca29 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  33 ++
 llvm/include/llvm/ADT/PagedVector.h   | 307 
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 342 ++
 8 files changed, 696 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (

[clang] Introduce paged vector (PR #66430)

2023-09-25 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From b5f984b5521c5b226a7c5297d19eb53953481890 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  33 ++
 llvm/include/llvm/ADT/PagedVector.h   | 316 
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 342 ++
 8 files changed, 705 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (

[clang] Introduce paged vector (PR #66430)

2023-09-25 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,307 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+//
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront.
+//
+// As a side effect the elements are initialised later than in a normal vector.
+// On the first access to one of the elements of a given page all, the elements
+// of the page are initialised. This also means that the elements of the page
+// are initialised beyond the size of the vector.
+//
+// Similarly on destruction the elements are destroyed only when the page is
+// not needed anymore, delaying invoking the destructor of the elements.
+//
+// Notice that this does not have iterators, because if you have iterators it
+// probably means you are going to touch all the memory in any case, so better
+// use a std::vector in the first place.

ktf wrote:

Done.

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-25 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 1512bc16040e1015b6f8ed2b51ed8efb7fd97b8e Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  33 ++
 llvm/include/llvm/ADT/PagedVector.h   | 317 
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 342 ++
 8 files changed, 706 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), (

[clang] Introduce paged vector (PR #66430)

2023-09-25 Thread Giulio Eulisse via cfe-commits

ktf wrote:

I am afraid I will need a bit of the to fix the C2248 from Visual Studio. 

https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-25 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-25 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-25 Thread Giulio Eulisse via cfe-commits


@@ -0,0 +1,322 @@
+//===- llvm/ADT/PagedVector.h - 'Lazyly allocated' vectors *- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the PagedVector class.
+//
+//===--===//
+#ifndef LLVM_ADT_PAGEDVECTOR_H
+#define LLVM_ADT_PAGEDVECTOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include 
+#include 
+#include 
+
+namespace llvm {
+// A vector that allocates memory in pages.
+// Order is kept, but memory is allocated only when one element of the page is
+// accessed. This introduces a level of indirection, but it is useful when you
+// have a sparsely initialised vector where the full size is allocated upfront
+// with the default constructor and elements are initialised later, on first
+// access.
+//
+// Notice that this does not have iterators, because if you
+// have iterators it probably means you are going to touch
+// all the memory in any case, so better use a std::vector in
+// the first place.
+//
+// Pages are allocated in SLAB_SIZE chunks, using the BumpPtrAllocator.
+template 
+class PagedVector {
+  static_assert(PAGE_SIZE > 0, "PAGE_SIZE must be greater than 0. Most likely "
+   "you want it to be greater than 16.");
+  // The actual number of element in the vector which can be accessed.
+  std::size_t Size = 0;
+
+  // The position of the initial element of the page in the Data vector.
+  // Pages are allocated contiguously in the Data vector.
+  mutable std::vector PageToDataIdx;
+  // Actual page data. All the page elements are added to this vector on the
+  // first access of any of the elements of the page. Elements default
+  // constructed and elements of the page are stored contiguously. The order of
+  // the elements however depends on the order of access of the pages.
+  PointerIntPair Allocator;
+
+  constexpr static uintptr_t InvalidPage = SIZE_MAX;
+
+public:
+  using value_type = T;
+
+  // Default constructor. We build our own allocator.
+  PagedVector() : Allocator(new BumpPtrAllocator, true) {}
+  PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {}
+
+  ~PagedVector() {
+// If we own the allocator, delete it.
+if (Allocator.getInt() == true)
+  delete Allocator.getPointer();
+  }
+
+  // Lookup an element at position i.
+  // If the associated page is not filled, it will be filled with default
+  // constructed elements. If the associated page is filled, return the 
element.
+  T &operator[](std::size_t Index) const {
+assert(Index < Size);
+assert(Index / PAGE_SIZE < PageToDataIdx.size());
+uintptr_t &PagePtr = PageToDataIdx[Index / PAGE_SIZE];
+// If the page was not yet allocated, allocate it.
+if (PagePtr == InvalidPage) {
+  T *NewPagePtr = Allocator.getPointer()->template Allocate(PAGE_SIZE);
+  // We need to invoke the default constructor on all the elements of the
+  // page.
+  for (std::size_t I = 0; I < PAGE_SIZE; ++I)
+new (NewPagePtr + I) T();
+
+  PagePtr = reinterpret_cast(NewPagePtr);
+}
+// Dereference the element in the page.
+return *((Index % PAGE_SIZE) + reinterpret_cast(PagePtr));
+  }
+
+  // Return the capacity of the vector. I.e. the maximum size it can be 
expanded
+  // to with the resize method without allocating more pages.
+  [[nodiscard]] std::size_t capacity() const {
+return PageToDataIdx.size() * PAGE_SIZE;
+  }
+
+  // Return the size of the vector. I.e. the maximum index that can be
+  // accessed, i.e. the maximum value which was used as argument of the
+  // resize method.
+  [[nodiscard]] std::size_t size() const { return Size; }
+
+  // Expands the vector to the given NewSize number of elements.
+  // If the vector was smaller, allocates new pages as needed.
+  // It should be called only with NewSize >= Size.
+  void resize(std::size_t NewSize) {
+// Handle shrink case: delete the pages and update the size.
+if (NewSize < Size) {
+  std::size_t NewLastPage = (NewSize - 1) / PAGE_SIZE;
+  for (std::size_t I = NewLastPage + 1; I < PageToDataIdx.size(); ++I) {
+uintptr_t PagePtr = PageToDataIdx[I];
+if (PagePtr == InvalidPage)
+  continue;
+T *Page = reinterpret_cast(PagePtr);
+// We need to invoke the destructor on all the elements of the page.
+for (std::size_t J = 0; J < PAGE_SIZE; ++J)
+  Page[J].~T();
+Allocator.getPointer()->Deallocate(Page);
+  }
+  // Delete the extra ones in the new last page.
+  uintptr_t PagePtr = PageToDataIdx[NewLastPage];
+  if (PagePtr != InvalidPage) {
+T *Page = reinterpret_cast(PagePtr)

[clang] Introduce paged vector (PR #66430)

2023-09-25 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf resolved https://github.com/llvm/llvm-project/pull/66430
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Introduce paged vector (PR #66430)

2023-09-25 Thread Giulio Eulisse via cfe-commits

https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/66430

>From 4b02b55cccd73b0fe3efea52054735fca6e6c159 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+...@users.noreply.github.com>
Date: Thu, 14 Sep 2023 21:58:21 +0200
Subject: [PATCH] Introduce PagedVector class

The goal of the class is to be an (almost) drop in replacement for
SmallVector and std::vector when those are presized and filled later,
as it happens in SourceManager and ASTReader.

By splitting the actual vector in pages of the same size and allocating
the pages only when they are needed, using this containers reduces the
memory usage by a factor 4 for the cases relevant to the ALICE
experiment ROOT / cling usage.
---
 clang/include/clang/Basic/SourceManager.h |   3 +-
 clang/include/clang/Serialization/ASTReader.h |   5 +-
 clang/lib/Basic/SourceManager.cpp |  10 +-
 clang/lib/Serialization/ASTReader.cpp |   5 +-
 llvm/docs/ProgrammersManual.rst   |  33 ++
 llvm/include/llvm/ADT/PagedVector.h   | 323 +
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/PagedVectorTest.cpp| 342 ++
 8 files changed, 712 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/PagedVector.h
 create mode 100644 llvm/unittests/ADT/PagedVectorTest.cpp

diff --git a/clang/include/clang/Basic/SourceManager.h 
b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..e37caa2252532f9 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase {
   ///
   /// Negative FileIDs are indexes into this table. To get from ID to an index,
   /// use (-ID - 2).
-  SmallVector LoadedSLocEntryTable;
+  llvm::PagedVector LoadedSLocEntryTable;
 
   /// The starting offset of the next local SLocEntry.
   ///
diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..65e19c6e44cf571 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PagedVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the type with
   /// ID = (I + 1) << FastQual::Width has already been loaded
-  std::vector TypesLoaded;
+  llvm::PagedVector TypesLoaded;
 
   using GlobalTypeMapType =
   ContinuousRangeMap;
@@ -501,7 +502,7 @@ class ASTReader
   ///
   /// When the pointer at index I is non-NULL, the declaration with ID
   /// = I + 1 has already been loaded.
-  std::vector DeclsLoaded;
+  llvm::PagedVector DeclsLoaded;
 
   using GlobalDeclMapType =
   ContinuousRangeMap;
diff --git a/clang/lib/Basic/SourceManager.cpp 
b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..7fa8b8096ac4931 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2344,11 +2344,11 @@ SourceManager::MemoryBufferSizes 
SourceManager::getMemoryBufferSizes() const {
 }
 
 size_t SourceManager::getDataStructureSizes() const {
-  size_t size = llvm::capacity_in_bytes(MemBufferInfos)
-+ llvm::capacity_in_bytes(LocalSLocEntryTable)
-+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
-+ llvm::capacity_in_bytes(SLocEntryLoaded)
-+ llvm::capacity_in_bytes(FileInfos);
+  size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
+llvm::capacity_in_bytes(LocalSLocEntryTable) +
+llvm::capacity_in_bytes(LoadedSLocEntryTable) +
+llvm::capacity_in_bytes(SLocEntryLoaded) +
+llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
 size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..badd54987af18dd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7944,9 +7944,10 @@ void ASTReader::PrintStats() {
   std::fprintf(stderr, "*** AST File Statistics:\n");
 
   unsigned NumTypesLoaded =
-  TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+  TypesLoaded.size() - llvm::count(TypesLoaded.materialised(), QualType());
   unsigned NumDeclsLoaded =
-  DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+  DeclsLoaded.size() -
+  llvm::count(DeclsLoaded.materialised(), 

  1   2   3   4   >