https://github.com/lydkim created 
https://github.com/llvm/llvm-project/pull/169121

llvm-dwp runs out of memory (OOM) on big builds in which sections are larger 
than 4 GB. The problem is described in this issue: 
https://github.com/llvm/llvm-project/issues/168923

Problem: 
When `ContentStorage` exceeds 4 GB, the value assigned to the `uint32_t 
ContentEnd` field in `doneAppending()` is truncated to 32 bits. Later, in 
`getContentsForAppending()`, the calculation `Size = ContentEnd - ContentStart` 
wraps around (unsigned integer underflow), yielding an arbitrarily large size 
and therefore a huge memory allocation.

To address the OOM issue while keeping MCEncodedFragment size minimal, this 
solution transitions from a begin/end representation (`uint32_t ContentStart`, 
`uint32_t ContentEnd`) to a begin/size representation (`uint64_t ContentStart`, 
`uint32_t ContentSize`). The new approach changes `ContentStart` to 64-bit to 
support positions beyond 4GB, while `ContentSize` remains 32-bit, limiting 
individual fragments to 4GB.

This problem exists only in the llvm-21 branch. The recent refactoring in the 
main branch makes this fix unnecessary there.

Fixes #168923

From 233e674a7786f5551317848e9ea783d26494c325 Mon Sep 17 00:00:00 2001
From: Lydia Kim <[email protected]>
Date: Fri, 21 Nov 2025 11:41:23 -0800
Subject: [PATCH] [server-llvm-21][MC] Fixing vector overflow

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

Differential Revision: https://phabricator.intern.facebook.com/D87662897
---
 llvm/include/llvm/MC/MCSection.h | 25 +++++++++++++------------
 llvm/lib/MC/MCSection.cpp        |  4 ++--
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 64b13972bfca1..9daaebf7e7935 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -298,8 +298,8 @@ class MCFragment {
 /// data.
 class MCEncodedFragment : public MCFragment {
   uint8_t BundlePadding = 0;
-  uint32_t ContentStart = 0;
-  uint32_t ContentEnd = 0;
+  uint32_t ContentSize = 0;
+  uint64_t ContentStart = 0;
   uint32_t FixupStart = 0;
   uint32_t FixupEnd = 0;
 
@@ -360,22 +360,23 @@ class MCEncodedFragment : public MCFragment {
 
   // Content-related functions manage parent's storage using ContentStart and
   // ContentSize.
-  void clearContents() { ContentEnd = ContentStart; }
+  void clearContents() { ContentSize = 0; }
   // Get a SmallVector reference. The caller should call doneAppending to update
-  // `ContentEnd`.
+  // `ContentSize`.
   SmallVectorImpl<char> &getContentsForAppending() {
     SmallVectorImpl<char> &S = getParent()->ContentStorage;
-    if (LLVM_UNLIKELY(ContentEnd != S.size())) {
+    if (LLVM_UNLIKELY(ContentStart + ContentSize != S.size())) {
       // Move the elements to the end. Reserve space to avoid invalidating
       // S.begin()+I for `append`.
-      auto Size = ContentEnd - ContentStart;
       auto I = std::exchange(ContentStart, S.size());
-      S.reserve(S.size() + Size);
-      S.append(S.begin() + I, S.begin() + I + Size);
+      S.reserve(S.size() + ContentSize);
+      S.append(S.begin() + I, S.begin() + I + ContentSize);
     }
     return S;
   }
-  void doneAppending() { ContentEnd = getParent()->ContentStorage.size(); }
+  void doneAppending() {
+    ContentSize = getParent()->ContentStorage.size() - ContentStart;
+  }
   void appendContents(ArrayRef<char> Contents) {
     getContentsForAppending().append(Contents.begin(), Contents.end());
     doneAppending();
@@ -387,11 +388,11 @@ class MCEncodedFragment : public MCFragment {
   LLVM_ABI void setContents(ArrayRef<char> Contents);
   MutableArrayRef<char> getContents() {
     return MutableArrayRef(getParent()->ContentStorage)
-        .slice(ContentStart, ContentEnd - ContentStart);
+        .slice(ContentStart, ContentSize);
   }
   ArrayRef<char> getContents() const {
     return ArrayRef(getParent()->ContentStorage)
-        .slice(ContentStart, ContentEnd - ContentStart);
+        .slice(ContentStart, ContentSize);
   }
 
   // Fixup-related functions manage parent's storage using FixupStart and
@@ -409,7 +410,7 @@ class MCEncodedFragment : public MCFragment {
         .slice(FixupStart, FixupEnd - FixupStart);
   }
 
-  size_t getSize() const { return ContentEnd - ContentStart; }
+  size_t getSize() const { return ContentSize; }
 };
 
 /// Fragment for data and encoded instructions.
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index a7330692571de..97f591fbf0e28 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -84,11 +84,11 @@ LLVM_DUMP_METHOD void MCSection::dump(
 
 void MCEncodedFragment::setContents(ArrayRef<char> Contents) {
   auto &S = getParent()->ContentStorage;
-  if (ContentStart + Contents.size() > ContentEnd) {
+  if (Contents.size() > ContentSize) {
     ContentStart = S.size();
     S.resize_for_overwrite(S.size() + Contents.size());
   }
-  ContentEnd = ContentStart + Contents.size();
+  ContentSize = Contents.size();
   llvm::copy(Contents, S.begin() + ContentStart);
 }
 

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to