llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-mc

Author: Lydia Kim (lydkim)

<details>
<summary>Changes</summary>

An OOM situation occurs in llvm-dwp when running big builds where sections are 
&gt;4 GB. The problem is addressed in this post: 
https://github.com/llvm/llvm-project/issues/168923

Problem: 
When `ContentStorage` exceeds 4GB, the `uint32_t ContentEnd` field overflows 
when assigned in `doneAppending()`. Later in `getContentsForAppending()`, the 
calculation `Size = ContentEnd - ContentStart` causes unsigned integer 
underflow, producing large arbitrary memory allocations.

To address the OOM issue while keeping MCEncodedFragment size minimal, this 
solution transitions from a begin/end representation (`uint32_t ContentStart`, 
`uint32_t ContentEnd`) to a begin/size representation (`uint64_t ContentStart`, 
`uint32_t ContentSize`). The new approach changes `ContentStart` to 64-bit to 
support positions beyond 4GB, while `ContentSize` remains 32-bit, limiting 
individual fragments to 4GB.

This problem only exists in the llvm-21 branch. The recent refactoring in main 
branch makes this unnecessary there.

Fixes #<!-- -->168923

---
Full diff: https://github.com/llvm/llvm-project/pull/169121.diff


2 Files Affected:

- (modified) llvm/include/llvm/MC/MCSection.h (+13-12) 
- (modified) llvm/lib/MC/MCSection.cpp (+2-2) 


``````````diff
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 64b13972bfca1..9daaebf7e7935 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -298,8 +298,8 @@ class MCFragment {
 /// data.
 class MCEncodedFragment : public MCFragment {
   uint8_t BundlePadding = 0;
-  uint32_t ContentStart = 0;
-  uint32_t ContentEnd = 0;
+  uint32_t ContentSize = 0;
+  uint64_t ContentStart = 0;
   uint32_t FixupStart = 0;
   uint32_t FixupEnd = 0;
 
@@ -360,22 +360,23 @@ class MCEncodedFragment : public MCFragment {
 
   // Content-related functions manage parent's storage using ContentStart and
   // ContentSize.
-  void clearContents() { ContentEnd = ContentStart; }
+  void clearContents() { ContentSize = 0; }
   // Get a SmallVector reference. The caller should call doneAppending to 
update
-  // `ContentEnd`.
+  // `ContentSize`.
   SmallVectorImpl<char> &getContentsForAppending() {
     SmallVectorImpl<char> &S = getParent()->ContentStorage;
-    if (LLVM_UNLIKELY(ContentEnd != S.size())) {
+    if (LLVM_UNLIKELY(ContentStart + ContentSize != S.size())) {
       // Move the elements to the end. Reserve space to avoid invalidating
       // S.begin()+I for `append`.
-      auto Size = ContentEnd - ContentStart;
       auto I = std::exchange(ContentStart, S.size());
-      S.reserve(S.size() + Size);
-      S.append(S.begin() + I, S.begin() + I + Size);
+      S.reserve(S.size() + ContentSize);
+      S.append(S.begin() + I, S.begin() + I + ContentSize);
     }
     return S;
   }
-  void doneAppending() { ContentEnd = getParent()->ContentStorage.size(); }
+  void doneAppending() {
+    ContentSize = getParent()->ContentStorage.size() - ContentStart;
+  }
   void appendContents(ArrayRef<char> Contents) {
     getContentsForAppending().append(Contents.begin(), Contents.end());
     doneAppending();
@@ -387,11 +388,11 @@ class MCEncodedFragment : public MCFragment {
   LLVM_ABI void setContents(ArrayRef<char> Contents);
   MutableArrayRef<char> getContents() {
     return MutableArrayRef(getParent()->ContentStorage)
-        .slice(ContentStart, ContentEnd - ContentStart);
+        .slice(ContentStart, ContentSize);
   }
   ArrayRef<char> getContents() const {
     return ArrayRef(getParent()->ContentStorage)
-        .slice(ContentStart, ContentEnd - ContentStart);
+        .slice(ContentStart, ContentSize);
   }
 
   // Fixup-related functions manage parent's storage using FixupStart and
@@ -409,7 +410,7 @@ class MCEncodedFragment : public MCFragment {
         .slice(FixupStart, FixupEnd - FixupStart);
   }
 
-  size_t getSize() const { return ContentEnd - ContentStart; }
+  size_t getSize() const { return ContentSize; }
 };
 
 /// Fragment for data and encoded instructions.
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index a7330692571de..97f591fbf0e28 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -84,11 +84,11 @@ LLVM_DUMP_METHOD void MCSection::dump(
 
 void MCEncodedFragment::setContents(ArrayRef<char> Contents) {
   auto &S = getParent()->ContentStorage;
-  if (ContentStart + Contents.size() > ContentEnd) {
+  if (Contents.size() > ContentSize) {
     ContentStart = S.size();
     S.resize_for_overwrite(S.size() + Contents.size());
   }
-  ContentEnd = ContentStart + Contents.size();
+  ContentSize = Contents.size();
   llvm::copy(Contents, S.begin() + ContentStart);
 }
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/169121
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to