[clang] [Clang] Fix offsetof sign-extending unsigned array indices >= 128 (PR #204139)

Marlus Cadanus da Costa via cfe-commits Thu, 02 Jul 2026 07:46:14 -0700

https://github.com/marlus updated 
https://github.com/llvm/llvm-project/pull/204139


>From 6139f5fc7d54ee997789e725612141bcfb308917 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 08:42:14 -0400
Subject: [PATCH 01/21] [Clang] Fix offsetof sign-extending unsigned array
 indices >= 128

When evaluating __builtin_offsetof with an unsigned integer array index
(e.g. uint8_t, uint16_t) whose value has the high bit set, Clang was
calling getSExtValue() on the APSInt index, which sign-extends the value
and produces a large bogus offset.

Fix this by checking whether the APSInt is unsigned and using
getZExtValue() in that case instead.

Fixes #199319
---
 clang/lib/AST/ExprConstant.cpp                |  3 +-
 clang/test/Sema/offsetof-unsigned-index.c     | 25 +++++++++++++++
 .../test/SemaCXX/offsetof-unsigned-index.cpp  | 31 +++++++++++++++++++
 3 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Sema/offsetof-unsigned-index.c
 create mode 100644 clang/test/SemaCXX/offsetof-unsigned-index.cpp

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 563d6b3bb0cf9..3178d3cf4a262 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19397,7 +19397,8 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
         return Error(OOE);
       CurrentType = AT->getElementType();
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
-      Result += IdxResult.getSExtValue() * ElementSize;
+      Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue()
+                                        : IdxResult.getSExtValue()) * 
ElementSize;
       break;
     }
 
diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
new file mode 100644
index 0000000000000..480e486fbad72
--- /dev/null
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-fexperimental-new-constant-interpreter
+
+// expected-no-diagnostics
+
+// Test that offsetof correctly zero-extends unsigned array indices >= 128.
+// Previously, Clang would sign-extend uint8_t indices >= 128, producing
+// a large bogus offset value instead of the correct one.
+// https://github.com/llvm/llvm-project/issues/199319
+
+#include <stdint.h>
+#include <stddef.h>
+
+struct MyStruct {
+    void *ptrs[256];
+};
+
+_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)127]) == 127 
* sizeof(void *),
+               "offsetof with uint8_t index 127 should be correct");
+
+_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)128]) == 128 
* sizeof(void *),
+               "offsetof with uint8_t index 128 should be correctly 
zero-extended, not sign-extended");
+
+_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 
* sizeof(void *),
+               "offsetof with uint8_t index 255 should be correctly 
zero-extended, not sign-extended");
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
new file mode 100644
index 0000000000000..50bf50ef0fc35
--- /dev/null
+++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-std=c++11 -fexperimental-new-constant-interpreter
+
+// expected-no-diagnostics
+
+// Test that offsetof correctly zero-extends unsigned array indices >= 128.
+// Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit
+// was set, producing a large bogus offset value instead of the correct one.
+// https://github.com/llvm/llvm-project/issues/199319
+
+#include <cstdint>
+#include <cstddef>
+
+struct MyStruct {
+    void *ptrs[256];
+};
+
+// uint8_t index: values >= 128 were incorrectly sign-extended
+static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)127]) == 127 * 
sizeof(void *),
+              "offsetof with uint8_t index 127 should be correct");
+static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)128]) == 128 * 
sizeof(void *),
+              "offsetof with uint8_t index 128 should be correctly 
zero-extended");
+static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)255]) == 255 * 
sizeof(void *),
+              "offsetof with uint8_t index 255 should be correctly 
zero-extended");
+
+// uint16_t index: values >= 32768 were also affected
+struct BigStruct {
+    char data[65536];
+};
+static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768,
+              "offsetof with uint16_t index 32768 should be correctly 
zero-extended");

>From 992c691791c57d2dcaef7a68c212a8dede83a917 Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 14:52:53 -0400
Subject: [PATCH 02/21] Apply clang-format

---
 clang/lib/AST/ExprConstant.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3178d3cf4a262..01f001a887329 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19398,7 +19398,8 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
       CurrentType = AT->getElementType();
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
       Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue()
-                                        : IdxResult.getSExtValue()) * 
ElementSize;
+                                        : IdxResult.getSExtValue()) *
+                ElementSize;
       break;
     }
 

>From 712497aeb0a0d4823c56439bba925ec31f203607 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 15:02:17 -0400
Subject: [PATCH 03/21] Keep ByteCode compiler in sync: fix offsetof
 sign-extending unsigned array indices

---
 clang/lib/AST/ByteCode/Compiler.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp 
b/clang/lib/AST/ByteCode/Compiler.cpp
index 95f9a2f507335..3f4054544f311 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -3766,8 +3766,14 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const 
OffsetOfExpr *E) {
 
       if (!this->visit(ArrayIndexExpr))
         return false;
-      // Cast to Sint64.
+      // Cast to Sint64. For unsigned types, cast to Uint64 first to
+      // avoid sign-extending values with the high bit set (e.g. uint8_t >= 
128).
       if (IndexT != PT_Sint64) {
+        if (!isSignedType(IndexT) && IndexT != PT_Uint64) {
+          if (!this->emitCast(IndexT, PT_Uint64, E))
+            return false;
+          IndexT = PT_Uint64;
+        }
         if (!this->emitCast(IndexT, PT_Sint64, E))
           return false;
       }

>From 467dbf4ddbfce5840751752c8df55a14304b4e28 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 16:00:44 -0400
Subject: [PATCH 04/21] Reject negative and oversized offsetof array indices

---
 clang/lib/AST/ExprConstant.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 01f001a887329..32b9bb01b9d71 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19397,6 +19397,11 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
         return Error(OOE);
       CurrentType = AT->getElementType();
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
+      // Reject negative indices and indices too large to fit in int64_t,
+      // to avoid sign-extension issues or crashes in getZExtValue().
+      if (IdxResult.isSigned() ? IdxResult.isNegative()
+                               : IdxResult.ugt(APSInt::getMaxValue(64, 
/*Unsigned=*/false)))
+        return Error(OOE);
       Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue()
                                         : IdxResult.getSExtValue()) *
                 ElementSize;

>From edd5b53a1c2834b080ed5b5518918a1ebcebd6ba Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 16:09:59 -0400
Subject: [PATCH 05/21] Reject negative and oversized offsetof array indices in
 ByteCode interpreter

Apply the same guards to InterpretOffsetOf in the ByteCode interpreter
(clang/lib/AST/ByteCode/InterpBuiltin.cpp) as were added to ExprConstant.cpp:
reject negative indices and unsigned indices that exceed INT64_MAX.

In the ByteCode path, indices arrive as int64_t after the Uint64->Sint64 cast
chain in Compiler.cpp. A negative int64_t covers both explicitly negative
signed indices and unsigned values >= 0x8000000000000000 (which wrap to
negative after the cast), so a single Index < 0 guard handles both cases.

Also extend the test files with expected-error cases for negative and
__uint128_t indices.
---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp       |  8 ++++++++
 clang/test/Sema/offsetof-unsigned-index.c      | 11 +++++++++--
 clang/test/SemaCXX/offsetof-unsigned-index.cpp | 10 ++++++++--
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 73952e032f1eb..0a0149e240c2f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -6673,6 +6673,14 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, 
const OffsetOfExpr *E,
       // When generating bytecode, we put all the index expressions as Sint64 
on
       // the stack.
       int64_t Index = ArrayIndices[ArrayIndex];
+      // Reject negative indices and unsigned indices that wrapped to negative
+      // after the Uint64->Sint64 cast (e.g. __uint128_t >= 
0x8000000000000000).
+      if (Index < 0) {
+        S.FFDiag(S.Current->getLocation(OpPC),
+                 diag::note_invalid_subexpr_in_const_expr)
+            << S.Current->getRange(OpPC);
+        return false;
+      }
       const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType);
       if (!AT)
         return false;
diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index 480e486fbad72..55f1e08f3a535 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -1,11 +1,10 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu
 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-fexperimental-new-constant-interpreter
 
-// expected-no-diagnostics
-
 // Test that offsetof correctly zero-extends unsigned array indices >= 128.
 // Previously, Clang would sign-extend uint8_t indices >= 128, producing
 // a large bogus offset value instead of the correct one.
+// Also tests that negative indices and oversized __uint128_t indices are 
rejected.
 // https://github.com/llvm/llvm-project/issues/199319
 
 #include <stdint.h>
@@ -15,6 +14,7 @@ struct MyStruct {
     void *ptrs[256];
 };
 
+// Unsigned indices that were previously sign-extended must be zero-extended.
 _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)127]) == 127 
* sizeof(void *),
                "offsetof with uint8_t index 127 should be correct");
 
@@ -23,3 +23,10 @@ _Static_assert(__builtin_offsetof(struct MyStruct, 
ptrs[(uint8_t)128]) == 128 *
 
 _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 
* sizeof(void *),
                "offsetof with uint8_t index 255 should be correctly 
zero-extended, not sign-extended");
+
+// Negative indices must be rejected.
+struct NegIdxStruct { int a; int x[1]; };
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+
+// __uint128_t indices >= 0x8000000000000000 must be rejected.
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
index 50bf50ef0fc35..2cea1f32be472 100644
--- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp
+++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
@@ -1,11 +1,10 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11
 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-std=c++11 -fexperimental-new-constant-interpreter
 
-// expected-no-diagnostics
-
 // Test that offsetof correctly zero-extends unsigned array indices >= 128.
 // Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit
 // was set, producing a large bogus offset value instead of the correct one.
+// Also tests that negative indices and oversized __uint128_t indices are 
rejected.
 // https://github.com/llvm/llvm-project/issues/199319
 
 #include <cstdint>
@@ -29,3 +28,10 @@ struct BigStruct {
 };
 static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768,
               "offsetof with uint16_t index 32768 should be correctly 
zero-extended");
+
+// Negative indices must be rejected.
+struct NegIdxStruct { int a; int x[1]; };
+static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+
+// __uint128_t indices >= 0x8000000000000000 must be rejected.
+static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}

>From 0664e5871f317c8058b12f853ed9fc487a0de42a Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 16 Jun 2026 16:20:17 -0400
Subject: [PATCH 06/21] clang-format: break long line in VisitOffsetOfExpr

---
 clang/lib/AST/ExprConstant.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 32b9bb01b9d71..0a24cf4053ce3 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19399,8 +19399,9 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
       // Reject negative indices and indices too large to fit in int64_t,
       // to avoid sign-extension issues or crashes in getZExtValue().
+      APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false);
       if (IdxResult.isSigned() ? IdxResult.isNegative()
-                               : IdxResult.ugt(APSInt::getMaxValue(64, 
/*Unsigned=*/false)))
+                               : IdxResult.ugt(MaxIdx))
         return Error(OOE);
       Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue()
                                         : IdxResult.getSExtValue()) *

>From d74d4a64a54b8efab9fe445181f22c9a486d5f95 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Wed, 17 Jun 2026 09:40:26 -0400
Subject: [PATCH 07/21] Fix offsetof overflow and AP index handling

---
 clang/lib/AST/ByteCode/Compiler.cpp      |  7 +++++--
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 +++++++-----
 clang/lib/AST/ExprConstant.cpp           | 23 ++++++++++++++++-------
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp 
b/clang/lib/AST/ByteCode/Compiler.cpp
index 3f4054544f311..128dd6f01c0ab 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -3766,8 +3766,11 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const 
OffsetOfExpr *E) {
 
       if (!this->visit(ArrayIndexExpr))
         return false;
-      // Cast to Sint64. For unsigned types, cast to Uint64 first to
-      // avoid sign-extending values with the high bit set (e.g. uint8_t >= 
128).
+      // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid
+      // sign-extending values with the high bit set (e.g. uint8_t >= 128).
+      // AP types cannot be safely narrowed to Sint64; fail constant 
evaluation.
+      if (IndexT == PT_IntAP || IndexT == PT_IntAPS)
+        return false;
       if (IndexT != PT_Sint64) {
         if (!isSignedType(IndexT) && IndexT != PT_Uint64) {
           if (!this->emitCast(IndexT, PT_Uint64, E))
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0a0149e240c2f..0c247f8b15fae 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -6670,11 +6670,7 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, 
const OffsetOfExpr *E,
       break;
     }
     case OffsetOfNode::Array: {
-      // When generating bytecode, we put all the index expressions as Sint64 
on
-      // the stack.
       int64_t Index = ArrayIndices[ArrayIndex];
-      // Reject negative indices and unsigned indices that wrapped to negative
-      // after the Uint64->Sint64 cast (e.g. __uint128_t >= 
0x8000000000000000).
       if (Index < 0) {
         S.FFDiag(S.Current->getLocation(OpPC),
                  diag::note_invalid_subexpr_in_const_expr)
@@ -6686,7 +6682,13 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, 
const OffsetOfExpr *E,
         return false;
       CurrentType = AT->getElementType();
       CharUnits ElementSize = 
S.getASTContext().getTypeSizeInChars(CurrentType);
-      Result += Index * ElementSize;
+      int64_t ElemSize = ElementSize.getQuantity();
+      if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index)
+        return false;
+      int64_t Offset = Index * ElemSize;
+      if (Result.getQuantity() > llvm::maxIntN(64) - Offset)
+        return false;
+      Result += CharUnits::fromQuantity(Offset);
       ++ArrayIndex;
       break;
     }
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 0a24cf4053ce3..46d341a8bc0c7 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19397,15 +19397,24 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
         return Error(OOE);
       CurrentType = AT->getElementType();
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
-      // Reject negative indices and indices too large to fit in int64_t,
-      // to avoid sign-extension issues or crashes in getZExtValue().
+      // Reject negative indices, indices too large to fit in int64_t,
+      // and overflow in the offset computation.
       APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false);
-      if (IdxResult.isSigned() ? IdxResult.isNegative()
-                               : IdxResult.ugt(MaxIdx))
+      if (IdxResult.isSigned()
+              ? (IdxResult.isNegative() || IdxResult.sgt(MaxIdx))
+              : IdxResult.ugt(MaxIdx))
         return Error(OOE);
-      Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue()
-                                        : IdxResult.getSExtValue()) *
-                ElementSize;
+      int64_t IdxVal = IdxResult.isUnsigned()
+                           ? (int64_t)IdxResult.getZExtValue()
+                           : IdxResult.getSExtValue();
+      int64_t ElemSize = ElementSize.getQuantity();
+      if (IdxVal != 0 &&
+          ElemSize > std::numeric_limits<int64_t>::max() / IdxVal)
+        return Error(OOE);
+      int64_t Offset = IdxVal * ElemSize;
+      if (Result.getQuantity() > std::numeric_limits<int64_t>::max() - Offset)
+        return Error(OOE);
+      Result += CharUnits::fromQuantity(Offset);
       break;
     }
 

>From 61c46e1893b382ed4639b84d9f41952dfdcdf23e Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Wed, 17 Jun 2026 10:06:38 -0400
Subject: [PATCH 08/21] Add tests for __uint128_t > UINT64_MAX and multiply
 overflow

---
 clang/test/Sema/offsetof-unsigned-index.c      | 10 ++++++++++
 clang/test/SemaCXX/offsetof-unsigned-index.cpp | 10 ++++++++++
 2 files changed, 20 insertions(+)

diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index 55f1e08f3a535..1b34ebe60443f 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -30,3 +30,13 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[-1]) == 0, ""); // expe
 
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
 _Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+
+// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
+// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
+// wrong result instead of an error).
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 
64)]) == 0, ""); // expected-error {{not an integral constant expression}} 
expected-note {{subexpression not valid in a constant expression}}
+
+// A uint64_t index that causes index*sizeof(element) to overflow int64_t must
+// be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
+struct ShortArray { short data[2]; };
+_Static_assert(__builtin_offsetof(struct ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
index 2cea1f32be472..2b3995d405705 100644
--- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp
+++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
@@ -35,3 +35,13 @@ static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, 
""); // expected-err
 
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
 static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+
+// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
+// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
+// wrong result instead of an error).
+static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 
0, ""); // expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+
+// A uint64_t index that causes index*sizeof(element) to overflow int64_t must
+// be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
+struct ShortArray { short data[2]; };
+static_assert(__builtin_offsetof(ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}

>From 1a2d73f466710bf551392aef641f0f12fab6a032 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Fri, 19 Jun 2026 09:54:59 -0400
Subject: [PATCH 09/21] Handle small AP-typed offsetof indices correctly

---
 clang/lib/AST/ByteCode/Compiler.cpp            | 16 +++++++++++++---
 clang/test/Sema/offsetof-unsigned-index.c      |  5 +++++
 clang/test/SemaCXX/offsetof-unsigned-index.cpp |  5 +++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp 
b/clang/lib/AST/ByteCode/Compiler.cpp
index 128dd6f01c0ab..2667bac8733a4 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -3764,13 +3764,23 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const 
OffsetOfExpr *E) {
         continue;
       }
 
+      if (IndexT == PT_IntAP || IndexT == PT_IntAPS) {
+        // AP types (e.g. __uint128_t, __int128) cannot be safely cast to
+        // Sint64. Evaluate the constant and push it directly as Sint64.
+        Expr::EvalResult EvalResult;
+        if (!ArrayIndexExpr->EvaluateAsInt(EvalResult, Ctx.getASTContext()))
+          return false;
+        llvm::APSInt IdxVal = EvalResult.Val.getInt();
+        if (IdxVal.isNegative() || !IdxVal.isSignedIntN(64))
+          return false;
+        if (!this->emitConstSint64((int64_t)IdxVal.getZExtValue(), E))
+          return false;
+        continue;
+      }
       if (!this->visit(ArrayIndexExpr))
         return false;
       // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid
       // sign-extending values with the high bit set (e.g. uint8_t >= 128).
-      // AP types cannot be safely narrowed to Sint64; fail constant 
evaluation.
-      if (IndexT == PT_IntAP || IndexT == PT_IntAPS)
-        return false;
       if (IndexT != PT_Sint64) {
         if (!isSignedType(IndexT) && IndexT != PT_Uint64) {
           if (!this->emitCast(IndexT, PT_Uint64, E))
diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index 1b34ebe60443f..57bce06a25c98 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -31,6 +31,11 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[-1]) == 0, ""); // expe
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
 _Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
 
+// Small __uint128_t values that fit in int64_t must work correctly.
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0]) ==
+               __builtin_offsetof(struct NegIdxStruct, x),
+               "offsetof with __uint128_t index 0 should work");
+
 // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
 // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
 // wrong result instead of an error).
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
index 2b3995d405705..880c45dec0e3e 100644
--- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp
+++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
@@ -36,6 +36,11 @@ static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, 
""); // expected-err
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
 static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
 
+// Small __uint128_t values that fit in int64_t must work correctly.
+static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) ==
+              __builtin_offsetof(NegIdxStruct, x),
+              "offsetof with __uint128_t index 0 should work");
+
 // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
 // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
 // wrong result instead of an error).

>From 1060b4331649596afa944e2ae76ee1cba4346f97 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 23 Jun 2026 09:49:51 -0400
Subject: [PATCH 10/21] Fix offsetof AP index handling: add CastNoOverflow
 opcode and APInt crash fix

- ExprConstant.cpp: replace sgt/ugt(MaxIdx) with getActiveBits() > 63 to
  avoid APInt bit-width assertion crash when comparing 128-bit values
- Opcodes.td: add APOnlyTypeClass (IntAP, IntAPS) and CastNoOverflow opcode
- Interp.h: implement CastNoOverflow -- pops AP value, rejects negative or
  values that don't fit in int64_t (activeBits > 63), pushes as Sint64
- Compiler.cpp: for AP types, emit visit + CastNoOverflow instead of
  calling EvaluateAsInt (which is not allowed in the compiler phase)
---
 clang/lib/AST/ByteCode/Compiler.cpp            | 10 ++--------
 clang/lib/AST/ByteCode/Interp.h                | 17 +++++++++++++++++
 clang/lib/AST/ByteCode/Opcodes.td              | 10 ++++++++++
 clang/lib/AST/ExprConstant.cpp                 |  5 +----
 clang/test/Sema/offsetof-unsigned-index.c      | 11 +++++++----
 clang/test/SemaCXX/offsetof-unsigned-index.cpp | 15 +++++++++------
 6 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp 
b/clang/lib/AST/ByteCode/Compiler.cpp
index 2667bac8733a4..41685b0b1a257 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -3765,15 +3765,9 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const 
OffsetOfExpr *E) {
       }
 
       if (IndexT == PT_IntAP || IndexT == PT_IntAPS) {
-        // AP types (e.g. __uint128_t, __int128) cannot be safely cast to
-        // Sint64. Evaluate the constant and push it directly as Sint64.
-        Expr::EvalResult EvalResult;
-        if (!ArrayIndexExpr->EvaluateAsInt(EvalResult, Ctx.getASTContext()))
+        if (!this->visit(ArrayIndexExpr))
           return false;
-        llvm::APSInt IdxVal = EvalResult.Val.getInt();
-        if (IdxVal.isNegative() || !IdxVal.isSignedIntN(64))
-          return false;
-        if (!this->emitConstSint64((int64_t)IdxVal.getZExtValue(), E))
+        if (!this->emitCastNoOverflow(IndexT, E))
           return false;
         continue;
       }
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 97d9e7cc14e32..2aaf82dcd4051 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -2884,6 +2884,23 @@ bool CastAPS(InterpState &S, CodePtr OpPC, uint32_t 
BitWidth) {
   return true;
 }
 
+// Cast an AP integer to Sint64, failing constant evaluation if the value is
+// negative or too large to fit (i.e. truncation would change the value).
+template <PrimType Name, class T = typename PrimConv<Name>::T>
+bool CastNoOverflow(InterpState &S, CodePtr OpPC) {
+  T Source = S.Stk.pop<T>();
+  APSInt Val = Source.toAPSInt();
+  if (Val.isNegative() || Val.getActiveBits() > 63) {
+    S.FFDiag(S.Current->getLocation(OpPC),
+             diag::note_invalid_subexpr_in_const_expr)
+        << S.Current->getRange(OpPC);
+    return false;
+  }
+  S.Stk.push<Integral<64, true>>(
+      Integral<64, true>::from((int64_t)Val.getZExtValue()));
+  return true;
+}
+
 template <PrimType Name, class T = typename PrimConv<Name>::T>
 bool CastIntegralFloating(InterpState &S, CodePtr OpPC,
                           const llvm::fltSemantics *Sem, uint32_t FPOI) {
diff --git a/clang/lib/AST/ByteCode/Opcodes.td 
b/clang/lib/AST/ByteCode/Opcodes.td
index b6190554178f3..7cda124b40e24 100644
--- a/clang/lib/AST/ByteCode/Opcodes.td
+++ b/clang/lib/AST/ByteCode/Opcodes.td
@@ -748,6 +748,16 @@ def CastAPS : Opcode {
   let HasGroup = 1;
 }
 
+def APOnlyTypeClass : TypeClass {
+  let Types = [IntAP, IntAPS];
+}
+
+// Cast from an AP integer type to Sint64, failing if the value doesn't fit.
+def CastNoOverflow : Opcode {
+  let Types = [APOnlyTypeClass];
+  let HasGroup = 1;
+}
+
 // Cast an integer to a floating type
 def CastIntegralFloating : Opcode {
   let Types = [AluTypeClass];
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 46d341a8bc0c7..aa317ee2d106d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19399,10 +19399,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
       CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType);
       // Reject negative indices, indices too large to fit in int64_t,
       // and overflow in the offset computation.
-      APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false);
-      if (IdxResult.isSigned()
-              ? (IdxResult.isNegative() || IdxResult.sgt(MaxIdx))
-              : IdxResult.ugt(MaxIdx))
+      if (IdxResult.isNegative() || IdxResult.getActiveBits() > 63)
         return Error(OOE);
       int64_t IdxVal = IdxResult.isUnsigned()
                            ? (int64_t)IdxResult.getZExtValue()
diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index 57bce06a25c98..075a96b0e8163 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -26,22 +26,25 @@ _Static_assert(__builtin_offsetof(struct MyStruct, 
ptrs[(uint8_t)255]) == 255 *
 
 // Negative indices must be rejected.
 struct NegIdxStruct { int a; int x[1]; };
-_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}}
 
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
-_Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}}
 
 // Small __uint128_t values that fit in int64_t must work correctly.
 _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0]) ==
                __builtin_offsetof(struct NegIdxStruct, x),
                "offsetof with __uint128_t index 0 should work");
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)1]) ==
+               __builtin_offsetof(struct NegIdxStruct, x) + sizeof(int),
+               "offsetof with __uint128_t index 1 should work");
 
 // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
 // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
 // wrong result instead of an error).
-_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 
64)]) == 0, ""); // expected-error {{not an integral constant expression}} 
expected-note {{subexpression not valid in a constant expression}}
+_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 
64)]) == 0, ""); // expected-error {{not an integral constant expression}}
 
 // A uint64_t index that causes index*sizeof(element) to overflow int64_t must
 // be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
 struct ShortArray { short data[2]; };
-_Static_assert(__builtin_offsetof(struct ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+_Static_assert(__builtin_offsetof(struct ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}}
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
index 880c45dec0e3e..22d0eda636f08 100644
--- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp
+++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
@@ -7,8 +7,8 @@
 // Also tests that negative indices and oversized __uint128_t indices are 
rejected.
 // https://github.com/llvm/llvm-project/issues/199319
 
-#include <cstdint>
-#include <cstddef>
+#include <stdint.h>
+#include <stddef.h>
 
 struct MyStruct {
     void *ptrs[256];
@@ -31,22 +31,25 @@ static_assert(__builtin_offsetof(BigStruct, 
data[(uint16_t)32768]) == 32768,
 
 // Negative indices must be rejected.
 struct NegIdxStruct { int a; int x[1]; };
-static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}}
 
 // __uint128_t indices >= 0x8000000000000000 must be rejected.
-static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}}
 
 // Small __uint128_t values that fit in int64_t must work correctly.
 static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) ==
               __builtin_offsetof(NegIdxStruct, x),
               "offsetof with __uint128_t index 0 should work");
+static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)1]) ==
+              __builtin_offsetof(NegIdxStruct, x) + sizeof(int),
+              "offsetof with __uint128_t index 1 should work");
 
 // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
 // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
 // wrong result instead of an error).
-static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 
0, ""); // expected-error {{not an integral constant expression}} expected-note 
{{subexpression not valid in a constant expression}}
+static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 
0, ""); // expected-error {{not an integral constant expression}}
 
 // A uint64_t index that causes index*sizeof(element) to overflow int64_t must
 // be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
 struct ShortArray { short data[2]; };
-static_assert(__builtin_offsetof(ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{subexpression not valid in a 
constant expression}}
+static_assert(__builtin_offsetof(ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}}

>From ee845e9b82223c8237a2960df803366e34cf7895 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Tue, 23 Jun 2026 14:36:29 -0400
Subject: [PATCH 11/21] Use APSInt::getExtValue() in offsetof index evaluation

---
 clang/lib/AST/ExprConstant.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index aa317ee2d106d..c33dfe8afacea 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19401,9 +19401,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
       // and overflow in the offset computation.
       if (IdxResult.isNegative() || IdxResult.getActiveBits() > 63)
         return Error(OOE);
-      int64_t IdxVal = IdxResult.isUnsigned()
-                           ? (int64_t)IdxResult.getZExtValue()
-                           : IdxResult.getSExtValue();
+      int64_t IdxVal = IdxResult.getExtValue();
       int64_t ElemSize = ElementSize.getQuantity();
       if (IdxVal != 0 &&
           ElemSize > std::numeric_limits<int64_t>::max() / IdxVal)

>From d0e9aae9aaa8ce5725b15cae11bdddf8f1eea444 Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:24:34 -0400
Subject: [PATCH 12/21] Move BigStruct/uint16_t test to .c file and drop .cpp
 test

No C++-specific logic in the test; move uint16_t/BigStruct case to the
.c test file and delete the redundant SemaCXX variant.

Addresses reviewer comment from efriedma-quic.
---
 clang/test/Sema/offsetof-unsigned-index.c     |  5 ++
 .../test/SemaCXX/offsetof-unsigned-index.cpp  | 55 -------------------
 2 files changed, 5 insertions(+), 55 deletions(-)
 delete mode 100644 clang/test/SemaCXX/offsetof-unsigned-index.cpp

diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index 075a96b0e8163..d873175c22eb8 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -24,6 +24,11 @@ _Static_assert(__builtin_offsetof(struct MyStruct, 
ptrs[(uint8_t)128]) == 128 *
 _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 
* sizeof(void *),
                "offsetof with uint8_t index 255 should be correctly 
zero-extended, not sign-extended");
 
+// uint16_t index: values >= 32768 were also affected by sign-extension.
+struct BigStruct { char data[65536]; };
+_Static_assert(__builtin_offsetof(struct BigStruct, data[(uint16_t)32768]) == 
32768,
+               "offsetof with uint16_t index 32768 should be correctly 
zero-extended");
+
 // Negative indices must be rejected.
 struct NegIdxStruct { int a; int x[1]; };
 _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}}
diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp 
b/clang/test/SemaCXX/offsetof-unsigned-index.cpp
deleted file mode 100644
index 22d0eda636f08..0000000000000
--- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11
-// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-std=c++11 -fexperimental-new-constant-interpreter
-
-// Test that offsetof correctly zero-extends unsigned array indices >= 128.
-// Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit
-// was set, producing a large bogus offset value instead of the correct one.
-// Also tests that negative indices and oversized __uint128_t indices are 
rejected.
-// https://github.com/llvm/llvm-project/issues/199319
-
-#include <stdint.h>
-#include <stddef.h>
-
-struct MyStruct {
-    void *ptrs[256];
-};
-
-// uint8_t index: values >= 128 were incorrectly sign-extended
-static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)127]) == 127 * 
sizeof(void *),
-              "offsetof with uint8_t index 127 should be correct");
-static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)128]) == 128 * 
sizeof(void *),
-              "offsetof with uint8_t index 128 should be correctly 
zero-extended");
-static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)255]) == 255 * 
sizeof(void *),
-              "offsetof with uint8_t index 255 should be correctly 
zero-extended");
-
-// uint16_t index: values >= 32768 were also affected
-struct BigStruct {
-    char data[65536];
-};
-static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768,
-              "offsetof with uint16_t index 32768 should be correctly 
zero-extended");
-
-// Negative indices must be rejected.
-struct NegIdxStruct { int a; int x[1]; };
-static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // 
expected-error {{not an integral constant expression}}
-
-// __uint128_t indices >= 0x8000000000000000 must be rejected.
-static_assert(__builtin_offsetof(NegIdxStruct, 
x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an 
integral constant expression}}
-
-// Small __uint128_t values that fit in int64_t must work correctly.
-static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) ==
-              __builtin_offsetof(NegIdxStruct, x),
-              "offsetof with __uint128_t index 0 should work");
-static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)1]) ==
-              __builtin_offsetof(NegIdxStruct, x) + sizeof(int),
-              "offsetof with __uint128_t index 1 should work");
-
-// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero:
-// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a
-// wrong result instead of an error).
-static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 
0, ""); // expected-error {{not an integral constant expression}}
-
-// A uint64_t index that causes index*sizeof(element) to overflow int64_t must
-// be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
-struct ShortArray { short data[2]; };
-static_assert(__builtin_offsetof(ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}}

>From 9ec2151c158b192cb6f8785253bdb2faf7cf501b Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:33:22 -0400
Subject: [PATCH 13/21] Add specific diagnostic for overflow in offsetof
 computation

Instead of the generic 'subexpression not valid in a constant expression'
note, emit 'overflow in offsetof' when the multiply or add step of the
offsetof calculation overflows int64_t.

Add note_constexpr_offsetof_overflow to DiagnosticASTKinds.td and use it
in both the ExprConstant and ByteCode (InterpBuiltin) paths. Update the
test to expect the new note.

Addresses reviewer comment from efriedma-quic.
---
 clang/include/clang/Basic/DiagnosticASTKinds.td |  2 ++
 clang/lib/AST/ByteCode/InterpBuiltin.cpp        | 12 ++++++++++--
 clang/lib/AST/ExprConstant.cpp                  |  4 ++--
 clang/test/Sema/offsetof-unsigned-index.c       |  2 +-
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td 
b/clang/include/clang/Basic/DiagnosticASTKinds.td
index bde418695f647..f86f0157b2b1f 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -24,6 +24,8 @@ def note_constexpr_invalid_downcast : Note<
   "cannot cast object of dynamic type %0 to type %1">;
 def note_constexpr_overflow : Note<
   "value %0 is outside the range of representable values of type %1">;
+def note_constexpr_offsetof_overflow : Note<
+  "overflow in offsetof">;
 def note_constexpr_negative_shift : Note<"negative shift count %0">;
 def note_constexpr_large_shift : Note<
   "shift count %0 >= width of type %1 (%2 bit%s2)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0c247f8b15fae..b182820a0d028 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -6683,11 +6683,19 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, 
const OffsetOfExpr *E,
       CurrentType = AT->getElementType();
       CharUnits ElementSize = 
S.getASTContext().getTypeSizeInChars(CurrentType);
       int64_t ElemSize = ElementSize.getQuantity();
-      if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index)
+      if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index) {
+        S.FFDiag(S.Current->getLocation(OpPC),
+                 diag::note_constexpr_offsetof_overflow)
+            << S.Current->getRange(OpPC);
         return false;
+      }
       int64_t Offset = Index * ElemSize;
-      if (Result.getQuantity() > llvm::maxIntN(64) - Offset)
+      if (Result.getQuantity() > llvm::maxIntN(64) - Offset) {
+        S.FFDiag(S.Current->getLocation(OpPC),
+                 diag::note_constexpr_offsetof_overflow)
+            << S.Current->getRange(OpPC);
         return false;
+      }
       Result += CharUnits::fromQuantity(Offset);
       ++ArrayIndex;
       break;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index c33dfe8afacea..e2cc1d7c85872 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -19405,10 +19405,10 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const 
OffsetOfExpr *OOE) {
       int64_t ElemSize = ElementSize.getQuantity();
       if (IdxVal != 0 &&
           ElemSize > std::numeric_limits<int64_t>::max() / IdxVal)
-        return Error(OOE);
+        return Error(OOE, diag::note_constexpr_offsetof_overflow);
       int64_t Offset = IdxVal * ElemSize;
       if (Result.getQuantity() > std::numeric_limits<int64_t>::max() - Offset)
-        return Error(OOE);
+        return Error(OOE, diag::note_constexpr_offsetof_overflow);
       Result += CharUnits::fromQuantity(Offset);
       break;
     }
diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index d873175c22eb8..cafd2ddff58f8 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -52,4 +52,4 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, 
x[((__uint128_t)1 << 64)]
 // A uint64_t index that causes index*sizeof(element) to overflow int64_t must
 // be rejected.  4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX.
 struct ShortArray { short data[2]; };
-_Static_assert(__builtin_offsetof(struct ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}}
+_Static_assert(__builtin_offsetof(struct ShortArray, 
data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an 
integral constant expression}} expected-note {{overflow in offsetof}}

>From fec17a8c32c13d2e4784b15fbf0a3893c1dadd7c Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:35:54 -0400
Subject: [PATCH 14/21] Replace standard includes in test with manual typedefs

Remove #include <stdint.h> and <stddef.h> from offsetof-unsigned-index.c
and add manual typedef declarations instead, following the convention that
clang test files should not use standard includes.

Addresses reviewer comment from tbaederr.
---
 clang/test/Sema/offsetof-unsigned-index.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/test/Sema/offsetof-unsigned-index.c 
b/clang/test/Sema/offsetof-unsigned-index.c
index cafd2ddff58f8..56396eb2a12e6 100644
--- a/clang/test/Sema/offsetof-unsigned-index.c
+++ b/clang/test/Sema/offsetof-unsigned-index.c
@@ -7,8 +7,9 @@
 // Also tests that negative indices and oversized __uint128_t indices are 
rejected.
 // https://github.com/llvm/llvm-project/issues/199319
 
-#include <stdint.h>
-#include <stddef.h>
+typedef unsigned char      uint8_t;
+typedef unsigned short     uint16_t;
+typedef unsigned long long uint64_t;
 
 struct MyStruct {
     void *ptrs[256];

>From d09c2beccdbbb572617dc31d7094c663ab37eb56 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:37:34 -0400
Subject: [PATCH 15/21] Update clang/lib/AST/ByteCode/Interp.h

Co-authored-by: Timm Baeder <[email protected]>
---
 clang/lib/AST/ByteCode/Interp.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 2aaf82dcd4051..1ce15cf7459dd 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -2891,10 +2891,7 @@ bool CastNoOverflow(InterpState &S, CodePtr OpPC) {
   T Source = S.Stk.pop<T>();
   APSInt Val = Source.toAPSInt();
   if (Val.isNegative() || Val.getActiveBits() > 63) {
-    S.FFDiag(S.Current->getLocation(OpPC),
-             diag::note_invalid_subexpr_in_const_expr)
-        << S.Current->getRange(OpPC);
-    return false;
+return Invalid(S, OpPC);
   }
   S.Stk.push<Integral<64, true>>(
       Integral<64, true>::from((int64_t)Val.getZExtValue()));

>From e7941837ce93c0ec5e410c45ab836ed1825ae886 Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:38:51 -0400
Subject: [PATCH 16/21] Fix indentation of Invalid(S, OpPC) in CastNoOverflow

---
 clang/lib/AST/ByteCode/Interp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 1ce15cf7459dd..28cb235c7d781 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -2891,7 +2891,7 @@ bool CastNoOverflow(InterpState &S, CodePtr OpPC) {
   T Source = S.Stk.pop<T>();
   APSInt Val = Source.toAPSInt();
   if (Val.isNegative() || Val.getActiveBits() > 63) {
-return Invalid(S, OpPC);
+    return Invalid(S, OpPC);
   }
   S.Stk.push<Integral<64, true>>(
       Integral<64, true>::from((int64_t)Val.getZExtValue()));

>From 39cb083dd528a1605c9612e6376e32d66085ebac Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:45:14 -0400
Subject: [PATCH 17/21] Remove unnecessary two-step cast in VisitOffsetOfExpr
 ByteCode path

The intermediate Uint64 step was not needed: Cast<PT_UintN, PT_Sint64>
uses a C++ implicit conversion which already correctly zero-extends
unsigned values. Revert the non-AP path to the original direct
emitCast(IndexT, PT_Sint64).

Addresses reviewer comment from tbaederr.
---
 clang/lib/AST/ByteCode/Compiler.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp 
b/clang/lib/AST/ByteCode/Compiler.cpp
index 41685b0b1a257..2458860f44eb5 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -3773,14 +3773,8 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const 
OffsetOfExpr *E) {
       }
       if (!this->visit(ArrayIndexExpr))
         return false;
-      // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid
-      // sign-extending values with the high bit set (e.g. uint8_t >= 128).
+      // Cast to Sint64.
       if (IndexT != PT_Sint64) {
-        if (!isSignedType(IndexT) && IndexT != PT_Uint64) {
-          if (!this->emitCast(IndexT, PT_Uint64, E))
-            return false;
-          IndexT = PT_Uint64;
-        }
         if (!this->emitCast(IndexT, PT_Sint64, E))
           return false;
       }

>From e22b2a826009f77874409edc791358dce249d602 Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Thu, 25 Jun 2026 09:58:18 -0400
Subject: [PATCH 18/21] Add release note for offsetof unsigned index
 sign-extension fix

---
 clang/docs/ReleaseNotes.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/docs/ReleaseNotes.md b/clang/docs/ReleaseNotes.md
index 0f252fdf3601b..37e78b93a7217 100644
--- a/clang/docs/ReleaseNotes.md
+++ b/clang/docs/ReleaseNotes.md
@@ -759,6 +759,9 @@ latest release, please see the [Clang Web 
Site](https://clang.llvm.org) or the
   calling `__builtin_bit_cast`. (#GH200112)
 - Clang now SFINAE friendly when the ``__reference_meows_from_temporary`` 
builtins
   should SFINAE friendly when the 1st type is not a reference type. (#GH206524)
+- Fixed `__builtin_offsetof` incorrectly sign-extending unsigned array indices
+  with the high bit set (e.g. `uint8_t` values >= 128), which produced wrong
+  offset values in constant expressions. (#GH199319)
 
 
 #### Bug Fixes to Attribute Support

>From d7cafdf2ee3928e3e44cf7c7e5daad897dc27736 Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Fri, 26 Jun 2026 09:13:51 -0400
Subject: [PATCH 19/21] Update clang/lib/AST/ByteCode/Interp.h

Co-authored-by: Timm Baeder <[email protected]>
---
 clang/lib/AST/ByteCode/Interp.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 28cb235c7d781..84564d0a33cff 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -2890,9 +2890,8 @@ template <PrimType Name, class T = typename 
PrimConv<Name>::T>
 bool CastNoOverflow(InterpState &S, CodePtr OpPC) {
   T Source = S.Stk.pop<T>();
   APSInt Val = Source.toAPSInt();
-  if (Val.isNegative() || Val.getActiveBits() > 63) {
+  if (Val.isNegative() || Val.getActiveBits() > 63)
     return Invalid(S, OpPC);
-  }
   S.Stk.push<Integral<64, true>>(
       Integral<64, true>::from((int64_t)Val.getZExtValue()));
   return true;

>From 75b62d4fe5a606deebae65ae35b8fea524cac58e Mon Sep 17 00:00:00 2001
From: Marlus Cadanus da Costa <[email protected]>
Date: Fri, 26 Jun 2026 09:14:11 -0400
Subject: [PATCH 20/21] Update clang/lib/AST/ByteCode/InterpBuiltin.cpp

Co-authored-by: Timm Baeder <[email protected]>
---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b182820a0d028..ab5a4d2bab41b 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -6671,12 +6671,8 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, 
const OffsetOfExpr *E,
     }
     case OffsetOfNode::Array: {
       int64_t Index = ArrayIndices[ArrayIndex];
-      if (Index < 0) {
-        S.FFDiag(S.Current->getLocation(OpPC),
-                 diag::note_invalid_subexpr_in_const_expr)
-            << S.Current->getRange(OpPC);
-        return false;
-      }
+      if (Index < 0)
+        return Invalid(S, OpPC);
       const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType);
       if (!AT)
         return false;

>From 3fb104a7f1261ab1993aaa65646c2fea0256d400 Mon Sep 17 00:00:00 2001
From: Cadanus da Costa <[email protected]>
Date: Fri, 26 Jun 2026 09:15:42 -0400
Subject: [PATCH 21/21] Restore accidentally removed comment in
 InterpretOffsetOf

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ab5a4d2bab41b..14717a077075a 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -6670,6 +6670,8 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, 
const OffsetOfExpr *E,
       break;
     }
     case OffsetOfNode::Array: {
+      // When generating bytecode, we put all the index expressions as Sint64 
on
+      // the stack.
       int64_t Index = ArrayIndices[ArrayIndex];
       if (Index < 0)
         return Invalid(S, OpPC);

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang] Fix offsetof sign-extending unsigned array indices >= 128 (PR #204139)

Reply via email to