https://github.com/marlus updated https://github.com/llvm/llvm-project/pull/204139
>From 6139f5fc7d54ee997789e725612141bcfb308917 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 08:42:14 -0400 Subject: [PATCH 01/21] [Clang] Fix offsetof sign-extending unsigned array indices >= 128 When evaluating __builtin_offsetof with an unsigned integer array index (e.g. uint8_t, uint16_t) whose value has the high bit set, Clang was calling getSExtValue() on the APSInt index, which sign-extends the value and produces a large bogus offset. Fix this by checking whether the APSInt is unsigned and using getZExtValue() in that case instead. Fixes #199319 --- clang/lib/AST/ExprConstant.cpp | 3 +- clang/test/Sema/offsetof-unsigned-index.c | 25 +++++++++++++++ .../test/SemaCXX/offsetof-unsigned-index.cpp | 31 +++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 clang/test/Sema/offsetof-unsigned-index.c create mode 100644 clang/test/SemaCXX/offsetof-unsigned-index.cpp diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 563d6b3bb0cf9..3178d3cf4a262 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19397,7 +19397,8 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { return Error(OOE); CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); - Result += IdxResult.getSExtValue() * ElementSize; + Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() + : IdxResult.getSExtValue()) * ElementSize; break; } diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c new file mode 100644 index 0000000000000..480e486fbad72 --- /dev/null +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -fexperimental-new-constant-interpreter + +// expected-no-diagnostics + +// Test that offsetof correctly zero-extends unsigned array indices >= 128. +// Previously, Clang would sign-extend uint8_t indices >= 128, producing +// a large bogus offset value instead of the correct one. +// https://github.com/llvm/llvm-project/issues/199319 + +#include <stdint.h> +#include <stddef.h> + +struct MyStruct { + void *ptrs[256]; +}; + +_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)127]) == 127 * sizeof(void *), + "offsetof with uint8_t index 127 should be correct"); + +_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)128]) == 128 * sizeof(void *), + "offsetof with uint8_t index 128 should be correctly zero-extended, not sign-extended"); + +_Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), + "offsetof with uint8_t index 255 should be correctly zero-extended, not sign-extended"); diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp new file mode 100644 index 0000000000000..50bf50ef0fc35 --- /dev/null +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 -fexperimental-new-constant-interpreter + +// expected-no-diagnostics + +// Test that offsetof correctly zero-extends unsigned array indices >= 128. +// Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit +// was set, producing a large bogus offset value instead of the correct one. +// https://github.com/llvm/llvm-project/issues/199319 + +#include <cstdint> +#include <cstddef> + +struct MyStruct { + void *ptrs[256]; +}; + +// uint8_t index: values >= 128 were incorrectly sign-extended +static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)127]) == 127 * sizeof(void *), + "offsetof with uint8_t index 127 should be correct"); +static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)128]) == 128 * sizeof(void *), + "offsetof with uint8_t index 128 should be correctly zero-extended"); +static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), + "offsetof with uint8_t index 255 should be correctly zero-extended"); + +// uint16_t index: values >= 32768 were also affected +struct BigStruct { + char data[65536]; +}; +static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768, + "offsetof with uint16_t index 32768 should be correctly zero-extended"); >From 992c691791c57d2dcaef7a68c212a8dede83a917 Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 14:52:53 -0400 Subject: [PATCH 02/21] Apply clang-format --- clang/lib/AST/ExprConstant.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3178d3cf4a262..01f001a887329 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19398,7 +19398,8 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() - : IdxResult.getSExtValue()) * ElementSize; + : IdxResult.getSExtValue()) * + ElementSize; break; } >From 712497aeb0a0d4823c56439bba925ec31f203607 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 15:02:17 -0400 Subject: [PATCH 03/21] Keep ByteCode compiler in sync: fix offsetof sign-extending unsigned array indices --- clang/lib/AST/ByteCode/Compiler.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 95f9a2f507335..3f4054544f311 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3766,8 +3766,14 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { if (!this->visit(ArrayIndexExpr)) return false; - // Cast to Sint64. + // Cast to Sint64. For unsigned types, cast to Uint64 first to + // avoid sign-extending values with the high bit set (e.g. uint8_t >= 128). if (IndexT != PT_Sint64) { + if (!isSignedType(IndexT) && IndexT != PT_Uint64) { + if (!this->emitCast(IndexT, PT_Uint64, E)) + return false; + IndexT = PT_Uint64; + } if (!this->emitCast(IndexT, PT_Sint64, E)) return false; } >From 467dbf4ddbfce5840751752c8df55a14304b4e28 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 16:00:44 -0400 Subject: [PATCH 04/21] Reject negative and oversized offsetof array indices --- clang/lib/AST/ExprConstant.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 01f001a887329..32b9bb01b9d71 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19397,6 +19397,11 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { return Error(OOE); CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); + // Reject negative indices and indices too large to fit in int64_t, + // to avoid sign-extension issues or crashes in getZExtValue(). + if (IdxResult.isSigned() ? IdxResult.isNegative() + : IdxResult.ugt(APSInt::getMaxValue(64, /*Unsigned=*/false))) + return Error(OOE); Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() : IdxResult.getSExtValue()) * ElementSize; >From edd5b53a1c2834b080ed5b5518918a1ebcebd6ba Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 16:09:59 -0400 Subject: [PATCH 05/21] Reject negative and oversized offsetof array indices in ByteCode interpreter Apply the same guards to InterpretOffsetOf in the ByteCode interpreter (clang/lib/AST/ByteCode/InterpBuiltin.cpp) as were added to ExprConstant.cpp: reject negative indices and unsigned indices that exceed INT64_MAX. In the ByteCode path, indices arrive as int64_t after the Uint64->Sint64 cast chain in Compiler.cpp. A negative int64_t covers both explicitly negative signed indices and unsigned values >= 0x8000000000000000 (which wrap to negative after the cast), so a single Index < 0 guard handles both cases. Also extend the test files with expected-error cases for negative and __uint128_t indices. --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/test/Sema/offsetof-unsigned-index.c | 11 +++++++++-- clang/test/SemaCXX/offsetof-unsigned-index.cpp | 10 ++++++++-- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 73952e032f1eb..0a0149e240c2f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -6673,6 +6673,14 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, // When generating bytecode, we put all the index expressions as Sint64 on // the stack. int64_t Index = ArrayIndices[ArrayIndex]; + // Reject negative indices and unsigned indices that wrapped to negative + // after the Uint64->Sint64 cast (e.g. __uint128_t >= 0x8000000000000000). + if (Index < 0) { + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_invalid_subexpr_in_const_expr) + << S.Current->getRange(OpPC); + return false; + } const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType); if (!AT) return false; diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 480e486fbad72..55f1e08f3a535 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -1,11 +1,10 @@ // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -fexperimental-new-constant-interpreter -// expected-no-diagnostics - // Test that offsetof correctly zero-extends unsigned array indices >= 128. // Previously, Clang would sign-extend uint8_t indices >= 128, producing // a large bogus offset value instead of the correct one. +// Also tests that negative indices and oversized __uint128_t indices are rejected. // https://github.com/llvm/llvm-project/issues/199319 #include <stdint.h> @@ -15,6 +14,7 @@ struct MyStruct { void *ptrs[256]; }; +// Unsigned indices that were previously sign-extended must be zero-extended. _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)127]) == 127 * sizeof(void *), "offsetof with uint8_t index 127 should be correct"); @@ -23,3 +23,10 @@ _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)128]) == 128 * _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), "offsetof with uint8_t index 255 should be correctly zero-extended, not sign-extended"); + +// Negative indices must be rejected. +struct NegIdxStruct { int a; int x[1]; }; +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices >= 0x8000000000000000 must be rejected. +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 50bf50ef0fc35..2cea1f32be472 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -1,11 +1,10 @@ // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 // RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 -fexperimental-new-constant-interpreter -// expected-no-diagnostics - // Test that offsetof correctly zero-extends unsigned array indices >= 128. // Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit // was set, producing a large bogus offset value instead of the correct one. +// Also tests that negative indices and oversized __uint128_t indices are rejected. // https://github.com/llvm/llvm-project/issues/199319 #include <cstdint> @@ -29,3 +28,10 @@ struct BigStruct { }; static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768, "offsetof with uint16_t index 32768 should be correctly zero-extended"); + +// Negative indices must be rejected. +struct NegIdxStruct { int a; int x[1]; }; +static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices >= 0x8000000000000000 must be rejected. +static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} >From 0664e5871f317c8058b12f853ed9fc487a0de42a Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 16 Jun 2026 16:20:17 -0400 Subject: [PATCH 06/21] clang-format: break long line in VisitOffsetOfExpr --- clang/lib/AST/ExprConstant.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 32b9bb01b9d71..0a24cf4053ce3 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19399,8 +19399,9 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); // Reject negative indices and indices too large to fit in int64_t, // to avoid sign-extension issues or crashes in getZExtValue(). + APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false); if (IdxResult.isSigned() ? IdxResult.isNegative() - : IdxResult.ugt(APSInt::getMaxValue(64, /*Unsigned=*/false))) + : IdxResult.ugt(MaxIdx)) return Error(OOE); Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() : IdxResult.getSExtValue()) * >From d74d4a64a54b8efab9fe445181f22c9a486d5f95 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Wed, 17 Jun 2026 09:40:26 -0400 Subject: [PATCH 07/21] Fix offsetof overflow and AP index handling --- clang/lib/AST/ByteCode/Compiler.cpp | 7 +++++-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 +++++++----- clang/lib/AST/ExprConstant.cpp | 23 ++++++++++++++++------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 3f4054544f311..128dd6f01c0ab 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3766,8 +3766,11 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { if (!this->visit(ArrayIndexExpr)) return false; - // Cast to Sint64. For unsigned types, cast to Uint64 first to - // avoid sign-extending values with the high bit set (e.g. uint8_t >= 128). + // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid + // sign-extending values with the high bit set (e.g. uint8_t >= 128). + // AP types cannot be safely narrowed to Sint64; fail constant evaluation. + if (IndexT == PT_IntAP || IndexT == PT_IntAPS) + return false; if (IndexT != PT_Sint64) { if (!isSignedType(IndexT) && IndexT != PT_Uint64) { if (!this->emitCast(IndexT, PT_Uint64, E)) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0a0149e240c2f..0c247f8b15fae 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -6670,11 +6670,7 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, break; } case OffsetOfNode::Array: { - // When generating bytecode, we put all the index expressions as Sint64 on - // the stack. int64_t Index = ArrayIndices[ArrayIndex]; - // Reject negative indices and unsigned indices that wrapped to negative - // after the Uint64->Sint64 cast (e.g. __uint128_t >= 0x8000000000000000). if (Index < 0) { S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) @@ -6686,7 +6682,13 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, return false; CurrentType = AT->getElementType(); CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(CurrentType); - Result += Index * ElementSize; + int64_t ElemSize = ElementSize.getQuantity(); + if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index) + return false; + int64_t Offset = Index * ElemSize; + if (Result.getQuantity() > llvm::maxIntN(64) - Offset) + return false; + Result += CharUnits::fromQuantity(Offset); ++ArrayIndex; break; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 0a24cf4053ce3..46d341a8bc0c7 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19397,15 +19397,24 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { return Error(OOE); CurrentType = AT->getElementType(); CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); - // Reject negative indices and indices too large to fit in int64_t, - // to avoid sign-extension issues or crashes in getZExtValue(). + // Reject negative indices, indices too large to fit in int64_t, + // and overflow in the offset computation. APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false); - if (IdxResult.isSigned() ? IdxResult.isNegative() - : IdxResult.ugt(MaxIdx)) + if (IdxResult.isSigned() + ? (IdxResult.isNegative() || IdxResult.sgt(MaxIdx)) + : IdxResult.ugt(MaxIdx)) return Error(OOE); - Result += (IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() - : IdxResult.getSExtValue()) * - ElementSize; + int64_t IdxVal = IdxResult.isUnsigned() + ? (int64_t)IdxResult.getZExtValue() + : IdxResult.getSExtValue(); + int64_t ElemSize = ElementSize.getQuantity(); + if (IdxVal != 0 && + ElemSize > std::numeric_limits<int64_t>::max() / IdxVal) + return Error(OOE); + int64_t Offset = IdxVal * ElemSize; + if (Result.getQuantity() > std::numeric_limits<int64_t>::max() - Offset) + return Error(OOE); + Result += CharUnits::fromQuantity(Offset); break; } >From 61c46e1893b382ed4639b84d9f41952dfdcdf23e Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Wed, 17 Jun 2026 10:06:38 -0400 Subject: [PATCH 08/21] Add tests for __uint128_t > UINT64_MAX and multiply overflow --- clang/test/Sema/offsetof-unsigned-index.c | 10 ++++++++++ clang/test/SemaCXX/offsetof-unsigned-index.cpp | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 55f1e08f3a535..1b34ebe60443f 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -30,3 +30,13 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expe // __uint128_t indices >= 0x8000000000000000 must be rejected. _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: +// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a +// wrong result instead of an error). +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// A uint64_t index that causes index*sizeof(element) to overflow int64_t must +// be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. +struct ShortArray { short data[2]; }; +_Static_assert(__builtin_offsetof(struct ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 2cea1f32be472..2b3995d405705 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -35,3 +35,13 @@ static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-err // __uint128_t indices >= 0x8000000000000000 must be rejected. static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: +// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a +// wrong result instead of an error). +static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} + +// A uint64_t index that causes index*sizeof(element) to overflow int64_t must +// be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. +struct ShortArray { short data[2]; }; +static_assert(__builtin_offsetof(ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} >From 1a2d73f466710bf551392aef641f0f12fab6a032 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Fri, 19 Jun 2026 09:54:59 -0400 Subject: [PATCH 09/21] Handle small AP-typed offsetof indices correctly --- clang/lib/AST/ByteCode/Compiler.cpp | 16 +++++++++++++--- clang/test/Sema/offsetof-unsigned-index.c | 5 +++++ clang/test/SemaCXX/offsetof-unsigned-index.cpp | 5 +++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 128dd6f01c0ab..2667bac8733a4 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3764,13 +3764,23 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { continue; } + if (IndexT == PT_IntAP || IndexT == PT_IntAPS) { + // AP types (e.g. __uint128_t, __int128) cannot be safely cast to + // Sint64. Evaluate the constant and push it directly as Sint64. + Expr::EvalResult EvalResult; + if (!ArrayIndexExpr->EvaluateAsInt(EvalResult, Ctx.getASTContext())) + return false; + llvm::APSInt IdxVal = EvalResult.Val.getInt(); + if (IdxVal.isNegative() || !IdxVal.isSignedIntN(64)) + return false; + if (!this->emitConstSint64((int64_t)IdxVal.getZExtValue(), E)) + return false; + continue; + } if (!this->visit(ArrayIndexExpr)) return false; // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid // sign-extending values with the high bit set (e.g. uint8_t >= 128). - // AP types cannot be safely narrowed to Sint64; fail constant evaluation. - if (IndexT == PT_IntAP || IndexT == PT_IntAPS) - return false; if (IndexT != PT_Sint64) { if (!isSignedType(IndexT) && IndexT != PT_Uint64) { if (!this->emitCast(IndexT, PT_Uint64, E)) diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 1b34ebe60443f..57bce06a25c98 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -31,6 +31,11 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expe // __uint128_t indices >= 0x8000000000000000 must be rejected. _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +// Small __uint128_t values that fit in int64_t must work correctly. +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0]) == + __builtin_offsetof(struct NegIdxStruct, x), + "offsetof with __uint128_t index 0 should work"); + // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a // wrong result instead of an error). diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 2b3995d405705..880c45dec0e3e 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -36,6 +36,11 @@ static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-err // __uint128_t indices >= 0x8000000000000000 must be rejected. static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +// Small __uint128_t values that fit in int64_t must work correctly. +static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) == + __builtin_offsetof(NegIdxStruct, x), + "offsetof with __uint128_t index 0 should work"); + // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a // wrong result instead of an error). >From 1060b4331649596afa944e2ae76ee1cba4346f97 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 23 Jun 2026 09:49:51 -0400 Subject: [PATCH 10/21] Fix offsetof AP index handling: add CastNoOverflow opcode and APInt crash fix - ExprConstant.cpp: replace sgt/ugt(MaxIdx) with getActiveBits() > 63 to avoid APInt bit-width assertion crash when comparing 128-bit values - Opcodes.td: add APOnlyTypeClass (IntAP, IntAPS) and CastNoOverflow opcode - Interp.h: implement CastNoOverflow -- pops AP value, rejects negative or values that don't fit in int64_t (activeBits > 63), pushes as Sint64 - Compiler.cpp: for AP types, emit visit + CastNoOverflow instead of calling EvaluateAsInt (which is not allowed in the compiler phase) --- clang/lib/AST/ByteCode/Compiler.cpp | 10 ++-------- clang/lib/AST/ByteCode/Interp.h | 17 +++++++++++++++++ clang/lib/AST/ByteCode/Opcodes.td | 10 ++++++++++ clang/lib/AST/ExprConstant.cpp | 5 +---- clang/test/Sema/offsetof-unsigned-index.c | 11 +++++++---- clang/test/SemaCXX/offsetof-unsigned-index.cpp | 15 +++++++++------ 6 files changed, 46 insertions(+), 22 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 2667bac8733a4..41685b0b1a257 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3765,15 +3765,9 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { } if (IndexT == PT_IntAP || IndexT == PT_IntAPS) { - // AP types (e.g. __uint128_t, __int128) cannot be safely cast to - // Sint64. Evaluate the constant and push it directly as Sint64. - Expr::EvalResult EvalResult; - if (!ArrayIndexExpr->EvaluateAsInt(EvalResult, Ctx.getASTContext())) + if (!this->visit(ArrayIndexExpr)) return false; - llvm::APSInt IdxVal = EvalResult.Val.getInt(); - if (IdxVal.isNegative() || !IdxVal.isSignedIntN(64)) - return false; - if (!this->emitConstSint64((int64_t)IdxVal.getZExtValue(), E)) + if (!this->emitCastNoOverflow(IndexT, E)) return false; continue; } diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 97d9e7cc14e32..2aaf82dcd4051 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -2884,6 +2884,23 @@ bool CastAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { return true; } +// Cast an AP integer to Sint64, failing constant evaluation if the value is +// negative or too large to fit (i.e. truncation would change the value). +template <PrimType Name, class T = typename PrimConv<Name>::T> +bool CastNoOverflow(InterpState &S, CodePtr OpPC) { + T Source = S.Stk.pop<T>(); + APSInt Val = Source.toAPSInt(); + if (Val.isNegative() || Val.getActiveBits() > 63) { + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_invalid_subexpr_in_const_expr) + << S.Current->getRange(OpPC); + return false; + } + S.Stk.push<Integral<64, true>>( + Integral<64, true>::from((int64_t)Val.getZExtValue())); + return true; +} + template <PrimType Name, class T = typename PrimConv<Name>::T> bool CastIntegralFloating(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *Sem, uint32_t FPOI) { diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td index b6190554178f3..7cda124b40e24 100644 --- a/clang/lib/AST/ByteCode/Opcodes.td +++ b/clang/lib/AST/ByteCode/Opcodes.td @@ -748,6 +748,16 @@ def CastAPS : Opcode { let HasGroup = 1; } +def APOnlyTypeClass : TypeClass { + let Types = [IntAP, IntAPS]; +} + +// Cast from an AP integer type to Sint64, failing if the value doesn't fit. +def CastNoOverflow : Opcode { + let Types = [APOnlyTypeClass]; + let HasGroup = 1; +} + // Cast an integer to a floating type def CastIntegralFloating : Opcode { let Types = [AluTypeClass]; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 46d341a8bc0c7..aa317ee2d106d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19399,10 +19399,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { CharUnits ElementSize = Info.Ctx.getTypeSizeInChars(CurrentType); // Reject negative indices, indices too large to fit in int64_t, // and overflow in the offset computation. - APSInt MaxIdx = APSInt::getMaxValue(64, /*Unsigned=*/false); - if (IdxResult.isSigned() - ? (IdxResult.isNegative() || IdxResult.sgt(MaxIdx)) - : IdxResult.ugt(MaxIdx)) + if (IdxResult.isNegative() || IdxResult.getActiveBits() > 63) return Error(OOE); int64_t IdxVal = IdxResult.isUnsigned() ? (int64_t)IdxResult.getZExtValue() diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 57bce06a25c98..075a96b0e8163 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -26,22 +26,25 @@ _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 * // Negative indices must be rejected. struct NegIdxStruct { int a; int x[1]; }; -_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} // __uint128_t indices >= 0x8000000000000000 must be rejected. -_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} // Small __uint128_t values that fit in int64_t must work correctly. _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)0]) == __builtin_offsetof(struct NegIdxStruct, x), "offsetof with __uint128_t index 0 should work"); +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[(__uint128_t)1]) == + __builtin_offsetof(struct NegIdxStruct, x) + sizeof(int), + "offsetof with __uint128_t index 1 should work"); // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a // wrong result instead of an error). -_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +_Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} // A uint64_t index that causes index*sizeof(element) to overflow int64_t must // be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. struct ShortArray { short data[2]; }; -_Static_assert(__builtin_offsetof(struct ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +_Static_assert(__builtin_offsetof(struct ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp index 880c45dec0e3e..22d0eda636f08 100644 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ b/clang/test/SemaCXX/offsetof-unsigned-index.cpp @@ -7,8 +7,8 @@ // Also tests that negative indices and oversized __uint128_t indices are rejected. // https://github.com/llvm/llvm-project/issues/199319 -#include <cstdint> -#include <cstddef> +#include <stdint.h> +#include <stddef.h> struct MyStruct { void *ptrs[256]; @@ -31,22 +31,25 @@ static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768, // Negative indices must be rejected. struct NegIdxStruct { int a; int x[1]; }; -static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} // __uint128_t indices >= 0x8000000000000000 must be rejected. -static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} // Small __uint128_t values that fit in int64_t must work correctly. static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) == __builtin_offsetof(NegIdxStruct, x), "offsetof with __uint128_t index 0 should work"); +static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)1]) == + __builtin_offsetof(NegIdxStruct, x) + sizeof(int), + "offsetof with __uint128_t index 1 should work"); // __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: // old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a // wrong result instead of an error). -static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} // A uint64_t index that causes index*sizeof(element) to overflow int64_t must // be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. struct ShortArray { short data[2]; }; -static_assert(__builtin_offsetof(ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{subexpression not valid in a constant expression}} +static_assert(__builtin_offsetof(ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} >From ee845e9b82223c8237a2960df803366e34cf7895 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Tue, 23 Jun 2026 14:36:29 -0400 Subject: [PATCH 11/21] Use APSInt::getExtValue() in offsetof index evaluation --- clang/lib/AST/ExprConstant.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index aa317ee2d106d..c33dfe8afacea 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19401,9 +19401,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { // and overflow in the offset computation. if (IdxResult.isNegative() || IdxResult.getActiveBits() > 63) return Error(OOE); - int64_t IdxVal = IdxResult.isUnsigned() - ? (int64_t)IdxResult.getZExtValue() - : IdxResult.getSExtValue(); + int64_t IdxVal = IdxResult.getExtValue(); int64_t ElemSize = ElementSize.getQuantity(); if (IdxVal != 0 && ElemSize > std::numeric_limits<int64_t>::max() / IdxVal) >From d0e9aae9aaa8ce5725b15cae11bdddf8f1eea444 Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:24:34 -0400 Subject: [PATCH 12/21] Move BigStruct/uint16_t test to .c file and drop .cpp test No C++-specific logic in the test; move uint16_t/BigStruct case to the .c test file and delete the redundant SemaCXX variant. Addresses reviewer comment from efriedma-quic. --- clang/test/Sema/offsetof-unsigned-index.c | 5 ++ .../test/SemaCXX/offsetof-unsigned-index.cpp | 55 ------------------- 2 files changed, 5 insertions(+), 55 deletions(-) delete mode 100644 clang/test/SemaCXX/offsetof-unsigned-index.cpp diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index 075a96b0e8163..d873175c22eb8 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -24,6 +24,11 @@ _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)128]) == 128 * _Static_assert(__builtin_offsetof(struct MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), "offsetof with uint8_t index 255 should be correctly zero-extended, not sign-extended"); +// uint16_t index: values >= 32768 were also affected by sign-extension. +struct BigStruct { char data[65536]; }; +_Static_assert(__builtin_offsetof(struct BigStruct, data[(uint16_t)32768]) == 32768, + "offsetof with uint16_t index 32768 should be correctly zero-extended"); + // Negative indices must be rejected. struct NegIdxStruct { int a; int x[1]; }; _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} diff --git a/clang/test/SemaCXX/offsetof-unsigned-index.cpp b/clang/test/SemaCXX/offsetof-unsigned-index.cpp deleted file mode 100644 index 22d0eda636f08..0000000000000 --- a/clang/test/SemaCXX/offsetof-unsigned-index.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 -// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -std=c++11 -fexperimental-new-constant-interpreter - -// Test that offsetof correctly zero-extends unsigned array indices >= 128. -// Previously, Clang would sign-extend uint8_t/uint16_t indices whose high bit -// was set, producing a large bogus offset value instead of the correct one. -// Also tests that negative indices and oversized __uint128_t indices are rejected. -// https://github.com/llvm/llvm-project/issues/199319 - -#include <stdint.h> -#include <stddef.h> - -struct MyStruct { - void *ptrs[256]; -}; - -// uint8_t index: values >= 128 were incorrectly sign-extended -static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)127]) == 127 * sizeof(void *), - "offsetof with uint8_t index 127 should be correct"); -static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)128]) == 128 * sizeof(void *), - "offsetof with uint8_t index 128 should be correctly zero-extended"); -static_assert(__builtin_offsetof(MyStruct, ptrs[(uint8_t)255]) == 255 * sizeof(void *), - "offsetof with uint8_t index 255 should be correctly zero-extended"); - -// uint16_t index: values >= 32768 were also affected -struct BigStruct { - char data[65536]; -}; -static_assert(__builtin_offsetof(BigStruct, data[(uint16_t)32768]) == 32768, - "offsetof with uint16_t index 32768 should be correctly zero-extended"); - -// Negative indices must be rejected. -struct NegIdxStruct { int a; int x[1]; }; -static_assert(__builtin_offsetof(NegIdxStruct, x[-1]) == 0, ""); // expected-error {{not an integral constant expression}} - -// __uint128_t indices >= 0x8000000000000000 must be rejected. -static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0x8000000000000000]) == 0, ""); // expected-error {{not an integral constant expression}} - -// Small __uint128_t values that fit in int64_t must work correctly. -static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)0]) == - __builtin_offsetof(NegIdxStruct, x), - "offsetof with __uint128_t index 0 should work"); -static_assert(__builtin_offsetof(NegIdxStruct, x[(__uint128_t)1]) == - __builtin_offsetof(NegIdxStruct, x) + sizeof(int), - "offsetof with __uint128_t index 1 should work"); - -// __uint128_t indices > UINT64_MAX must be rejected (e.g. adding another zero: -// old code would truncate 2^64 to 0 via PT_Uint64 cast, silently producing a -// wrong result instead of an error). -static_assert(__builtin_offsetof(NegIdxStruct, x[((__uint128_t)1 << 64)]) == 0, ""); // expected-error {{not an integral constant expression}} - -// A uint64_t index that causes index*sizeof(element) to overflow int64_t must -// be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. -struct ShortArray { short data[2]; }; -static_assert(__builtin_offsetof(ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} >From 9ec2151c158b192cb6f8785253bdb2faf7cf501b Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:33:22 -0400 Subject: [PATCH 13/21] Add specific diagnostic for overflow in offsetof computation Instead of the generic 'subexpression not valid in a constant expression' note, emit 'overflow in offsetof' when the multiply or add step of the offsetof calculation overflows int64_t. Add note_constexpr_offsetof_overflow to DiagnosticASTKinds.td and use it in both the ExprConstant and ByteCode (InterpBuiltin) paths. Update the test to expect the new note. Addresses reviewer comment from efriedma-quic. --- clang/include/clang/Basic/DiagnosticASTKinds.td | 2 ++ clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 ++++++++++-- clang/lib/AST/ExprConstant.cpp | 4 ++-- clang/test/Sema/offsetof-unsigned-index.c | 2 +- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index bde418695f647..f86f0157b2b1f 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -24,6 +24,8 @@ def note_constexpr_invalid_downcast : Note< "cannot cast object of dynamic type %0 to type %1">; def note_constexpr_overflow : Note< "value %0 is outside the range of representable values of type %1">; +def note_constexpr_offsetof_overflow : Note< + "overflow in offsetof">; def note_constexpr_negative_shift : Note<"negative shift count %0">; def note_constexpr_large_shift : Note< "shift count %0 >= width of type %1 (%2 bit%s2)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0c247f8b15fae..b182820a0d028 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -6683,11 +6683,19 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, CurrentType = AT->getElementType(); CharUnits ElementSize = S.getASTContext().getTypeSizeInChars(CurrentType); int64_t ElemSize = ElementSize.getQuantity(); - if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index) + if (Index != 0 && ElemSize > llvm::maxIntN(64) / Index) { + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_constexpr_offsetof_overflow) + << S.Current->getRange(OpPC); return false; + } int64_t Offset = Index * ElemSize; - if (Result.getQuantity() > llvm::maxIntN(64) - Offset) + if (Result.getQuantity() > llvm::maxIntN(64) - Offset) { + S.FFDiag(S.Current->getLocation(OpPC), + diag::note_constexpr_offsetof_overflow) + << S.Current->getRange(OpPC); return false; + } Result += CharUnits::fromQuantity(Offset); ++ArrayIndex; break; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index c33dfe8afacea..e2cc1d7c85872 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -19405,10 +19405,10 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) { int64_t ElemSize = ElementSize.getQuantity(); if (IdxVal != 0 && ElemSize > std::numeric_limits<int64_t>::max() / IdxVal) - return Error(OOE); + return Error(OOE, diag::note_constexpr_offsetof_overflow); int64_t Offset = IdxVal * ElemSize; if (Result.getQuantity() > std::numeric_limits<int64_t>::max() - Offset) - return Error(OOE); + return Error(OOE, diag::note_constexpr_offsetof_overflow); Result += CharUnits::fromQuantity(Offset); break; } diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index d873175c22eb8..cafd2ddff58f8 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -52,4 +52,4 @@ _Static_assert(__builtin_offsetof(struct NegIdxStruct, x[((__uint128_t)1 << 64)] // A uint64_t index that causes index*sizeof(element) to overflow int64_t must // be rejected. 4611686018427387904 * sizeof(short)==2 == 2^63 > INT64_MAX. struct ShortArray { short data[2]; }; -_Static_assert(__builtin_offsetof(struct ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} +_Static_assert(__builtin_offsetof(struct ShortArray, data[(uint64_t)4611686018427387904ULL]) == 0, ""); // expected-error {{not an integral constant expression}} expected-note {{overflow in offsetof}} >From fec17a8c32c13d2e4784b15fbf0a3893c1dadd7c Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:35:54 -0400 Subject: [PATCH 14/21] Replace standard includes in test with manual typedefs Remove #include <stdint.h> and <stddef.h> from offsetof-unsigned-index.c and add manual typedef declarations instead, following the convention that clang test files should not use standard includes. Addresses reviewer comment from tbaederr. --- clang/test/Sema/offsetof-unsigned-index.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/test/Sema/offsetof-unsigned-index.c b/clang/test/Sema/offsetof-unsigned-index.c index cafd2ddff58f8..56396eb2a12e6 100644 --- a/clang/test/Sema/offsetof-unsigned-index.c +++ b/clang/test/Sema/offsetof-unsigned-index.c @@ -7,8 +7,9 @@ // Also tests that negative indices and oversized __uint128_t indices are rejected. // https://github.com/llvm/llvm-project/issues/199319 -#include <stdint.h> -#include <stddef.h> +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned long long uint64_t; struct MyStruct { void *ptrs[256]; >From d09c2beccdbbb572617dc31d7094c663ab37eb56 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:37:34 -0400 Subject: [PATCH 15/21] Update clang/lib/AST/ByteCode/Interp.h Co-authored-by: Timm Baeder <[email protected]> --- clang/lib/AST/ByteCode/Interp.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 2aaf82dcd4051..1ce15cf7459dd 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -2891,10 +2891,7 @@ bool CastNoOverflow(InterpState &S, CodePtr OpPC) { T Source = S.Stk.pop<T>(); APSInt Val = Source.toAPSInt(); if (Val.isNegative() || Val.getActiveBits() > 63) { - S.FFDiag(S.Current->getLocation(OpPC), - diag::note_invalid_subexpr_in_const_expr) - << S.Current->getRange(OpPC); - return false; +return Invalid(S, OpPC); } S.Stk.push<Integral<64, true>>( Integral<64, true>::from((int64_t)Val.getZExtValue())); >From e7941837ce93c0ec5e410c45ab836ed1825ae886 Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:38:51 -0400 Subject: [PATCH 16/21] Fix indentation of Invalid(S, OpPC) in CastNoOverflow --- clang/lib/AST/ByteCode/Interp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 1ce15cf7459dd..28cb235c7d781 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -2891,7 +2891,7 @@ bool CastNoOverflow(InterpState &S, CodePtr OpPC) { T Source = S.Stk.pop<T>(); APSInt Val = Source.toAPSInt(); if (Val.isNegative() || Val.getActiveBits() > 63) { -return Invalid(S, OpPC); + return Invalid(S, OpPC); } S.Stk.push<Integral<64, true>>( Integral<64, true>::from((int64_t)Val.getZExtValue())); >From 39cb083dd528a1605c9612e6376e32d66085ebac Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:45:14 -0400 Subject: [PATCH 17/21] Remove unnecessary two-step cast in VisitOffsetOfExpr ByteCode path The intermediate Uint64 step was not needed: Cast<PT_UintN, PT_Sint64> uses a C++ implicit conversion which already correctly zero-extends unsigned values. Revert the non-AP path to the original direct emitCast(IndexT, PT_Sint64). Addresses reviewer comment from tbaederr. --- clang/lib/AST/ByteCode/Compiler.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 41685b0b1a257..2458860f44eb5 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -3773,14 +3773,8 @@ bool Compiler<Emitter>::VisitOffsetOfExpr(const OffsetOfExpr *E) { } if (!this->visit(ArrayIndexExpr)) return false; - // Cast to Sint64. For unsigned types, cast to Uint64 first to avoid - // sign-extending values with the high bit set (e.g. uint8_t >= 128). + // Cast to Sint64. if (IndexT != PT_Sint64) { - if (!isSignedType(IndexT) && IndexT != PT_Uint64) { - if (!this->emitCast(IndexT, PT_Uint64, E)) - return false; - IndexT = PT_Uint64; - } if (!this->emitCast(IndexT, PT_Sint64, E)) return false; } >From e22b2a826009f77874409edc791358dce249d602 Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Thu, 25 Jun 2026 09:58:18 -0400 Subject: [PATCH 18/21] Add release note for offsetof unsigned index sign-extension fix --- clang/docs/ReleaseNotes.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/docs/ReleaseNotes.md b/clang/docs/ReleaseNotes.md index 0f252fdf3601b..37e78b93a7217 100644 --- a/clang/docs/ReleaseNotes.md +++ b/clang/docs/ReleaseNotes.md @@ -759,6 +759,9 @@ latest release, please see the [Clang Web Site](https://clang.llvm.org) or the calling `__builtin_bit_cast`. (#GH200112) - Clang now SFINAE friendly when the ``__reference_meows_from_temporary`` builtins should SFINAE friendly when the 1st type is not a reference type. (#GH206524) +- Fixed `__builtin_offsetof` incorrectly sign-extending unsigned array indices + with the high bit set (e.g. `uint8_t` values >= 128), which produced wrong + offset values in constant expressions. (#GH199319) #### Bug Fixes to Attribute Support >From d7cafdf2ee3928e3e44cf7c7e5daad897dc27736 Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Fri, 26 Jun 2026 09:13:51 -0400 Subject: [PATCH 19/21] Update clang/lib/AST/ByteCode/Interp.h Co-authored-by: Timm Baeder <[email protected]> --- clang/lib/AST/ByteCode/Interp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 28cb235c7d781..84564d0a33cff 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -2890,9 +2890,8 @@ template <PrimType Name, class T = typename PrimConv<Name>::T> bool CastNoOverflow(InterpState &S, CodePtr OpPC) { T Source = S.Stk.pop<T>(); APSInt Val = Source.toAPSInt(); - if (Val.isNegative() || Val.getActiveBits() > 63) { + if (Val.isNegative() || Val.getActiveBits() > 63) return Invalid(S, OpPC); - } S.Stk.push<Integral<64, true>>( Integral<64, true>::from((int64_t)Val.getZExtValue())); return true; >From 75b62d4fe5a606deebae65ae35b8fea524cac58e Mon Sep 17 00:00:00 2001 From: Marlus Cadanus da Costa <[email protected]> Date: Fri, 26 Jun 2026 09:14:11 -0400 Subject: [PATCH 20/21] Update clang/lib/AST/ByteCode/InterpBuiltin.cpp Co-authored-by: Timm Baeder <[email protected]> --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b182820a0d028..ab5a4d2bab41b 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -6671,12 +6671,8 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, } case OffsetOfNode::Array: { int64_t Index = ArrayIndices[ArrayIndex]; - if (Index < 0) { - S.FFDiag(S.Current->getLocation(OpPC), - diag::note_invalid_subexpr_in_const_expr) - << S.Current->getRange(OpPC); - return false; - } + if (Index < 0) + return Invalid(S, OpPC); const ArrayType *AT = S.getASTContext().getAsArrayType(CurrentType); if (!AT) return false; >From 3fb104a7f1261ab1993aaa65646c2fea0256d400 Mon Sep 17 00:00:00 2001 From: Cadanus da Costa <[email protected]> Date: Fri, 26 Jun 2026 09:15:42 -0400 Subject: [PATCH 21/21] Restore accidentally removed comment in InterpretOffsetOf --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ab5a4d2bab41b..14717a077075a 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -6670,6 +6670,8 @@ bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, break; } case OffsetOfNode::Array: { + // When generating bytecode, we put all the index expressions as Sint64 on + // the stack. int64_t Index = ArrayIndices[ArrayIndex]; if (Index < 0) return Invalid(S, OpPC); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
