https://github.com/sebpop updated https://github.com/llvm/llvm-project/pull/159046
>From 7fdec0a94298caae4bb7bd69a9d165524df11fb7 Mon Sep 17 00:00:00 2001 From: Sebastian Pop <[email protected]> Date: Tue, 16 Sep 2025 06:23:44 -0500 Subject: [PATCH 1/7] [clang] add array out-of-bounds access constraints using llvm.assume Following C and C++ standards, generate llvm.assume statements for array subscript bounds to provide optimization hints. For this code: ``` int arr[10]; int example(int i) { return arr[i]; } ``` clang now generates an `assume(i < 10)`: ``` define i32 @example(i32 noundef %i) local_unnamed_addr #0 { entry: %idxprom = zext nneg i32 %i to i64 %bounds.constraint = icmp ult i32 %i, 10 tail call void @llvm.assume(i1 %bounds.constraint) %arrayidx = getelementptr inbounds nuw i32, ptr @arr, i64 %idxprom %0 = load i32, ptr %arrayidx, align 4, !tbaa !2 ret i32 %0 } ``` --- clang/lib/CodeGen/CGExpr.cpp | 112 ++++++++++++++++++ clang/lib/CodeGen/CGExprScalar.cpp | 3 + clang/lib/CodeGen/CodeGenFunction.h | 7 ++ clang/test/CodeGen/array-bounds-constraints.c | 39 ++++++ 4 files changed, 161 insertions(+) create mode 100644 clang/test/CodeGen/array-bounds-constraints.c diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index e6e4947882544..d4425d76d10fe 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4559,6 +4559,97 @@ void CodeGenFunction::EmitCountedByBoundsChecking( } } +/// Emit array bounds constraints using llvm.assume for optimization hints. +/// +/// C Standard (ISO/IEC 9899:2011 - C11) +/// Section J.2 (Undefined behavior): An array subscript is out of range, even +/// if an object is apparently accessible with the given subscript (as in the +/// lvalue expression a[1][7] given the declaration int a[4][5]) (6.5.6). 
+/// +/// Section 6.5.6 (Additive operators): If both the pointer operand and the +/// result point to elements of the same array object, or one past the last +/// element of the array object, the evaluation shall not produce an overflow; +/// otherwise, the behavior is undefined. +/// +/// C++ Standard (ISO/IEC 14882 - 2017) +/// Section 8.7 (Additive operators): +/// 4 When an expression that has integral type is added to or subtracted from a +/// pointer, the result has the type of the pointer operand. If the expression +/// P points to element x[i] of an array object x with n elements,^86 the +/// expressions P + J and J + P (where J has the value j) point to the +/// (possibly-hypothetical) element x[i + j] if 0 ≤ i + j ≤ n; otherwise, the +/// behavior is undefined. Likewise, the expression P - J points to the +/// (possibly-hypothetical) element x[i − j] if 0 ≤ i − j ≤ n; otherwise, the +/// behavior is undefined. +/// ^86 A pointer past the last element of an array x of n elements is +/// considered to be equivalent to a pointer to a hypothetical element x[n] +/// for this purpose; see 6.9.2. +/// +/// This function emits llvm.assume statements to inform the optimizer that +/// array subscripts are within bounds, enabling better optimization without +/// duplicating side effects from the subscript expression. The IndexVal +/// parameter should be the already-emitted index value to avoid re-evaluation. +void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E, + llvm::Value *IndexVal) { + const Expr *Base = E->getBase(); + const Expr *Idx = E->getIdx(); + QualType BaseType = Base->getType(); + + if (const auto *ICE = dyn_cast<ImplicitCastExpr>(Base)) { + if (ICE->getCastKind() == CK_ArrayToPointerDecay) { + BaseType = ICE->getSubExpr()->getType(); + } + } + + // For now: only handle constant array types. 
+ const ConstantArrayType *CAT = getContext().getAsConstantArrayType(BaseType); + if (!CAT) + return; + + llvm::APInt ArraySize = CAT->getSize(); + if (ArraySize == 0) + return; + + QualType IdxType = Idx->getType(); + llvm::Type *IndexType = ConvertType(IdxType); + llvm::Value *Zero = llvm::ConstantInt::get(IndexType, 0); + + uint64_t ArraySizeValue = ArraySize.getLimitedValue(); + llvm::Value *ArraySizeVal = llvm::ConstantInt::get(IndexType, ArraySizeValue); + + // Use the provided IndexVal to avoid duplicating side effects. + // The caller has already emitted the index expression once. + if (!IndexVal) + return; + + // Ensure index value has the same type as our constants. + if (IndexVal->getType() != IndexType) { + bool IsSigned = IdxType->isSignedIntegerOrEnumerationType(); + IndexVal = Builder.CreateIntCast(IndexVal, IndexType, IsSigned, "idx.cast"); + } + + // Create bounds constraint: 0 <= index && index < size. + // C arrays are 0-based, so valid indices are [0, size-1]. + // This enforces the C18 standard requirement that array subscripts + // must be "greater than or equal to zero and less than the size of the + // array." + llvm::Value *LowerBound, *UpperBound; + if (IdxType->isSignedIntegerOrEnumerationType()) { + // For signed indices: index >= 0 && index < size. + LowerBound = Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero"); + UpperBound = Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size"); + } else { + // For unsigned indices: index < size (>= 0 is implicit). 
+ LowerBound = Builder.getTrue(); + UpperBound = Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size"); + } + + llvm::Value *BoundsConstraint = + Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint"); + llvm::Function *AssumeIntrinsic = CGM.getIntrinsic(llvm::Intrinsic::assume); + Builder.CreateCall(AssumeIntrinsic, BoundsConstraint); +} + LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, bool Accessed) { // The index must always be an integer, which is not an aggregate. Emit it @@ -4588,6 +4679,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, }; IdxPre = nullptr; + // Array bounds constraints will be emitted after index evaluation to avoid + // duplicating side effects from the index expression. + // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. if (E->getBase()->getType()->isSubscriptableVectorType() && @@ -4595,6 +4689,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // Emit the vector as an lvalue to get its address. LValue LHS = EmitLValue(E->getBase()); auto *Idx = EmitIdxAfterBase(/*Promote*/false); + + // Emit array bounds constraints for vector subscripts. + EmitArrayBoundsConstraints(E, Idx); + assert(LHS.isSimple() && "Can only subscript lvalue vectors here!"); return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(), LHS.getBaseInfo(), TBAAAccessInfo()); @@ -4635,6 +4733,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); + // Emit array bounds constraints for VLA access (though VLAs typically don't + // have constant bounds). + EmitArrayBoundsConstraints(E, Idx); + // The element count here is the total number of non-VLA elements. 
llvm::Value *numElements = getVLASize(vla).NumElts; @@ -4659,6 +4761,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); + // Emit array bounds constraints for ObjC interface access. + EmitArrayBoundsConstraints(E, Idx); + CharUnits InterfaceSize = getContext().getTypeSizeInChars(OIT); llvm::Value *InterfaceSizeVal = llvm::ConstantInt::get(Idx->getType(), InterfaceSize.getQuantity()); @@ -4694,6 +4799,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, ArrayLV = EmitLValue(Array); auto *Idx = EmitIdxAfterBase(/*Promote*/true); + // Emit array bounds constraints for optimization. + EmitArrayBoundsConstraints(E, Idx); + if (SanOpts.has(SanitizerKind::ArrayBounds)) EmitCountedByBoundsChecking(Array, Idx, ArrayLV.getAddress(), E->getIdx()->getType(), Array->getType(), @@ -4737,6 +4845,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, Address BaseAddr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); + + // Emit array bounds constraints for pointer-based array access. + EmitArrayBoundsConstraints(E, Idx); + QualType ptrType = E->getBase()->getType(); Addr = emitArraySubscriptGEP(*this, BaseAddr, Idx, E->getType(), !getLangOpts().PointerOverflowDefined, diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 4fa25c5d66669..28f702f9237e4 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2100,6 +2100,9 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) { if (CGF.SanOpts.has(SanitizerKind::ArrayBounds)) CGF.EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, /*Accessed*/true); + // Emit array bounds constraints for vector element access. 
+ CGF.EmitArrayBoundsConstraints(E, Idx); + return Builder.CreateExtractElement(Base, Idx, "vecext"); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 727487b46054f..6283841b7b170 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3341,6 +3341,13 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *Index, QualType IndexType, QualType IndexedType, bool Accessed); + /// Emit array bounds constraints using llvm.assume for optimization hints. + /// Emits assume statements for array bounds without duplicating side effects. + /// Takes the already-emitted index value to avoid re-evaluating expressions + /// with side effects. Helps optimizer with vectorization and bounds analysis. + void EmitArrayBoundsConstraints(const ArraySubscriptExpr *E, + llvm::Value *IndexVal); + /// Returns debug info, with additional annotation if /// CGM.getCodeGenOpts().SanitizeAnnotateDebugInfo[Ordinal] is enabled for /// any of the ordinals. diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c new file mode 100644 index 0000000000000..77e5199a1573a --- /dev/null +++ b/clang/test/CodeGen/array-bounds-constraints.c @@ -0,0 +1,39 @@ +// Test that array bounds constraints generate llvm.assume statements for optimization hints. +// RUN: %clang_cc1 -emit-llvm -O2 %s -o - | FileCheck %s + +// This test verifies that clang generates llvm.assume statements to inform the +// optimizer that array subscripts are within bounds to enable better optimization. 
+ +// CHECK-LABEL: define {{.*}} @test_simple_array +int test_simple_array(int i) { + int arr[10]; // C arrays are 0-based: valid indices are [0, 9] + // CHECK: %{{.*}} = icmp ult i32 %i, 10 + // CHECK: call void @llvm.assume(i1 %{{.*}}) + return arr[i]; +} + +// CHECK-LABEL: define {{.*}} @test_multidimensional_array +int test_multidimensional_array(int i, int j) { + int arr[5][8]; // Valid indices: i in [0, 4], j in [0, 7] + // CHECK: %{{.*}} = icmp ult i32 %i, 5 + // CHECK: call void @llvm.assume(i1 %{{.*}}) + // CHECK: %{{.*}} = icmp ult i32 %j, 8 + // CHECK: call void @llvm.assume(i1 %{{.*}}) + return arr[i][j]; +} + +// CHECK-LABEL: define {{.*}} @test_unsigned_index +int test_unsigned_index(unsigned int i) { + int arr[10]; + // CHECK: %{{.*}} = icmp ult i32 %i, 10 + // CHECK: call void @llvm.assume(i1 %{{.*}}) + return arr[i]; +} + +// CHECK-LABEL: define {{.*}} @test_store_undef +void test_store_undef(int i, int value) { + int arr[10]; + // CHECK: %{{.*}} = icmp ult i32 %i, 10 + // CHECK: call void @llvm.assume(i1 %{{.*}}) + arr[i] = value; +} >From 702d9dd71e4a646077e5d3347f36a43807dcec4a Mon Sep 17 00:00:00 2001 From: Sebastian Pop <[email protected]> Date: Tue, 16 Sep 2025 06:23:44 -0500 Subject: [PATCH 2/7] add -fassume-array-bounds flag disabled by default for now Sanitizer interaction: assume generation is disabled when -fsanitize=array-bounds is active. Flexible array detection: skip size-1 arrays as last struct field. 
--- clang/include/clang/Basic/CodeGenOptions.def | 1 + clang/include/clang/Driver/Options.td | 5 ++ clang/lib/CodeGen/CGExpr.cpp | 59 +++++++++--- .../CodeGen/array-bounds-constraints-safety.c | 89 +++++++++++++++++++ clang/test/CodeGen/array-bounds-constraints.c | 9 +- 5 files changed, 149 insertions(+), 14 deletions(-) create mode 100644 clang/test/CodeGen/array-bounds-constraints-safety.c diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 872f73ebf3810..274ae075c2de7 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -33,6 +33,7 @@ CODEGENOPT(ImplicitMapSyms, 1, 0, Benign) ///< -Wa,-mmapsyms=implicit CODEGENOPT(AsmVerbose , 1, 0, Benign) ///< -dA, -fverbose-asm. CODEGENOPT(PreserveAsmComments, 1, 1, Benign) ///< -dA, -fno-preserve-as-comments. CODEGENOPT(AssumeSaneOperatorNew , 1, 1, Benign) ///< implicit __attribute__((malloc)) operator new +CODEGENOPT(AssumeArrayBounds , 1, 0, Benign) ///< Generate llvm.assume for array bounds. CODEGENOPT(AssumeUniqueVTables , 1, 1, Benign) ///< Assume a class has only one vtable. 
CODEGENOPT(Autolink , 1, 1, Benign) ///< -fno-autolink CODEGENOPT(AutoImport , 1, 1, Benign) ///< -fno-auto-import diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index def7c09d58cfb..d4b324e9650a7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1603,6 +1603,11 @@ defm assume_unique_vtables : BoolFOption<"assume-unique-vtables", BothFlags<[], [ClangOption, CLOption]>>; def fassume_sane_operator_new : Flag<["-"], "fassume-sane-operator-new">, Group<f_Group>; +defm assume_array_bounds : BoolFOption<"assume-array-bounds", + CodeGenOpts<"AssumeArrayBounds">, DefaultFalse, + PosFlag<SetTrue, [], [ClangOption, CC1Option], + "Generate llvm.assume for array bounds to enable optimizations (may break code with intentional out-of-bounds access)">, + NegFlag<SetFalse, [], [ClangOption, CC1Option]>>; def fastcp : Flag<["-"], "fastcp">, Group<f_Group>; def fastf : Flag<["-"], "fastf">, Group<f_Group>; def fast : Flag<["-"], "fast">, Group<f_Group>; diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d4425d76d10fe..13416228873cc 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4589,8 +4589,25 @@ void CodeGenFunction::EmitCountedByBoundsChecking( /// array subscripts are within bounds, enabling better optimization without /// duplicating side effects from the subscript expression. The IndexVal /// parameter should be the already-emitted index value to avoid re-evaluation. +/// +/// Code that intentionally accesses out-of-bounds (UB) may break with +/// optimizations. Only applies to constant-size arrays (not pointers, VLAs, or +/// flexible arrays.) Disabled when -fsanitize=array-bounds is active. +/// void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E, llvm::Value *IndexVal) { + // Disable with -fno-assume-array-bounds. + if (!CGM.getCodeGenOpts().AssumeArrayBounds) + return; + + // Disable at -O0. 
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return; + + // Disable with array-bounds sanitizer. + if (SanOpts.has(SanitizerKind::ArrayBounds)) + return; + const Expr *Base = E->getBase(); const Expr *Idx = E->getIdx(); QualType BaseType = Base->getType(); @@ -4610,6 +4627,26 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E, if (ArraySize == 0) return; + // Don't generate assumes for flexible array member pattern. + // Arrays of size 1 in structs are often used as placeholders for + // variable-length data (pre-C99 flexible array member idiom.) + if (ArraySize == 1) { + if (const auto *ME = dyn_cast<MemberExpr>(Base->IgnoreParenImpCasts())) { + if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) { + const RecordDecl *RD = FD->getParent(); + // Check if this field is the last field in the record. + // Only the last field can be a flexible array member. + const FieldDecl *LastField = nullptr; + for (const auto *Field : RD->fields()) + LastField = Field; + if (LastField == FD) + // This is a size-1 array as the last field in a struct. + // Likely a flexible array member pattern - skip assumes. + return; + } + } + } + QualType IdxType = Idx->getType(); llvm::Type *IndexType = ConvertType(IdxType); llvm::Value *Zero = llvm::ConstantInt::get(IndexType, 0); @@ -4633,21 +4670,21 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E, // This enforces the C18 standard requirement that array subscripts // must be "greater than or equal to zero and less than the size of the // array." - llvm::Value *LowerBound, *UpperBound; if (IdxType->isSignedIntegerOrEnumerationType()) { // For signed indices: index >= 0 && index < size. 
- LowerBound = Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero"); - UpperBound = Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size"); + llvm::Value *LowerBound = + Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero"); + llvm::Value *UpperBound = + Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size"); + llvm::Value *BoundsConstraint = + Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint"); + Builder.CreateAssumption(BoundsConstraint); } else { - // For unsigned indices: index < size (>= 0 is implicit). - LowerBound = Builder.getTrue(); - UpperBound = Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size"); + // For unsigned indices: index < size (>= 0 is implicit.) + llvm::Value *UpperBound = + Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size"); + Builder.CreateAssumption(UpperBound); } - - llvm::Value *BoundsConstraint = - Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint"); - llvm::Function *AssumeIntrinsic = CGM.getIntrinsic(llvm::Intrinsic::assume); - Builder.CreateCall(AssumeIntrinsic, BoundsConstraint); } LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, diff --git a/clang/test/CodeGen/array-bounds-constraints-safety.c b/clang/test/CodeGen/array-bounds-constraints-safety.c new file mode 100644 index 0000000000000..bbd72900361ec --- /dev/null +++ b/clang/test/CodeGen/array-bounds-constraints-safety.c @@ -0,0 +1,89 @@ +// RUN: %clang_cc1 -emit-llvm -O2 -fassume-array-bounds %s -o - | FileCheck %s +// Test that array bounds constraints are NOT applied to cases that might +// break real-world code with intentional out-of-bounds access patterns. + +// C18 standard allows one-past-the-end pointers, and some legacy code +// intentionally accesses out-of-bounds for performance or compatibility. +// This test verifies that bounds constraints are only applied to safe cases. 
+ +// CHECK-LABEL: define {{.*}} @test_flexible_array_member +struct Data { + int count; + int items[1]; // Flexible array member pattern (pre-C99 style) +}; + +int test_flexible_array_member(struct Data *d, int i) { + // CHECK-NOT: call void @llvm.assume + // Flexible array member pattern (size 1 array as last field) should NOT + // generate bounds constraints because items[1] is just a placeholder + // for a larger array allocated with `malloc (sizeof (struct Data) + 42)`. + return d->items[i]; +} + +// CHECK-LABEL: define {{.*}} @test_not_flexible_array +struct NotFlexible { + int items[1]; // Size 1 array but NOT the last field. + int count; // Something comes after it. +}; + +int test_not_flexible_array(struct NotFlexible *s, int i) { + // CHECK: call void @llvm.assume + // This is NOT a flexible array pattern (not the last field), + // so we're fine generating `assume(i < 1)`. + return s->items[i]; +} + +// CHECK-LABEL: define {{.*}} @test_pointer_parameter +int test_pointer_parameter(int *arr, int i) { + // CHECK-NOT: call void @llvm.assume + // Pointer parameters should NOT generate bounds constraints + // because we don't know the actual array size. + return arr[i]; +} + +// CHECK-LABEL: define {{.*}} @test_vla +int test_vla(int n, int i) { + int arr[n]; // Variable-length array. + // CHECK-NOT: call void @llvm.assume + // VLAs should NOT generate bounds constraints + // because the size is dynamic. + return arr[i]; +} + +// CHECK-LABEL: define {{.*}} @test_one_past_end +extern int extern_array[100]; +int *test_one_past_end(void) { + // CHECK-NOT: call void @llvm.assume + // Taking address of one-past-the-end is allowed by C standard. + // We should NOT assume anything about this access. + return &extern_array[100]; // Legal: one past the end. +} + +// CHECK-LABEL: define {{.*}} @test_extern_array +int test_extern_array(int i) { + // CHECK: call void @llvm.assume + // This will generate bounds constraints. + // The array is a constant-size global array. 
+ // This is the safe case where we want optimization hints. + return extern_array[i]; +} + +// CHECK-LABEL: define {{.*}} @test_local_constant_array +int test_local_constant_array(int i) { + int arr[10]; + // CHECK: call void @llvm.assume + // This will generate bounds constraints. + // We know the exact size of this alloca array. + // This is the safe case where we want optimization hints. + return arr[i]; +} + +// CHECK-LABEL: define {{.*}} @test_malloc_array +int *my_malloc(int); +int test_malloc_array(int i) { + // CHECK-NOT: call void @llvm.assume + // Dynamically allocated arrays accessed via pointers do not get bounds + // constraints. + int *x = my_malloc(100 * sizeof(int)); + return x[i]; +} diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c index 77e5199a1573a..a2f3ad9e514cb 100644 --- a/clang/test/CodeGen/array-bounds-constraints.c +++ b/clang/test/CodeGen/array-bounds-constraints.c @@ -1,14 +1,17 @@ -// Test that array bounds constraints generate llvm.assume statements for optimization hints. -// RUN: %clang_cc1 -emit-llvm -O2 %s -o - | FileCheck %s - // This test verifies that clang generates llvm.assume statements to inform the // optimizer that array subscripts are within bounds to enable better optimization. +// RUN: %clang_cc1 -emit-llvm -O2 -fassume-array-bounds %s -o - | FileCheck %s + +// Verify no assumes are generated. 
+// RUN: %clang_cc1 -emit-llvm -O2 -fno-assume-array-bounds %s -o - | FileCheck %s -check-prefix=NO-FLAG // CHECK-LABEL: define {{.*}} @test_simple_array +// NO-FLAG-LABEL: define {{.*}} @test_simple_array int test_simple_array(int i) { int arr[10]; // C arrays are 0-based: valid indices are [0, 9] // CHECK: %{{.*}} = icmp ult i32 %i, 10 // CHECK: call void @llvm.assume(i1 %{{.*}}) + // NO-FLAG-NOT: call void @llvm.assume return arr[i]; } >From ec1024d012bd336d9efa996709bab3c23e59ab36 Mon Sep 17 00:00:00 2001 From: Sebastian Pop <[email protected]> Date: Sat, 4 Oct 2025 18:09:35 -0500 Subject: [PATCH 3/7] fix UB in testcase --- clang/test/CodeGen/array-bounds-constraints-safety.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/test/CodeGen/array-bounds-constraints-safety.c b/clang/test/CodeGen/array-bounds-constraints-safety.c index bbd72900361ec..3484853ceb662 100644 --- a/clang/test/CodeGen/array-bounds-constraints-safety.c +++ b/clang/test/CodeGen/array-bounds-constraints-safety.c @@ -69,8 +69,10 @@ int test_extern_array(int i) { } // CHECK-LABEL: define {{.*}} @test_local_constant_array +void init_array(int *arr); int test_local_constant_array(int i) { int arr[10]; + init_array(arr); // Initialize to avoid UB from uninitialized read. // CHECK: call void @llvm.assume // This will generate bounds constraints. // We know the exact size of this alloca array. 
>From 77e2606c98c8785227a3e08c0ea21c4166415805 Mon Sep 17 00:00:00 2001 From: Sebastian Pop <[email protected]> Date: Sat, 4 Oct 2025 18:13:31 -0500 Subject: [PATCH 4/7] fix UB in testcase --- clang/test/CodeGen/array-bounds-constraints.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c index a2f3ad9e514cb..b6bc9050b039a 100644 --- a/clang/test/CodeGen/array-bounds-constraints.c +++ b/clang/test/CodeGen/array-bounds-constraints.c @@ -7,8 +7,10 @@ // CHECK-LABEL: define {{.*}} @test_simple_array // NO-FLAG-LABEL: define {{.*}} @test_simple_array +void init_array(int *arr); int test_simple_array(int i) { int arr[10]; // C arrays are 0-based: valid indices are [0, 9] + init_array(arr); // Initialize to avoid UB from uninitialized read. // CHECK: %{{.*}} = icmp ult i32 %i, 10 // CHECK: call void @llvm.assume(i1 %{{.*}}) // NO-FLAG-NOT: call void @llvm.assume @@ -18,6 +20,7 @@ int test_simple_array(int i) { // CHECK-LABEL: define {{.*}} @test_multidimensional_array int test_multidimensional_array(int i, int j) { int arr[5][8]; // Valid indices: i in [0, 4], j in [0, 7] + init_array(arr[0]); // Initialize to avoid UB from uninitialized read. // CHECK: %{{.*}} = icmp ult i32 %i, 5 // CHECK: call void @llvm.assume(i1 %{{.*}}) // CHECK: %{{.*}} = icmp ult i32 %j, 8 @@ -28,6 +31,7 @@ int test_multidimensional_array(int i, int j) { // CHECK-LABEL: define {{.*}} @test_unsigned_index int test_unsigned_index(unsigned int i) { int arr[10]; + init_array(arr); // Initialize to avoid UB from uninitialized read. 
// CHECK: %{{.*}} = icmp ult i32 %i, 10 // CHECK: call void @llvm.assume(i1 %{{.*}}) return arr[i]; >From 0ed60c2504985275c70838b91c3782fcf96d6eed Mon Sep 17 00:00:00 2001 From: Sebastian Pop <[email protected]> Date: Mon, 6 Oct 2025 09:28:57 -0500 Subject: [PATCH 5/7] avoid optimization in testcase --- clang/test/CodeGen/array-bounds-constraints.c | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c index b6bc9050b039a..ed4e3a603ac90 100644 --- a/clang/test/CodeGen/array-bounds-constraints.c +++ b/clang/test/CodeGen/array-bounds-constraints.c @@ -43,4 +43,5 @@ void test_store_undef(int i, int value) { // CHECK: %{{.*}} = icmp ult i32 %i, 10 // CHECK: call void @llvm.assume(i1 %{{.*}}) arr[i] = value; + init_array(arr); // Avoid optimization of the above statement. } >From 1c11e607e149dce1df94a3ef2e515115614f2393 Mon Sep 17 00:00:00 2001 From: Sebastian Pop <[email protected]> Date: Mon, 6 Oct 2025 10:57:58 -0500 Subject: [PATCH 6/7] handle zero-length array pattern --- clang/lib/CodeGen/CGExpr.cpp | 12 ++++++------ .../test/CodeGen/array-bounds-constraints-safety.c | 13 +++++++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 13416228873cc..1d7ebdfdd2279 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4624,13 +4624,13 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E, return; llvm::APInt ArraySize = CAT->getSize(); - if (ArraySize == 0) - return; // Don't generate assumes for flexible array member pattern. - // Arrays of size 1 in structs are often used as placeholders for - // variable-length data (pre-C99 flexible array member idiom.) - if (ArraySize == 1) { + // Size-1 arrays: "struct { int len; char data[1]; }" (pre-C99 idiom.) 
+ // Zero-length arrays: "struct { int len; char data[0]; }" (GCC extension + // https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html) + // Both patterns use arrays as placeholders for variable-length data. + if (ArraySize == 0 || ArraySize == 1) { if (const auto *ME = dyn_cast<MemberExpr>(Base->IgnoreParenImpCasts())) { if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) { const RecordDecl *RD = FD->getParent(); @@ -4640,7 +4640,7 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E, for (const auto *Field : RD->fields()) LastField = Field; if (LastField == FD) - // This is a size-1 array as the last field in a struct. + // This is a zero-length or size-1 array as the last field. // Likely a flexible array member pattern - skip assumes. return; } diff --git a/clang/test/CodeGen/array-bounds-constraints-safety.c b/clang/test/CodeGen/array-bounds-constraints-safety.c index 3484853ceb662..e4a5c361391b6 100644 --- a/clang/test/CodeGen/array-bounds-constraints-safety.c +++ b/clang/test/CodeGen/array-bounds-constraints-safety.c @@ -6,6 +6,19 @@ // intentionally accesses out-of-bounds for performance or compatibility. // This test verifies that bounds constraints are only applied to safe cases. +// CHECK-LABEL: define {{.*}} @test_zero_length_array +struct ZeroLengthData { + int count; + int items[0]; // GNU C extension: zero-length array +}; + +int test_zero_length_array(struct ZeroLengthData *d, int i) { + // CHECK-NOT: call void @llvm.assume + // Zero-length array as last field should not generate bounds constraints. 
+ // See https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html + return d->items[i]; +} + // CHECK-LABEL: define {{.*}} @test_flexible_array_member struct Data { int count; >From 033a1cefca45cbb2ae113cbf4a3d11ebfc5f0b5c Mon Sep 17 00:00:00 2001 From: Sebastian Pop <[email protected]> Date: Mon, 6 Oct 2025 11:43:47 -0500 Subject: [PATCH 7/7] handle VLAs --- clang/lib/CodeGen/CGExpr.cpp | 44 +++++++++++++------ .../CodeGen/array-bounds-constraints-safety.c | 8 ++-- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 1d7ebdfdd2279..edb83a5374fc0 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4608,6 +4608,11 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E, if (SanOpts.has(SanitizerKind::ArrayBounds)) return; + // Use the provided IndexVal to avoid duplicating side effects. + // The caller has already emitted the index expression once. + if (!IndexVal) + return; + const Expr *Base = E->getBase(); const Expr *Idx = E->getIdx(); QualType BaseType = Base->getType(); @@ -4618,19 +4623,28 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E, } } - // For now: only handle constant array types. + // Handle both constant arrays and VLAs (variable-length arrays.) const ConstantArrayType *CAT = getContext().getAsConstantArrayType(BaseType); - if (!CAT) - return; + llvm::Value *VLASize = nullptr; - llvm::APInt ArraySize = CAT->getSize(); + if (!CAT) { + if (const VariableArrayType *VAT = + getContext().getAsVariableArrayType(BaseType)) + VLASize = getVLASize(VAT).NumElts; + else + return; // Not a constant or VLA. + } + + llvm::APInt ArraySize; + if (CAT) + ArraySize = CAT->getSize(); // Don't generate assumes for flexible array member pattern. // Size-1 arrays: "struct { int len; char data[1]; }" (pre-C99 idiom.) 
// Zero-length arrays: "struct { int len; char data[0]; }" (GCC extension // https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html) // Both patterns use arrays as placeholders for variable-length data. - if (ArraySize == 0 || ArraySize == 1) { + if (CAT && (ArraySize == 0 || ArraySize == 1)) { if (const auto *ME = dyn_cast<MemberExpr>(Base->IgnoreParenImpCasts())) { if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) { const RecordDecl *RD = FD->getParent(); @@ -4649,15 +4663,18 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E, QualType IdxType = Idx->getType(); llvm::Type *IndexType = ConvertType(IdxType); - llvm::Value *Zero = llvm::ConstantInt::get(IndexType, 0); - - uint64_t ArraySizeValue = ArraySize.getLimitedValue(); - llvm::Value *ArraySizeVal = llvm::ConstantInt::get(IndexType, ArraySizeValue); + llvm::Value *ArraySizeVal; - // Use the provided IndexVal to avoid duplicating side effects. - // The caller has already emitted the index expression once. - if (!IndexVal) - return; + if (CAT) + // Constant array: use compile-time size. + ArraySizeVal = + llvm::ConstantInt::get(IndexType, ArraySize.getLimitedValue()); + else + // VLA: use runtime size. + ArraySizeVal = + VLASize->getType() == IndexType + ? VLASize + : Builder.CreateIntCast(VLASize, IndexType, false, "vla.size.cast"); // Ensure index value has the same type as our constants. if (IndexVal->getType() != IndexType) { @@ -4672,6 +4689,7 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E, // array." if (IdxType->isSignedIntegerOrEnumerationType()) { // For signed indices: index >= 0 && index < size. 
+ llvm::Value *Zero = llvm::ConstantInt::get(IndexType, 0); llvm::Value *LowerBound = Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero"); llvm::Value *UpperBound = diff --git a/clang/test/CodeGen/array-bounds-constraints-safety.c b/clang/test/CodeGen/array-bounds-constraints-safety.c index e4a5c361391b6..9d74d4314e7cd 100644 --- a/clang/test/CodeGen/array-bounds-constraints-safety.c +++ b/clang/test/CodeGen/array-bounds-constraints-safety.c @@ -55,11 +55,13 @@ int test_pointer_parameter(int *arr, int i) { } // CHECK-LABEL: define {{.*}} @test_vla +void init_vla(int *arr, int n); + int test_vla(int n, int i) { int arr[n]; // Variable-length array. - // CHECK-NOT: call void @llvm.assume - // VLAs should NOT generate bounds constraints - // because the size is dynamic. + init_vla(arr, n); // Initialize to avoid UB. + // CHECK: call void @llvm.assume + // For VLAs, generate bounds constraints using the runtime size: 0 <= i < n. return arr[i]; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
