[clang] add array out-of-bounds access constraints using llvm.assume (PR #159046)
Sebastian Pop via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 6 09:04:20 PDT 2025
https://github.com/sebpop updated https://github.com/llvm/llvm-project/pull/159046
>From 7fdec0a94298caae4bb7bd69a9d165524df11fb7 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Tue, 16 Sep 2025 06:23:44 -0500
Subject: [PATCH 1/6] [clang] add array out-of-bounds access constraints using
llvm.assume
Following C and C++ standards, generate llvm.assume statements for array
subscript bounds to provide optimization hints.
For this code:
```
int arr[10];
int example(int i) {
return arr[i];
}
```
clang now generates an `assume(0 <= i && i < 10)`, which the optimizer folds into a single unsigned comparison:
```
define i32 @example(i32 noundef %i) local_unnamed_addr #0 {
entry:
%idxprom = zext nneg i32 %i to i64
%bounds.constraint = icmp ult i32 %i, 10
tail call void @llvm.assume(i1 %bounds.constraint)
%arrayidx = getelementptr inbounds nuw i32, ptr @arr, i64 %idxprom
%0 = load i32, ptr %arrayidx, align 4, !tbaa !2
ret i32 %0
}
```
---
clang/lib/CodeGen/CGExpr.cpp | 112 ++++++++++++++++++
clang/lib/CodeGen/CGExprScalar.cpp | 3 +
clang/lib/CodeGen/CodeGenFunction.h | 7 ++
clang/test/CodeGen/array-bounds-constraints.c | 39 ++++++
4 files changed, 161 insertions(+)
create mode 100644 clang/test/CodeGen/array-bounds-constraints.c
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index e6e4947882544..d4425d76d10fe 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4559,6 +4559,97 @@ void CodeGenFunction::EmitCountedByBoundsChecking(
}
}
+/// Emit array bounds constraints using llvm.assume for optimization hints.
+///
+/// C Standard (ISO/IEC 9899:2011 - C11)
+/// Section J.2 (Undefined behavior): An array subscript is out of range, even
+/// if an object is apparently accessible with the given subscript (as in the
+/// lvalue expression a[1][7] given the declaration int a[4][5]) (6.5.6).
+///
+/// Section 6.5.6 (Additive operators): If both the pointer operand and the
+/// result point to elements of the same array object, or one past the last
+/// element of the array object, the evaluation shall not produce an overflow;
+/// otherwise, the behavior is undefined.
+///
+/// C++ Standard (ISO/IEC 14882 - 2017)
+/// Section 8.7 (Additive operators):
+/// 4 When an expression that has integral type is added to or subtracted from a
+/// pointer, the result has the type of the pointer operand. If the expression
+/// P points to element x[i] of an array object x with n elements,^86 the
+/// expressions P + J and J + P (where J has the value j) point to the
+/// (possibly-hypothetical) element x[i + j] if 0 ≤ i + j ≤ n; otherwise, the
+/// behavior is undefined. Likewise, the expression P - J points to the
+/// (possibly-hypothetical) element x[i − j] if 0 ≤ i − j ≤ n; otherwise, the
+/// behavior is undefined.
+/// ^86 A pointer past the last element of an array x of n elements is
+/// considered to be equivalent to a pointer to a hypothetical element x[n]
+/// for this purpose; see 6.9.2.
+///
+/// This function emits llvm.assume statements to inform the optimizer that
+/// array subscripts are within bounds, enabling better optimization without
+/// duplicating side effects from the subscript expression. The IndexVal
+/// parameter should be the already-emitted index value to avoid re-evaluation.
+void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
+ llvm::Value *IndexVal) {
+ const Expr *Base = E->getBase();
+ const Expr *Idx = E->getIdx();
+ QualType BaseType = Base->getType();
+
+ if (const auto *ICE = dyn_cast<ImplicitCastExpr>(Base)) {
+ if (ICE->getCastKind() == CK_ArrayToPointerDecay) {
+ BaseType = ICE->getSubExpr()->getType();
+ }
+ }
+
+ // For now: only handle constant array types.
+ const ConstantArrayType *CAT = getContext().getAsConstantArrayType(BaseType);
+ if (!CAT)
+ return;
+
+ llvm::APInt ArraySize = CAT->getSize();
+ if (ArraySize == 0)
+ return;
+
+ QualType IdxType = Idx->getType();
+ llvm::Type *IndexType = ConvertType(IdxType);
+ llvm::Value *Zero = llvm::ConstantInt::get(IndexType, 0);
+
+ uint64_t ArraySizeValue = ArraySize.getLimitedValue();
+ llvm::Value *ArraySizeVal = llvm::ConstantInt::get(IndexType, ArraySizeValue);
+
+ // Use the provided IndexVal to avoid duplicating side effects.
+ // The caller has already emitted the index expression once.
+ if (!IndexVal)
+ return;
+
+ // Ensure index value has the same type as our constants.
+ if (IndexVal->getType() != IndexType) {
+ bool IsSigned = IdxType->isSignedIntegerOrEnumerationType();
+ IndexVal = Builder.CreateIntCast(IndexVal, IndexType, IsSigned, "idx.cast");
+ }
+
+ // Create bounds constraint: 0 <= index && index < size.
+ // C arrays are 0-based, so valid indices are [0, size-1].
+ // This enforces the C18 standard requirement that array subscripts
+ // must be "greater than or equal to zero and less than the size of the
+ // array."
+ llvm::Value *LowerBound, *UpperBound;
+ if (IdxType->isSignedIntegerOrEnumerationType()) {
+ // For signed indices: index >= 0 && index < size.
+ LowerBound = Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero");
+ UpperBound = Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size");
+ } else {
+ // For unsigned indices: index < size (>= 0 is implicit).
+ LowerBound = Builder.getTrue();
+ UpperBound = Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size");
+ }
+
+ llvm::Value *BoundsConstraint =
+ Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint");
+ llvm::Function *AssumeIntrinsic = CGM.getIntrinsic(llvm::Intrinsic::assume);
+ Builder.CreateCall(AssumeIntrinsic, BoundsConstraint);
+}
+
LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
bool Accessed) {
// The index must always be an integer, which is not an aggregate. Emit it
@@ -4588,6 +4679,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
};
IdxPre = nullptr;
+ // Array bounds constraints will be emitted after index evaluation to avoid
+ // duplicating side effects from the index expression.
+
// If the base is a vector type, then we are forming a vector element lvalue
// with this subscript.
if (E->getBase()->getType()->isSubscriptableVectorType() &&
@@ -4595,6 +4689,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
// Emit the vector as an lvalue to get its address.
LValue LHS = EmitLValue(E->getBase());
auto *Idx = EmitIdxAfterBase(/*Promote*/false);
+
+ // Emit array bounds constraints for vector subscripts.
+ EmitArrayBoundsConstraints(E, Idx);
+
assert(LHS.isSimple() && "Can only subscript lvalue vectors here!");
return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(),
LHS.getBaseInfo(), TBAAAccessInfo());
@@ -4635,6 +4733,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
+ // Emit array bounds constraints for VLA access (though VLAs typically don't
+ // have constant bounds).
+ EmitArrayBoundsConstraints(E, Idx);
+
// The element count here is the total number of non-VLA elements.
llvm::Value *numElements = getVLASize(vla).NumElts;
@@ -4659,6 +4761,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
+ // Emit array bounds constraints for ObjC interface access.
+ EmitArrayBoundsConstraints(E, Idx);
+
CharUnits InterfaceSize = getContext().getTypeSizeInChars(OIT);
llvm::Value *InterfaceSizeVal =
llvm::ConstantInt::get(Idx->getType(), InterfaceSize.getQuantity());
@@ -4694,6 +4799,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
ArrayLV = EmitLValue(Array);
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
+ // Emit array bounds constraints for optimization.
+ EmitArrayBoundsConstraints(E, Idx);
+
if (SanOpts.has(SanitizerKind::ArrayBounds))
EmitCountedByBoundsChecking(Array, Idx, ArrayLV.getAddress(),
E->getIdx()->getType(), Array->getType(),
@@ -4737,6 +4845,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
Address BaseAddr =
EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
+
+ // Emit array bounds constraints for pointer-based array access.
+ EmitArrayBoundsConstraints(E, Idx);
+
QualType ptrType = E->getBase()->getType();
Addr = emitArraySubscriptGEP(*this, BaseAddr, Idx, E->getType(),
!getLangOpts().PointerOverflowDefined,
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 4fa25c5d66669..28f702f9237e4 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2100,6 +2100,9 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
if (CGF.SanOpts.has(SanitizerKind::ArrayBounds))
CGF.EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, /*Accessed*/true);
+ // Emit array bounds constraints for vector element access.
+ CGF.EmitArrayBoundsConstraints(E, Idx);
+
return Builder.CreateExtractElement(Base, Idx, "vecext");
}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 727487b46054f..6283841b7b170 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3341,6 +3341,13 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *Index, QualType IndexType,
QualType IndexedType, bool Accessed);
+ /// Emit array bounds constraints using llvm.assume for optimization hints.
+ /// Emits assume statements for array bounds without duplicating side effects.
+ /// Takes the already-emitted index value to avoid re-evaluating expressions
+ /// with side effects. Helps optimizer with vectorization and bounds analysis.
+ void EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
+ llvm::Value *IndexVal);
+
/// Returns debug info, with additional annotation if
/// CGM.getCodeGenOpts().SanitizeAnnotateDebugInfo[Ordinal] is enabled for
/// any of the ordinals.
diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c
new file mode 100644
index 0000000000000..77e5199a1573a
--- /dev/null
+++ b/clang/test/CodeGen/array-bounds-constraints.c
@@ -0,0 +1,39 @@
+// Test that array bounds constraints generate llvm.assume statements for optimization hints.
+// RUN: %clang_cc1 -emit-llvm -O2 %s -o - | FileCheck %s
+
+// This test verifies that clang generates llvm.assume statements to inform the
+// optimizer that array subscripts are within bounds to enable better optimization.
+
+// CHECK-LABEL: define {{.*}} @test_simple_array
+int test_simple_array(int i) {
+ int arr[10]; // C arrays are 0-based: valid indices are [0, 9]
+ // CHECK: %{{.*}} = icmp ult i32 %i, 10
+ // CHECK: call void @llvm.assume(i1 %{{.*}})
+ return arr[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_multidimensional_array
+int test_multidimensional_array(int i, int j) {
+ int arr[5][8]; // Valid indices: i in [0, 4], j in [0, 7]
+ // CHECK: %{{.*}} = icmp ult i32 %i, 5
+ // CHECK: call void @llvm.assume(i1 %{{.*}})
+ // CHECK: %{{.*}} = icmp ult i32 %j, 8
+ // CHECK: call void @llvm.assume(i1 %{{.*}})
+ return arr[i][j];
+}
+
+// CHECK-LABEL: define {{.*}} @test_unsigned_index
+int test_unsigned_index(unsigned int i) {
+ int arr[10];
+ // CHECK: %{{.*}} = icmp ult i32 %i, 10
+ // CHECK: call void @llvm.assume(i1 %{{.*}})
+ return arr[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_store_undef
+void test_store_undef(int i, int value) {
+ int arr[10];
+ // CHECK: %{{.*}} = icmp ult i32 %i, 10
+ // CHECK: call void @llvm.assume(i1 %{{.*}})
+ arr[i] = value;
+}
>From 702d9dd71e4a646077e5d3347f36a43807dcec4a Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Tue, 16 Sep 2025 06:23:44 -0500
Subject: [PATCH 2/6] add -fassume-array-bounds flag disabled by default for
now
Sanitizer interaction: assume generation is disabled when -fsanitize=array-bounds is active.
Optimization level: assume generation is disabled at -O0.
Flexible array detection: skip size-1 arrays as the last struct field.
---
clang/include/clang/Basic/CodeGenOptions.def | 1 +
clang/include/clang/Driver/Options.td | 5 ++
clang/lib/CodeGen/CGExpr.cpp | 59 +++++++++---
.../CodeGen/array-bounds-constraints-safety.c | 89 +++++++++++++++++++
clang/test/CodeGen/array-bounds-constraints.c | 9 +-
5 files changed, 149 insertions(+), 14 deletions(-)
create mode 100644 clang/test/CodeGen/array-bounds-constraints-safety.c
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 872f73ebf3810..274ae075c2de7 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -33,6 +33,7 @@ CODEGENOPT(ImplicitMapSyms, 1, 0, Benign) ///< -Wa,-mmapsyms=implicit
CODEGENOPT(AsmVerbose , 1, 0, Benign) ///< -dA, -fverbose-asm.
CODEGENOPT(PreserveAsmComments, 1, 1, Benign) ///< -dA, -fno-preserve-as-comments.
CODEGENOPT(AssumeSaneOperatorNew , 1, 1, Benign) ///< implicit __attribute__((malloc)) operator new
+CODEGENOPT(AssumeArrayBounds , 1, 0, Benign) ///< Generate llvm.assume for array bounds.
CODEGENOPT(AssumeUniqueVTables , 1, 1, Benign) ///< Assume a class has only one vtable.
CODEGENOPT(Autolink , 1, 1, Benign) ///< -fno-autolink
CODEGENOPT(AutoImport , 1, 1, Benign) ///< -fno-auto-import
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index def7c09d58cfb..d4b324e9650a7 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1603,6 +1603,11 @@ defm assume_unique_vtables : BoolFOption<"assume-unique-vtables",
BothFlags<[], [ClangOption, CLOption]>>;
def fassume_sane_operator_new : Flag<["-"], "fassume-sane-operator-new">, Group<f_Group>;
+defm assume_array_bounds : BoolFOption<"assume-array-bounds",
+ CodeGenOpts<"AssumeArrayBounds">, DefaultFalse,
+ PosFlag<SetTrue, [], [ClangOption, CC1Option],
+ "Generate llvm.assume for array bounds to enable optimizations (may break code with intentional out-of-bounds access)">,
+ NegFlag<SetFalse, [], [ClangOption, CC1Option]>>;
def fastcp : Flag<["-"], "fastcp">, Group<f_Group>;
def fastf : Flag<["-"], "fastf">, Group<f_Group>;
def fast : Flag<["-"], "fast">, Group<f_Group>;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d4425d76d10fe..13416228873cc 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4589,8 +4589,25 @@ void CodeGenFunction::EmitCountedByBoundsChecking(
/// array subscripts are within bounds, enabling better optimization without
/// duplicating side effects from the subscript expression. The IndexVal
/// parameter should be the already-emitted index value to avoid re-evaluation.
+///
+/// Code that intentionally accesses out-of-bounds (UB) may break with
+/// optimizations. Only applies to constant-size arrays (not pointers, VLAs, or
+/// flexible arrays.) Disabled when -fsanitize=array-bounds is active.
+///
void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
llvm::Value *IndexVal) {
+ // Disable with -fno-assume-array-bounds.
+ if (!CGM.getCodeGenOpts().AssumeArrayBounds)
+ return;
+
+ // Disable at -O0.
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+ return;
+
+ // Disable with array-bounds sanitizer.
+ if (SanOpts.has(SanitizerKind::ArrayBounds))
+ return;
+
const Expr *Base = E->getBase();
const Expr *Idx = E->getIdx();
QualType BaseType = Base->getType();
@@ -4610,6 +4627,26 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
if (ArraySize == 0)
return;
+ // Don't generate assumes for flexible array member pattern.
+ // Arrays of size 1 in structs are often used as placeholders for
+ // variable-length data (pre-C99 flexible array member idiom.)
+ if (ArraySize == 1) {
+ if (const auto *ME = dyn_cast<MemberExpr>(Base->IgnoreParenImpCasts())) {
+ if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
+ const RecordDecl *RD = FD->getParent();
+ // Check if this field is the last field in the record.
+ // Only the last field can be a flexible array member.
+ const FieldDecl *LastField = nullptr;
+ for (const auto *Field : RD->fields())
+ LastField = Field;
+ if (LastField == FD)
+ // This is a size-1 array as the last field in a struct.
+ // Likely a flexible array member pattern - skip assumes.
+ return;
+ }
+ }
+ }
+
QualType IdxType = Idx->getType();
llvm::Type *IndexType = ConvertType(IdxType);
llvm::Value *Zero = llvm::ConstantInt::get(IndexType, 0);
@@ -4633,21 +4670,21 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
// This enforces the C18 standard requirement that array subscripts
// must be "greater than or equal to zero and less than the size of the
// array."
- llvm::Value *LowerBound, *UpperBound;
if (IdxType->isSignedIntegerOrEnumerationType()) {
// For signed indices: index >= 0 && index < size.
- LowerBound = Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero");
- UpperBound = Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size");
+ llvm::Value *LowerBound =
+ Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero");
+ llvm::Value *UpperBound =
+ Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size");
+ llvm::Value *BoundsConstraint =
+ Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint");
+ Builder.CreateAssumption(BoundsConstraint);
} else {
- // For unsigned indices: index < size (>= 0 is implicit).
- LowerBound = Builder.getTrue();
- UpperBound = Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size");
+ // For unsigned indices: index < size (>= 0 is implicit.)
+ llvm::Value *UpperBound =
+ Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size");
+ Builder.CreateAssumption(UpperBound);
}
-
- llvm::Value *BoundsConstraint =
- Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint");
- llvm::Function *AssumeIntrinsic = CGM.getIntrinsic(llvm::Intrinsic::assume);
- Builder.CreateCall(AssumeIntrinsic, BoundsConstraint);
}
LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
diff --git a/clang/test/CodeGen/array-bounds-constraints-safety.c b/clang/test/CodeGen/array-bounds-constraints-safety.c
new file mode 100644
index 0000000000000..bbd72900361ec
--- /dev/null
+++ b/clang/test/CodeGen/array-bounds-constraints-safety.c
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -emit-llvm -O2 -fassume-array-bounds %s -o - | FileCheck %s
+// Test that array bounds constraints are NOT applied to cases that might
+// break real-world code with intentional out-of-bounds access patterns.
+
+// C18 standard allows one-past-the-end pointers, and some legacy code
+// intentionally accesses out-of-bounds for performance or compatibility.
+// This test verifies that bounds constraints are only applied to safe cases.
+
+// CHECK-LABEL: define {{.*}} @test_flexible_array_member
+struct Data {
+ int count;
+ int items[1]; // Flexible array member pattern (pre-C99 style)
+};
+
+int test_flexible_array_member(struct Data *d, int i) {
+ // CHECK-NOT: call void @llvm.assume
+ // Flexible array member pattern (size 1 array as last field) should NOT
+ // generate bounds constraints because items[1] is just a placeholder
+ // for a larger array allocated with `malloc (sizeof (struct Data) + 42)`.
+ return d->items[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_not_flexible_array
+struct NotFlexible {
+ int items[1]; // Size 1 array but NOT the last field.
+ int count; // Something comes after it.
+};
+
+int test_not_flexible_array(struct NotFlexible *s, int i) {
+ // CHECK: call void @llvm.assume
+ // This is NOT a flexible array pattern (not the last field),
+ // so we're fine generating `assume(i < 1)`.
+ return s->items[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_pointer_parameter
+int test_pointer_parameter(int *arr, int i) {
+ // CHECK-NOT: call void @llvm.assume
+ // Pointer parameters should NOT generate bounds constraints
+ // because we don't know the actual array size.
+ return arr[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_vla
+int test_vla(int n, int i) {
+ int arr[n]; // Variable-length array.
+ // CHECK-NOT: call void @llvm.assume
+ // VLAs should NOT generate bounds constraints
+ // because the size is dynamic.
+ return arr[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_one_past_end
+extern int extern_array[100];
+int *test_one_past_end(void) {
+ // CHECK-NOT: call void @llvm.assume
+ // Taking address of one-past-the-end is allowed by C standard.
+ // We should NOT assume anything about this access.
+ return &extern_array[100]; // Legal: one past the end.
+}
+
+// CHECK-LABEL: define {{.*}} @test_extern_array
+int test_extern_array(int i) {
+ // CHECK: call void @llvm.assume
+ // This will generate bounds constraints.
+ // The array is a constant-size global array.
+ // This is the safe case where we want optimization hints.
+ return extern_array[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_local_constant_array
+int test_local_constant_array(int i) {
+ int arr[10];
+ // CHECK: call void @llvm.assume
+ // This will generate bounds constraints.
+ // We know the exact size of this alloca array.
+ // This is the safe case where we want optimization hints.
+ return arr[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_malloc_array
+int *my_malloc(int);
+int test_malloc_array(int i) {
+ // CHECK-NOT: call void @llvm.assume
+ // Dynamically allocated arrays accessed via pointers do not get bounds
+ // constraints.
+ int *x = my_malloc(100 * sizeof(int));
+ return x[i];
+}
diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c
index 77e5199a1573a..a2f3ad9e514cb 100644
--- a/clang/test/CodeGen/array-bounds-constraints.c
+++ b/clang/test/CodeGen/array-bounds-constraints.c
@@ -1,14 +1,17 @@
-// Test that array bounds constraints generate llvm.assume statements for optimization hints.
-// RUN: %clang_cc1 -emit-llvm -O2 %s -o - | FileCheck %s
-
// This test verifies that clang generates llvm.assume statements to inform the
// optimizer that array subscripts are within bounds to enable better optimization.
+// RUN: %clang_cc1 -emit-llvm -O2 -fassume-array-bounds %s -o - | FileCheck %s
+
+// Verify no assumes are generated.
+// RUN: %clang_cc1 -emit-llvm -O2 -fno-assume-array-bounds %s -o - | FileCheck %s -check-prefix=NO-FLAG
// CHECK-LABEL: define {{.*}} @test_simple_array
+// NO-FLAG-LABEL: define {{.*}} @test_simple_array
int test_simple_array(int i) {
int arr[10]; // C arrays are 0-based: valid indices are [0, 9]
// CHECK: %{{.*}} = icmp ult i32 %i, 10
// CHECK: call void @llvm.assume(i1 %{{.*}})
+ // NO-FLAG-NOT: call void @llvm.assume
return arr[i];
}
>From ec1024d012bd336d9efa996709bab3c23e59ab36 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sat, 4 Oct 2025 18:09:35 -0500
Subject: [PATCH 3/6] fix UB in testcase
---
clang/test/CodeGen/array-bounds-constraints-safety.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/clang/test/CodeGen/array-bounds-constraints-safety.c b/clang/test/CodeGen/array-bounds-constraints-safety.c
index bbd72900361ec..3484853ceb662 100644
--- a/clang/test/CodeGen/array-bounds-constraints-safety.c
+++ b/clang/test/CodeGen/array-bounds-constraints-safety.c
@@ -69,8 +69,10 @@ int test_extern_array(int i) {
}
// CHECK-LABEL: define {{.*}} @test_local_constant_array
+void init_array(int *arr);
int test_local_constant_array(int i) {
int arr[10];
+ init_array(arr); // Initialize to avoid UB from uninitialized read.
// CHECK: call void @llvm.assume
// This will generate bounds constraints.
// We know the exact size of this alloca array.
>From 77e2606c98c8785227a3e08c0ea21c4166415805 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sat, 4 Oct 2025 18:13:31 -0500
Subject: [PATCH 4/6] fix UB in testcase
---
clang/test/CodeGen/array-bounds-constraints.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c
index a2f3ad9e514cb..b6bc9050b039a 100644
--- a/clang/test/CodeGen/array-bounds-constraints.c
+++ b/clang/test/CodeGen/array-bounds-constraints.c
@@ -7,8 +7,10 @@
// CHECK-LABEL: define {{.*}} @test_simple_array
// NO-FLAG-LABEL: define {{.*}} @test_simple_array
+void init_array(int *arr);
int test_simple_array(int i) {
int arr[10]; // C arrays are 0-based: valid indices are [0, 9]
+ init_array(arr); // Initialize to avoid UB from uninitialized read.
// CHECK: %{{.*}} = icmp ult i32 %i, 10
// CHECK: call void @llvm.assume(i1 %{{.*}})
// NO-FLAG-NOT: call void @llvm.assume
@@ -18,6 +20,7 @@ int test_simple_array(int i) {
// CHECK-LABEL: define {{.*}} @test_multidimensional_array
int test_multidimensional_array(int i, int j) {
int arr[5][8]; // Valid indices: i in [0, 4], j in [0, 7]
+ init_array(arr[0]); // Initialize to avoid UB from uninitialized read.
// CHECK: %{{.*}} = icmp ult i32 %i, 5
// CHECK: call void @llvm.assume(i1 %{{.*}})
// CHECK: %{{.*}} = icmp ult i32 %j, 8
@@ -28,6 +31,7 @@ int test_multidimensional_array(int i, int j) {
// CHECK-LABEL: define {{.*}} @test_unsigned_index
int test_unsigned_index(unsigned int i) {
int arr[10];
+ init_array(arr); // Initialize to avoid UB from uninitialized read.
// CHECK: %{{.*}} = icmp ult i32 %i, 10
// CHECK: call void @llvm.assume(i1 %{{.*}})
return arr[i];
>From 0ed60c2504985275c70838b91c3782fcf96d6eed Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Mon, 6 Oct 2025 09:28:57 -0500
Subject: [PATCH 5/6] avoid optimization in testcase
---
clang/test/CodeGen/array-bounds-constraints.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c
index b6bc9050b039a..ed4e3a603ac90 100644
--- a/clang/test/CodeGen/array-bounds-constraints.c
+++ b/clang/test/CodeGen/array-bounds-constraints.c
@@ -43,4 +43,5 @@ void test_store_undef(int i, int value) {
// CHECK: %{{.*}} = icmp ult i32 %i, 10
// CHECK: call void @llvm.assume(i1 %{{.*}})
arr[i] = value;
+ init_array(arr); // Avoid optimization of the above statement.
}
>From 1c11e607e149dce1df94a3ef2e515115614f2393 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Mon, 6 Oct 2025 10:57:58 -0500
Subject: [PATCH 6/6] handle zero-length array pattern
---
clang/lib/CodeGen/CGExpr.cpp | 12 ++++++------
.../test/CodeGen/array-bounds-constraints-safety.c | 13 +++++++++++++
2 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 13416228873cc..1d7ebdfdd2279 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4624,13 +4624,13 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
return;
llvm::APInt ArraySize = CAT->getSize();
- if (ArraySize == 0)
- return;
// Don't generate assumes for flexible array member pattern.
- // Arrays of size 1 in structs are often used as placeholders for
- // variable-length data (pre-C99 flexible array member idiom.)
- if (ArraySize == 1) {
+ // Size-1 arrays: "struct { int len; char data[1]; }" (pre-C99 idiom.)
+ // Zero-length arrays: "struct { int len; char data[0]; }" (GCC extension
+ // https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html)
+ // Both patterns use arrays as placeholders for variable-length data.
+ if (ArraySize == 0 || ArraySize == 1) {
if (const auto *ME = dyn_cast<MemberExpr>(Base->IgnoreParenImpCasts())) {
if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
const RecordDecl *RD = FD->getParent();
@@ -4640,7 +4640,7 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
for (const auto *Field : RD->fields())
LastField = Field;
if (LastField == FD)
- // This is a size-1 array as the last field in a struct.
+ // This is a zero-length or size-1 array as the last field.
// Likely a flexible array member pattern - skip assumes.
return;
}
diff --git a/clang/test/CodeGen/array-bounds-constraints-safety.c b/clang/test/CodeGen/array-bounds-constraints-safety.c
index 3484853ceb662..e4a5c361391b6 100644
--- a/clang/test/CodeGen/array-bounds-constraints-safety.c
+++ b/clang/test/CodeGen/array-bounds-constraints-safety.c
@@ -6,6 +6,19 @@
// intentionally accesses out-of-bounds for performance or compatibility.
// This test verifies that bounds constraints are only applied to safe cases.
+// CHECK-LABEL: define {{.*}} @test_zero_length_array
+struct ZeroLengthData {
+ int count;
+ int items[0]; // GNU C extension: zero-length array
+};
+
+int test_zero_length_array(struct ZeroLengthData *d, int i) {
+ // CHECK-NOT: call void @llvm.assume
+ // Zero-length array as last field should not generate bounds constraints.
+ // See https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
+ return d->items[i];
+}
+
// CHECK-LABEL: define {{.*}} @test_flexible_array_member
struct Data {
int count;
More information about the cfe-commits
mailing list