[clang] [clang] add array out-of-bounds access constraints using llvm.assume (PR #159046)

Sebastian Pop via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 6 09:04:20 PDT 2025


https://github.com/sebpop updated https://github.com/llvm/llvm-project/pull/159046

>From 7fdec0a94298caae4bb7bd69a9d165524df11fb7 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Tue, 16 Sep 2025 06:23:44 -0500
Subject: [PATCH 1/6] [clang] add array out-of-bounds access constraints using
 llvm.assume

Following C and C++ standards, generate llvm.assume statements for array
subscript bounds to provide optimization hints.

For this code:
```
int arr[10];
int example(int i) {
  return arr[i];
}
```
clang now generates an `assume(i < 10)`:
```
define i32 @example(i32 noundef %i) local_unnamed_addr #0 {
entry:
  %idxprom = zext nneg i32 %i to i64
  %bounds.constraint = icmp ult i32 %i, 10
  tail call void @llvm.assume(i1 %bounds.constraint)
  %arrayidx = getelementptr inbounds nuw i32, ptr @arr, i64 %idxprom
  %0 = load i32, ptr %arrayidx, align 4, !tbaa !2
  ret i32 %0
}
```
---
 clang/lib/CodeGen/CGExpr.cpp                  | 112 ++++++++++++++++++
 clang/lib/CodeGen/CGExprScalar.cpp            |   3 +
 clang/lib/CodeGen/CodeGenFunction.h           |   7 ++
 clang/test/CodeGen/array-bounds-constraints.c |  39 ++++++
 4 files changed, 161 insertions(+)
 create mode 100644 clang/test/CodeGen/array-bounds-constraints.c

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index e6e4947882544..d4425d76d10fe 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4559,6 +4559,97 @@ void CodeGenFunction::EmitCountedByBoundsChecking(
   }
 }
 
+/// Emit array bounds constraints using llvm.assume for optimization hints.
+///
+/// C Standard (ISO/IEC 9899:2011 - C11)
+/// Section J.2 (Undefined behavior): An array subscript is out of range, even
+/// if an object is apparently accessible with the given subscript (as in the
+/// lvalue expression a[1][7] given the declaration int a[4][5]) (6.5.6).
+///
+/// Section 6.5.6 (Additive operators): If both the pointer operand and the
+/// result point to elements of the same array object, or one past the last
+/// element of the array object, the evaluation shall not produce an overflow;
+/// otherwise, the behavior is undefined.
+///
+/// C++ Standard (ISO/IEC 14882 - 2017)
+/// Section 8.7 (Additive operators):
+/// 4 When an expression that has integral type is added to or subtracted from a
+///   pointer, the result has the type of the pointer operand. If the expression
+///   P points to element x[i] of an array object x with n elements,^86 the
+///   expressions P + J and J + P (where J has the value j) point to the
+///   (possibly-hypothetical) element x[i + j] if 0 ≤ i + j ≤ n; otherwise, the
+///   behavior is undefined. Likewise, the expression P - J points to the
+///   (possibly-hypothetical) element x[i − j] if 0 ≤ i − j ≤ n; otherwise, the
+///   behavior is undefined.
+/// ^86 A pointer past the last element of an array x of n elements is
+///     considered to be equivalent to a pointer to a hypothetical element x[n]
+///     for this purpose; see 6.9.2.
+///
+/// This function emits llvm.assume statements to inform the optimizer that
+/// array subscripts are within bounds, enabling better optimization without
+/// duplicating side effects from the subscript expression. The IndexVal
+/// parameter should be the already-emitted index value to avoid re-evaluation.
+void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
+                                                 llvm::Value *IndexVal) {
+  const Expr *Base = E->getBase();
+  const Expr *Idx = E->getIdx();
+  QualType BaseType = Base->getType();
+
+  if (const auto *ICE = dyn_cast<ImplicitCastExpr>(Base)) {
+    if (ICE->getCastKind() == CK_ArrayToPointerDecay) {
+      BaseType = ICE->getSubExpr()->getType();
+    }
+  }
+
+  // For now: only handle constant array types.
+  const ConstantArrayType *CAT = getContext().getAsConstantArrayType(BaseType);
+  if (!CAT)
+    return;
+
+  llvm::APInt ArraySize = CAT->getSize();
+  if (ArraySize == 0)
+    return;
+
+  QualType IdxType = Idx->getType();
+  llvm::Type *IndexType = ConvertType(IdxType);
+  llvm::Value *Zero = llvm::ConstantInt::get(IndexType, 0);
+
+  uint64_t ArraySizeValue = ArraySize.getLimitedValue();
+  llvm::Value *ArraySizeVal = llvm::ConstantInt::get(IndexType, ArraySizeValue);
+
+  // Use the provided IndexVal to avoid duplicating side effects.
+  // The caller has already emitted the index expression once.
+  if (!IndexVal)
+    return;
+
+  // Ensure index value has the same type as our constants.
+  if (IndexVal->getType() != IndexType) {
+    bool IsSigned = IdxType->isSignedIntegerOrEnumerationType();
+    IndexVal = Builder.CreateIntCast(IndexVal, IndexType, IsSigned, "idx.cast");
+  }
+
+  // Create bounds constraint: 0 <= index && index < size.
+  // C arrays are 0-based, so valid indices are [0, size-1].
+  // This enforces the C18 standard requirement that array subscripts
+  // must be "greater than or equal to zero and less than the size of the
+  // array."
+  llvm::Value *LowerBound, *UpperBound;
+  if (IdxType->isSignedIntegerOrEnumerationType()) {
+    // For signed indices: index >= 0 && index < size.
+    LowerBound = Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero");
+    UpperBound = Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size");
+  } else {
+    // For unsigned indices: index < size (>= 0 is implicit).
+    LowerBound = Builder.getTrue();
+    UpperBound = Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size");
+  }
+
+  llvm::Value *BoundsConstraint =
+      Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint");
+  llvm::Function *AssumeIntrinsic = CGM.getIntrinsic(llvm::Intrinsic::assume);
+  Builder.CreateCall(AssumeIntrinsic, BoundsConstraint);
+}
+
 LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
                                                bool Accessed) {
   // The index must always be an integer, which is not an aggregate.  Emit it
@@ -4588,6 +4679,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
   };
   IdxPre = nullptr;
 
+  // Array bounds constraints will be emitted after index evaluation to avoid
+  // duplicating side effects from the index expression.
+
   // If the base is a vector type, then we are forming a vector element lvalue
   // with this subscript.
   if (E->getBase()->getType()->isSubscriptableVectorType() &&
@@ -4595,6 +4689,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
     // Emit the vector as an lvalue to get its address.
     LValue LHS = EmitLValue(E->getBase());
     auto *Idx = EmitIdxAfterBase(/*Promote*/false);
+
+    // NOTE(review): no-op here; the helper only handles constant array bases.
+    EmitArrayBoundsConstraints(E, Idx);
+
     assert(LHS.isSimple() && "Can only subscript lvalue vectors here!");
     return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(),
                                  LHS.getBaseInfo(), TBAAAccessInfo());
@@ -4635,6 +4733,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
     Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
 
+    // Emit array bounds constraints for VLA access (though VLAs typically don't
+    // have constant bounds).
+    EmitArrayBoundsConstraints(E, Idx);
+
     // The element count here is the total number of non-VLA elements.
     llvm::Value *numElements = getVLASize(vla).NumElts;
 
@@ -4659,6 +4761,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
     Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
 
+    // Emit array bounds constraints for ObjC interface access.
+    EmitArrayBoundsConstraints(E, Idx);
+
     CharUnits InterfaceSize = getContext().getTypeSizeInChars(OIT);
     llvm::Value *InterfaceSizeVal =
         llvm::ConstantInt::get(Idx->getType(), InterfaceSize.getQuantity());
@@ -4694,6 +4799,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
       ArrayLV = EmitLValue(Array);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
 
+    // Emit array bounds constraints for optimization.
+    EmitArrayBoundsConstraints(E, Idx);
+
     if (SanOpts.has(SanitizerKind::ArrayBounds))
       EmitCountedByBoundsChecking(Array, Idx, ArrayLV.getAddress(),
                                   E->getIdx()->getType(), Array->getType(),
@@ -4737,6 +4845,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
     Address BaseAddr =
         EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
+
+    // Emit array bounds constraints for pointer-based array access.
+    EmitArrayBoundsConstraints(E, Idx);
+
     QualType ptrType = E->getBase()->getType();
     Addr = emitArraySubscriptGEP(*this, BaseAddr, Idx, E->getType(),
                                  !getLangOpts().PointerOverflowDefined,
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 4fa25c5d66669..28f702f9237e4 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2100,6 +2100,9 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
   if (CGF.SanOpts.has(SanitizerKind::ArrayBounds))
     CGF.EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, /*Accessed*/true);
 
+  // NOTE(review): no-op here; the helper only handles constant array bases.
+  CGF.EmitArrayBoundsConstraints(E, Idx);
+
   return Builder.CreateExtractElement(Base, Idx, "vecext");
 }
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 727487b46054f..6283841b7b170 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3341,6 +3341,13 @@ class CodeGenFunction : public CodeGenTypeCache {
                            llvm::Value *Index, QualType IndexType,
                            QualType IndexedType, bool Accessed);
 
+  /// Emit array bounds constraints using llvm.assume for optimization hints.
+  /// Emits assume statements for array bounds without duplicating side effects.
+  /// Takes the already-emitted index value to avoid re-evaluating expressions
+  /// with side effects. Helps optimizer with vectorization and bounds analysis.
+  void EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
+                                  llvm::Value *IndexVal);
+
   /// Returns debug info, with additional annotation if
   /// CGM.getCodeGenOpts().SanitizeAnnotateDebugInfo[Ordinal] is enabled for
   /// any of the ordinals.
diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c
new file mode 100644
index 0000000000000..77e5199a1573a
--- /dev/null
+++ b/clang/test/CodeGen/array-bounds-constraints.c
@@ -0,0 +1,39 @@
+// Test that array bounds constraints generate llvm.assume statements for optimization hints.
+// RUN: %clang_cc1 -emit-llvm -O2 %s -o - | FileCheck %s
+
+// This test verifies that clang generates llvm.assume statements to inform the
+// optimizer that array subscripts are within bounds to enable better optimization.
+
+// CHECK-LABEL: define {{.*}} @test_simple_array
+int test_simple_array(int i) {
+  int arr[10];  // C arrays are 0-based: valid indices are [0, 9]
+  // CHECK: %{{.*}} = icmp ult i32 %i, 10
+  // CHECK: call void @llvm.assume(i1 %{{.*}})
+  return arr[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_multidimensional_array
+int test_multidimensional_array(int i, int j) {
+  int arr[5][8];  // Valid indices: i in [0, 4], j in [0, 7]
+  // CHECK: %{{.*}} = icmp ult i32 %i, 5
+  // CHECK: call void @llvm.assume(i1 %{{.*}})
+  // CHECK: %{{.*}} = icmp ult i32 %j, 8
+  // CHECK: call void @llvm.assume(i1 %{{.*}})
+  return arr[i][j];
+}
+
+// CHECK-LABEL: define {{.*}} @test_unsigned_index
+int test_unsigned_index(unsigned int i) {
+  int arr[10];
+  // CHECK: %{{.*}} = icmp ult i32 %i, 10
+  // CHECK: call void @llvm.assume(i1 %{{.*}})
+  return arr[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_store_undef
+void test_store_undef(int i, int value) {
+  int arr[10];
+  // CHECK: %{{.*}} = icmp ult i32 %i, 10
+  // CHECK: call void @llvm.assume(i1 %{{.*}})
+  arr[i] = value;
+}

>From 702d9dd71e4a646077e5d3347f36a43807dcec4a Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Tue, 16 Sep 2025 06:23:44 -0500
Subject: [PATCH 2/6] add -fassume-array-bounds flag disabled by default for
 now

Sanitizer interaction: assume generation is disabled when -fsanitize=array-bounds is active.

Flexible array detection: skip size-1 arrays as last struct field.
---
 clang/include/clang/Basic/CodeGenOptions.def  |  1 +
 clang/include/clang/Driver/Options.td         |  5 ++
 clang/lib/CodeGen/CGExpr.cpp                  | 59 +++++++++---
 .../CodeGen/array-bounds-constraints-safety.c | 89 +++++++++++++++++++
 clang/test/CodeGen/array-bounds-constraints.c |  9 +-
 5 files changed, 149 insertions(+), 14 deletions(-)
 create mode 100644 clang/test/CodeGen/array-bounds-constraints-safety.c

diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 872f73ebf3810..274ae075c2de7 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -33,6 +33,7 @@ CODEGENOPT(ImplicitMapSyms, 1, 0, Benign) ///< -Wa,-mmapsyms=implicit
 CODEGENOPT(AsmVerbose        , 1, 0, Benign) ///< -dA, -fverbose-asm.
 CODEGENOPT(PreserveAsmComments, 1, 1, Benign) ///< -dA, -fno-preserve-as-comments.
 CODEGENOPT(AssumeSaneOperatorNew , 1, 1, Benign) ///< implicit __attribute__((malloc)) operator new
+CODEGENOPT(AssumeArrayBounds , 1, 0, Benign) ///< Generate llvm.assume for array bounds.
 CODEGENOPT(AssumeUniqueVTables , 1, 1, Benign) ///< Assume a class has only one vtable.
 CODEGENOPT(Autolink          , 1, 1, Benign) ///< -fno-autolink
 CODEGENOPT(AutoImport        , 1, 1, Benign) ///< -fno-auto-import
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index def7c09d58cfb..d4b324e9650a7 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1603,6 +1603,11 @@ defm assume_unique_vtables : BoolFOption<"assume-unique-vtables",
   BothFlags<[], [ClangOption, CLOption]>>;
 
 def fassume_sane_operator_new : Flag<["-"], "fassume-sane-operator-new">, Group<f_Group>;
+defm assume_array_bounds : BoolFOption<"assume-array-bounds",
+  CodeGenOpts<"AssumeArrayBounds">, DefaultFalse,
+  PosFlag<SetTrue, [], [ClangOption, CC1Option],
+          "Generate llvm.assume for array bounds to enable optimizations (may break code with intentional out-of-bounds access)">,
+  NegFlag<SetFalse, [], [ClangOption, CC1Option]>>;
 def fastcp : Flag<["-"], "fastcp">, Group<f_Group>;
 def fastf : Flag<["-"], "fastf">, Group<f_Group>;
 def fast : Flag<["-"], "fast">, Group<f_Group>;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d4425d76d10fe..13416228873cc 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4589,8 +4589,25 @@ void CodeGenFunction::EmitCountedByBoundsChecking(
 /// array subscripts are within bounds, enabling better optimization without
 /// duplicating side effects from the subscript expression. The IndexVal
 /// parameter should be the already-emitted index value to avoid re-evaluation.
+///
+/// Code that intentionally accesses out-of-bounds (UB) may break with
+/// optimizations. Only applies to constant-size arrays (not pointers, VLAs, or
+/// flexible arrays.) Disabled at -O0 and with -fsanitize=array-bounds.
+///
 void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
                                                  llvm::Value *IndexVal) {
+  // Disable with -fno-assume-array-bounds.
+  if (!CGM.getCodeGenOpts().AssumeArrayBounds)
+    return;
+
+  // Disable at -O0.
+  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+    return;
+
+  // Disable with array-bounds sanitizer.
+  if (SanOpts.has(SanitizerKind::ArrayBounds))
+    return;
+
   const Expr *Base = E->getBase();
   const Expr *Idx = E->getIdx();
   QualType BaseType = Base->getType();
@@ -4610,6 +4627,26 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
   if (ArraySize == 0)
     return;
 
+  // Don't generate assumes for flexible array member pattern.
+  // Arrays of size 1 in structs are often used as placeholders for
+  // variable-length data (pre-C99 flexible array member idiom.)
+  if (ArraySize == 1) {
+    if (const auto *ME = dyn_cast<MemberExpr>(Base->IgnoreParenImpCasts())) {
+      if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
+        const RecordDecl *RD = FD->getParent();
+        // Check if this field is the last field in the record.
+        // Only the last field can be a flexible array member.
+        const FieldDecl *LastField = nullptr;
+        for (const auto *Field : RD->fields())
+          LastField = Field;
+        if (LastField == FD)
+          // This is a size-1 array as the last field in a struct.
+          // Likely a flexible array member pattern - skip assumes.
+          return;
+      }
+    }
+  }
+
   QualType IdxType = Idx->getType();
   llvm::Type *IndexType = ConvertType(IdxType);
   llvm::Value *Zero = llvm::ConstantInt::get(IndexType, 0);
@@ -4633,21 +4670,21 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
   // This enforces the C18 standard requirement that array subscripts
   // must be "greater than or equal to zero and less than the size of the
   // array."
-  llvm::Value *LowerBound, *UpperBound;
   if (IdxType->isSignedIntegerOrEnumerationType()) {
     // For signed indices: index >= 0 && index < size.
-    LowerBound = Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero");
-    UpperBound = Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size");
+    llvm::Value *LowerBound =
+        Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero");
+    llvm::Value *UpperBound =
+        Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size");
+    llvm::Value *BoundsConstraint =
+        Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint");
+    Builder.CreateAssumption(BoundsConstraint);
   } else {
-    // For unsigned indices: index < size (>= 0 is implicit).
-    LowerBound = Builder.getTrue();
-    UpperBound = Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size");
+    // For unsigned indices: index < size (>= 0 is implicit.)
+    llvm::Value *UpperBound =
+        Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size");
+    Builder.CreateAssumption(UpperBound);
   }
-
-  llvm::Value *BoundsConstraint =
-      Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint");
-  llvm::Function *AssumeIntrinsic = CGM.getIntrinsic(llvm::Intrinsic::assume);
-  Builder.CreateCall(AssumeIntrinsic, BoundsConstraint);
 }
 
 LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
diff --git a/clang/test/CodeGen/array-bounds-constraints-safety.c b/clang/test/CodeGen/array-bounds-constraints-safety.c
new file mode 100644
index 0000000000000..bbd72900361ec
--- /dev/null
+++ b/clang/test/CodeGen/array-bounds-constraints-safety.c
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -emit-llvm -O2 -fassume-array-bounds %s -o - | FileCheck %s
+// Test that array bounds constraints are NOT applied to cases that might
+// break real-world code with intentional out-of-bounds access patterns.
+
+// C18 standard allows one-past-the-end pointers, and some legacy code
+// intentionally accesses out-of-bounds for performance or compatibility.
+// This test verifies that bounds constraints are only applied to safe cases.
+
+// CHECK-LABEL: define {{.*}} @test_flexible_array_member
+struct Data {
+    int count;
+    int items[1];  // Flexible array member pattern (pre-C99 style)
+};
+
+int test_flexible_array_member(struct Data *d, int i) {
+  // CHECK-NOT: call void @llvm.assume
+  // Flexible array member pattern (size 1 array as last field) should NOT
+  // generate bounds constraints because items[1] is just a placeholder
+  // for a larger array allocated with `malloc (sizeof (struct Data) + 42)`.
+  return d->items[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_not_flexible_array
+struct NotFlexible {
+    int items[1];  // Size 1 array but NOT the last field.
+    int count;     // Something comes after it.
+};
+
+int test_not_flexible_array(struct NotFlexible *s, int i) {
+  // CHECK: call void @llvm.assume
+  // This is NOT a flexible array pattern (not the last field),
+  // so we're fine generating `assume(i < 1)`.
+  return s->items[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_pointer_parameter
+int test_pointer_parameter(int *arr, int i) {
+  // CHECK-NOT: call void @llvm.assume
+  // Pointer parameters should NOT generate bounds constraints
+  // because we don't know the actual array size.
+  return arr[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_vla
+int test_vla(int n, int i) {
+  int arr[n];  // Variable-length array.
+  // CHECK-NOT: call void @llvm.assume
+  // VLAs should NOT generate bounds constraints
+  // because the size is dynamic.
+  return arr[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_one_past_end
+extern int extern_array[100];
+int *test_one_past_end(void) {
+  // CHECK-NOT: call void @llvm.assume
+  // Taking address of one-past-the-end is allowed by C standard.
+  // We should NOT assume anything about this access.
+  return &extern_array[100];  // Legal: one past the end.
+}
+
+// CHECK-LABEL: define {{.*}} @test_extern_array
+int test_extern_array(int i) {
+  // CHECK: call void @llvm.assume
+  // This will generate bounds constraints.
+  // The array is a constant-size global array.
+  // This is the safe case where we want optimization hints.
+  return extern_array[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_local_constant_array
+int test_local_constant_array(int i) {
+  int arr[10];
+  // CHECK: call void @llvm.assume
+  // This will generate bounds constraints.
+  // We know the exact size of this alloca array.
+  // This is the safe case where we want optimization hints.
+  return arr[i];
+}
+
+// CHECK-LABEL: define {{.*}} @test_malloc_array
+int *my_malloc(int);
+int test_malloc_array(int i) {
+  // CHECK-NOT: call void @llvm.assume
+  // Dynamically allocated arrays accessed via pointers do not get bounds
+  // constraints.
+  int *x = my_malloc(100 * sizeof(int));
+  return x[i];
+}
diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c
index 77e5199a1573a..a2f3ad9e514cb 100644
--- a/clang/test/CodeGen/array-bounds-constraints.c
+++ b/clang/test/CodeGen/array-bounds-constraints.c
@@ -1,14 +1,17 @@
-// Test that array bounds constraints generate llvm.assume statements for optimization hints.
-// RUN: %clang_cc1 -emit-llvm -O2 %s -o - | FileCheck %s
-
 // This test verifies that clang generates llvm.assume statements to inform the
 // optimizer that array subscripts are within bounds to enable better optimization.
+// RUN: %clang_cc1 -emit-llvm -O2 -fassume-array-bounds %s -o - | FileCheck %s
+
+// Verify no assumes are generated.
+// RUN: %clang_cc1 -emit-llvm -O2 -fno-assume-array-bounds %s -o - | FileCheck %s -check-prefix=NO-FLAG
 
 // CHECK-LABEL: define {{.*}} @test_simple_array
+// NO-FLAG-LABEL: define {{.*}} @test_simple_array
 int test_simple_array(int i) {
   int arr[10];  // C arrays are 0-based: valid indices are [0, 9]
   // CHECK: %{{.*}} = icmp ult i32 %i, 10
   // CHECK: call void @llvm.assume(i1 %{{.*}})
+  // NO-FLAG-NOT: call void @llvm.assume
   return arr[i];
 }
 

>From ec1024d012bd336d9efa996709bab3c23e59ab36 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sat, 4 Oct 2025 18:09:35 -0500
Subject: [PATCH 3/6] fix UB in testcase

---
 clang/test/CodeGen/array-bounds-constraints-safety.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/test/CodeGen/array-bounds-constraints-safety.c b/clang/test/CodeGen/array-bounds-constraints-safety.c
index bbd72900361ec..3484853ceb662 100644
--- a/clang/test/CodeGen/array-bounds-constraints-safety.c
+++ b/clang/test/CodeGen/array-bounds-constraints-safety.c
@@ -69,8 +69,10 @@ int test_extern_array(int i) {
 }
 
 // CHECK-LABEL: define {{.*}} @test_local_constant_array
+void init_array(int *arr);
 int test_local_constant_array(int i) {
   int arr[10];
+  init_array(arr);  // Initialize to avoid UB from uninitialized read.
   // CHECK: call void @llvm.assume
   // This will generate bounds constraints.
   // We know the exact size of this alloca array.

>From 77e2606c98c8785227a3e08c0ea21c4166415805 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sat, 4 Oct 2025 18:13:31 -0500
Subject: [PATCH 4/6] fix UB in testcase

---
 clang/test/CodeGen/array-bounds-constraints.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c
index a2f3ad9e514cb..b6bc9050b039a 100644
--- a/clang/test/CodeGen/array-bounds-constraints.c
+++ b/clang/test/CodeGen/array-bounds-constraints.c
@@ -7,8 +7,10 @@
 
 // CHECK-LABEL: define {{.*}} @test_simple_array
 // NO-FLAG-LABEL: define {{.*}} @test_simple_array
+void init_array(int *arr);
 int test_simple_array(int i) {
   int arr[10];  // C arrays are 0-based: valid indices are [0, 9]
+  init_array(arr);  // Initialize to avoid UB from uninitialized read.
   // CHECK: %{{.*}} = icmp ult i32 %i, 10
   // CHECK: call void @llvm.assume(i1 %{{.*}})
   // NO-FLAG-NOT: call void @llvm.assume
@@ -18,6 +20,7 @@ int test_simple_array(int i) {
 // CHECK-LABEL: define {{.*}} @test_multidimensional_array
 int test_multidimensional_array(int i, int j) {
   int arr[5][8];  // Valid indices: i in [0, 4], j in [0, 7]
+  init_array(arr[0]);  // Initialize to avoid UB from uninitialized read.
   // CHECK: %{{.*}} = icmp ult i32 %i, 5
   // CHECK: call void @llvm.assume(i1 %{{.*}})
   // CHECK: %{{.*}} = icmp ult i32 %j, 8
@@ -28,6 +31,7 @@ int test_multidimensional_array(int i, int j) {
 // CHECK-LABEL: define {{.*}} @test_unsigned_index
 int test_unsigned_index(unsigned int i) {
   int arr[10];
+  init_array(arr);  // Initialize to avoid UB from uninitialized read.
   // CHECK: %{{.*}} = icmp ult i32 %i, 10
   // CHECK: call void @llvm.assume(i1 %{{.*}})
   return arr[i];

>From 0ed60c2504985275c70838b91c3782fcf96d6eed Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Mon, 6 Oct 2025 09:28:57 -0500
Subject: [PATCH 5/6] avoid optimization in testcase

---
 clang/test/CodeGen/array-bounds-constraints.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/test/CodeGen/array-bounds-constraints.c b/clang/test/CodeGen/array-bounds-constraints.c
index b6bc9050b039a..ed4e3a603ac90 100644
--- a/clang/test/CodeGen/array-bounds-constraints.c
+++ b/clang/test/CodeGen/array-bounds-constraints.c
@@ -43,4 +43,5 @@ void test_store_undef(int i, int value) {
   // CHECK: %{{.*}} = icmp ult i32 %i, 10
   // CHECK: call void @llvm.assume(i1 %{{.*}})
   arr[i] = value;
+  init_array(arr);  // Avoid optimization of the above statement.
 }

>From 1c11e607e149dce1df94a3ef2e515115614f2393 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Mon, 6 Oct 2025 10:57:58 -0500
Subject: [PATCH 6/6] handle zero-length array pattern

---
 clang/lib/CodeGen/CGExpr.cpp                        | 12 ++++++------
 .../test/CodeGen/array-bounds-constraints-safety.c  | 13 +++++++++++++
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 13416228873cc..1d7ebdfdd2279 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4624,13 +4624,13 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
     return;
 
   llvm::APInt ArraySize = CAT->getSize();
-  if (ArraySize == 0)
-    return;
 
   // Don't generate assumes for flexible array member pattern.
-  // Arrays of size 1 in structs are often used as placeholders for
-  // variable-length data (pre-C99 flexible array member idiom.)
-  if (ArraySize == 1) {
+  // Size-1 arrays: "struct { int len; char data[1]; }" (pre-C99 idiom.)
+  // Zero-length arrays: "struct { int len; char data[0]; }" (GCC extension
+  // https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html)
+  // Both patterns use arrays as placeholders for variable-length data.
+  if (ArraySize == 0 || ArraySize == 1) {
     if (const auto *ME = dyn_cast<MemberExpr>(Base->IgnoreParenImpCasts())) {
       if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
         const RecordDecl *RD = FD->getParent();
@@ -4640,7 +4640,7 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
         for (const auto *Field : RD->fields())
           LastField = Field;
         if (LastField == FD)
-          // This is a size-1 array as the last field in a struct.
+          // This is a zero-length or size-1 array as the last field.
           // Likely a flexible array member pattern - skip assumes.
           return;
       }
diff --git a/clang/test/CodeGen/array-bounds-constraints-safety.c b/clang/test/CodeGen/array-bounds-constraints-safety.c
index 3484853ceb662..e4a5c361391b6 100644
--- a/clang/test/CodeGen/array-bounds-constraints-safety.c
+++ b/clang/test/CodeGen/array-bounds-constraints-safety.c
@@ -6,6 +6,19 @@
 // intentionally accesses out-of-bounds for performance or compatibility.
 // This test verifies that bounds constraints are only applied to safe cases.
 
+// CHECK-LABEL: define {{.*}} @test_zero_length_array
+struct ZeroLengthData {
+    int count;
+    int items[0];  // GNU C extension: zero-length array
+};
+
+int test_zero_length_array(struct ZeroLengthData *d, int i) {
+  // CHECK-NOT: call void @llvm.assume
+  // Zero-length array as last field should not generate bounds constraints.
+  // See https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
+  return d->items[i];
+}
+
 // CHECK-LABEL: define {{.*}} @test_flexible_array_member
 struct Data {
     int count;



More information about the cfe-commits mailing list