r175949 - ubsan: Emit bounds checks for array indexing, vector indexing, and (in really simple cases) pointer arithmetic. This augments the existing bounds checking with language-level array bounds information.

Richard Smith richard-llvm at metafoo.co.uk
Fri Feb 22 18:53:19 PST 2013


Author: rsmith
Date: Fri Feb 22 20:53:19 2013
New Revision: 175949

URL: http://llvm.org/viewvc/llvm-project?rev=175949&view=rev
Log:
ubsan: Emit bounds checks for array indexing, vector indexing, and (in really simple cases) pointer arithmetic. This augments the existing bounds checking with language-level array bounds information.

Modified:
    cfe/trunk/lib/CodeGen/CGExpr.cpp
    cfe/trunk/lib/CodeGen/CGExprScalar.cpp
    cfe/trunk/lib/CodeGen/CodeGenFunction.h
    cfe/trunk/lib/Driver/SanitizerArgs.h
    cfe/trunk/test/CodeGenCXX/catch-undef-behavior.cpp
    cfe/trunk/test/Driver/darwin-sanitizer-ld.c
    cfe/trunk/test/Driver/ubsan-ld.c

Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=175949&r1=175948&r2=175949&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Fri Feb 22 20:53:19 2013
@@ -630,6 +630,83 @@ void CodeGenFunction::EmitTypeCheck(Type
   }
 }
 
+/// Determine whether this expression refers to a flexible array member in a
+/// struct. We disable array bounds checks for such members.
+static bool isFlexibleArrayMemberExpr(const Expr *E) {
+  // For compatibility with existing code, we treat arrays of length 0 or
+  // 1 as flexible array members.
+  const ArrayType *AT = E->getType()->castAsArrayTypeUnsafe();
+  if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) {
+    if (CAT->getSize().ugt(1))
+      return false;
+  } else if (!isa<IncompleteArrayType>(AT))
+    return false;
+
+  E = E->IgnoreParens();
+
+  // A flexible array member must be the last member in the class.
+  if (const MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
+    // FIXME: If the base type of the member expr is not FD->getParent(),
+    // this should not be treated as a flexible array member access.
+    if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
+      RecordDecl::field_iterator FI(
+          DeclContext::decl_iterator(const_cast<FieldDecl *>(FD)));
+      return ++FI == FD->getParent()->field_end();
+    }
+  }
+
+  return false;
+}
+
+/// If Base is known to point to the start of an array, return the length of
+/// that array. Return 0 if the length cannot be determined.
+llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF, const Expr *Base,
+                                   QualType &IndexedType) {
+  // For the vector indexing extension, the bound is the number of elements.
+  if (const VectorType *VT = Base->getType()->getAs<VectorType>()) {
+    IndexedType = Base->getType();
+    return CGF.Builder.getInt32(VT->getNumElements());
+  }
+
+  Base = Base->IgnoreParens();
+
+  if (const CastExpr *CE = dyn_cast<CastExpr>(Base)) {
+    if (CE->getCastKind() == CK_ArrayToPointerDecay &&
+        !isFlexibleArrayMemberExpr(CE->getSubExpr())) {
+      IndexedType = CE->getSubExpr()->getType();
+      const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe();
+      if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT))
+        return CGF.Builder.getInt(CAT->getSize());
+      else if (const VariableArrayType *VAT = cast<VariableArrayType>(AT))
+        return CGF.getVLASize(VAT).first;
+    }
+  }
+
+  return 0;
+}
+
+void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base,
+                                      llvm::Value *Index, QualType IndexType,
+                                      bool Accessed) {
+  QualType IndexedType;
+  llvm::Value *Bound = getArrayIndexingBound(*this, Base, IndexedType);
+  if (!Bound)
+    return;
+
+  bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType();
+  llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned);
+  llvm::Value *BoundVal = Builder.CreateIntCast(Bound, SizeTy, false);
+
+  llvm::Constant *StaticData[] = {
+    EmitCheckSourceLocation(E->getExprLoc()),
+    EmitCheckTypeDescriptor(IndexedType),
+    EmitCheckTypeDescriptor(IndexType)
+  };
+  llvm::Value *Check = Accessed ? Builder.CreateICmpULT(IndexVal, BoundVal)
+                                : Builder.CreateICmpULE(IndexVal, BoundVal);
+  EmitCheck(Check, "out_of_bounds", StaticData, Index, CRK_Recoverable);
+}
+
 
 CodeGenFunction::ComplexPairTy CodeGenFunction::
 EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
@@ -705,7 +782,11 @@ LValue CodeGenFunction::EmitUnsupportedL
 }
 
 LValue CodeGenFunction::EmitCheckedLValue(const Expr *E, TypeCheckKind TCK) {
-  LValue LV = EmitLValue(E);
+  LValue LV;
+  if (SanOpts->Bounds && isa<ArraySubscriptExpr>(E))
+    LV = EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E), /*Accessed*/true);
+  else
+    LV = EmitLValue(E);
   if (!isa<DeclRefExpr>(E) && !LV.isBitField() && LV.isSimple())
     EmitTypeCheck(TCK, E->getExprLoc(), LV.getAddress(),
                   E->getType(), LV.getAlignment());
@@ -2121,12 +2202,16 @@ static const Expr *isSimpleArrayDecayOpe
   return SubExpr;
 }
 
-LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
+                                               bool Accessed) {
   // The index must always be an integer, which is not an aggregate.  Emit it.
   llvm::Value *Idx = EmitScalarExpr(E->getIdx());
   QualType IdxTy  = E->getIdx()->getType();
   bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType();
 
+  if (SanOpts->Bounds)
+    EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, Accessed);
+
   // If the base is a vector type, then we are forming a vector element lvalue
   // with this subscript.
   if (E->getBase()->getType()->isVectorType()) {
@@ -2187,7 +2272,13 @@ LValue CodeGenFunction::EmitArraySubscri
     // "gep x, i" here.  Emit one "gep A, 0, i".
     assert(Array->getType()->isArrayType() &&
            "Array to pointer decay must have array source type!");
-    LValue ArrayLV = EmitLValue(Array);
+    LValue ArrayLV;
+    // For simple multidimensional array indexing, set the 'accessed' flag for
+    // better bounds-checking of the base expression.
+    if (const ArraySubscriptExpr *ASE = dyn_cast<ArraySubscriptExpr>(Array))
+      ArrayLV = EmitArraySubscriptExpr(ASE, /*Accessed*/ true);
+    else
+      ArrayLV = EmitLValue(Array);
     llvm::Value *ArrayPtr = ArrayLV.getAddress();
     llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
     llvm::Value *Args[] = { Zero, Idx };

Modified: cfe/trunk/lib/CodeGen/CGExprScalar.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprScalar.cpp?rev=175949&r1=175948&r2=175949&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExprScalar.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp Fri Feb 22 20:53:19 2013
@@ -986,7 +986,12 @@ Value *ScalarExprEmitter::VisitArraySubs
   // integer value.
   Value *Base = Visit(E->getBase());
   Value *Idx  = Visit(E->getIdx());
-  bool IdxSigned = E->getIdx()->getType()->isSignedIntegerOrEnumerationType();
+  QualType IdxTy = E->getIdx()->getType();
+
+  if (CGF.SanOpts->Bounds)
+    CGF.EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, /*Accessed*/true);
+
+  bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType();
   Idx = Builder.CreateIntCast(Idx, CGF.Int32Ty, IdxSigned, "vecidxcast");
   return Builder.CreateExtractElement(Base, Idx, "vecext");
 }
@@ -2134,6 +2139,10 @@ static Value *emitPointerArithmetic(Code
   if (isSubtraction)
     index = CGF.Builder.CreateNeg(index, "idx.neg");
 
+  if (CGF.SanOpts->Bounds)
+    CGF.EmitBoundsCheck(op.E, pointerOperand, index, indexOperand->getType(),
+                        /*Accessed*/ false);
+
   const PointerType *pointerType
     = pointerOperand->getType()->getAs<PointerType>();
   if (!pointerType) {

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=175949&r1=175948&r2=175949&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Fri Feb 22 20:53:19 2013
@@ -1910,6 +1910,12 @@ public:
   void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V,
                      QualType Type, CharUnits Alignment = CharUnits::Zero());
 
+  /// \brief Emit a check that \p Base points into an array object, which
+  /// we can access at index \p Index. \p Accessed should be \c false if we
+  /// this expression is used as an lvalue, for instance in "&Arr[Idx]".
+  void EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index,
+                       QualType IndexType, bool Accessed);
+
   llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
                                        bool isInc, bool isPre);
   ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
@@ -2187,7 +2193,8 @@ public:
   LValue EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E);
   LValue EmitPredefinedLValue(const PredefinedExpr *E);
   LValue EmitUnaryOpLValue(const UnaryOperator *E);
-  LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E);
+  LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
+                                bool Accessed = false);
   LValue EmitExtVectorElementExpr(const ExtVectorElementExpr *E);
   LValue EmitMemberExpr(const MemberExpr *E);
   LValue EmitObjCIsaExpr(const ObjCIsaExpr *E);

Modified: cfe/trunk/lib/Driver/SanitizerArgs.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/SanitizerArgs.h?rev=175949&r1=175948&r2=175949&view=diff
==============================================================================
--- cfe/trunk/lib/Driver/SanitizerArgs.h (original)
+++ cfe/trunk/lib/Driver/SanitizerArgs.h Fri Feb 22 20:53:19 2013
@@ -37,7 +37,7 @@ class SanitizerArgs {
     NeedsAsanRt = Address,
     NeedsTsanRt = Thread,
     NeedsMsanRt = Memory,
-    NeedsUbsanRt = (Undefined & ~Bounds) | Integer,
+    NeedsUbsanRt = Undefined | Integer,
     NotAllowedWithTrap = Vptr
   };
   unsigned Kind;

Modified: cfe/trunk/test/CodeGenCXX/catch-undef-behavior.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/catch-undef-behavior.cpp?rev=175949&r1=175948&r2=175949&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/catch-undef-behavior.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/catch-undef-behavior.cpp Fri Feb 22 20:53:19 2013
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=signed-integer-overflow,integer-divide-by-zero,float-divide-by-zero,shift,unreachable,return,vla-bound,alignment,null,vptr,object-size,float-cast-overflow,bool,enum,bounds -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s
 
 struct S {
   double d;
@@ -220,4 +220,93 @@ void bad_downcast_reference(S &p) {
   (void) static_cast<T&>(p);
 }
 
+// CHECK: @_Z11array_index
+int array_index(const int (&a)[4], int n) {
+  // CHECK: %[[K1_OK:.*]] = icmp ult i64 %{{.*}}, 4
+  // CHECK: br i1 %[[K1_OK]]
+  // CHECK: call void @__ubsan_handle_out_of_bounds(
+  int k1 = a[n];
+
+  // CHECK: %[[R1_OK:.*]] = icmp ule i64 %{{.*}}, 4
+  // CHECK: br i1 %[[R1_OK]]
+  // CHECK: call void @__ubsan_handle_out_of_bounds(
+  const int *r1 = &a[n];
+
+  // CHECK: %[[K2_OK:.*]] = icmp ult i64 %{{.*}}, 8
+  // CHECK: br i1 %[[K2_OK]]
+  // CHECK: call void @__ubsan_handle_out_of_bounds(
+  int k2 = ((const int(&)[8])a)[n];
+
+  // CHECK: %[[K3_OK:.*]] = icmp ult i64 %{{.*}}, 4
+  // CHECK: br i1 %[[K3_OK]]
+  // CHECK: call void @__ubsan_handle_out_of_bounds(
+  int k3 = n[a];
+
+  return k1 + *r1 + k2;
+}
+
+// CHECK: @_Z17multi_array_index
+int multi_array_index(int n, int m) {
+  int arr[4][6];
+
+  // CHECK: %[[IDX2_OK:.*]] = icmp ult i64 %{{.*}}, 6
+  // CHECK: br i1 %[[IDX2_OK]]
+  // CHECK: call void @__ubsan_handle_out_of_bounds(
+
+  // CHECK: %[[IDX1_OK:.*]] = icmp ult i64 %{{.*}}, 4
+  // CHECK: br i1 %[[IDX1_OK]]
+  // CHECK: call void @__ubsan_handle_out_of_bounds(
+  return arr[n][m];
+}
+
+// CHECK: @_Z11array_arith
+int array_arith(const int (&a)[4], int n) {
+  // CHECK: %[[K1_OK:.*]] = icmp ule i64 %{{.*}}, 4
+  // CHECK: br i1 %[[K1_OK]]
+  // CHECK: call void @__ubsan_handle_out_of_bounds(
+  const int *k1 = a + n;
+
+  // CHECK: %[[K2_OK:.*]] = icmp ule i64 %{{.*}}, 8
+  // CHECK: br i1 %[[K2_OK]]
+  // CHECK: call void @__ubsan_handle_out_of_bounds(
+  const int *k2 = (const int(&)[8])a + n;
+
+  return *k1 + *k2;
+}
+
+struct ArrayMembers {
+  int a1[5];
+  int a2[1];
+};
+// CHECK: @_Z18struct_array_index
+int struct_array_index(ArrayMembers *p, int n) {
+  // CHECK: %[[IDX_OK:.*]] = icmp ult i64 %{{.*}}, 5
+  // CHECK: br i1 %[[IDX_OK]]
+  // CHECK: call void @__ubsan_handle_out_of_bounds(
+  return p->a1[n];
+}
+
+// CHECK: @_Z16flex_array_index
+int flex_array_index(ArrayMembers *p, int n) {
+  // CHECK-NOT: call void @__ubsan_handle_out_of_bounds(
+  return p->a2[n];
+}
+
+typedef __attribute__((ext_vector_type(4))) int V4I;
+// CHECK: @_Z12vector_index
+int vector_index(V4I v, int n) {
+  // CHECK: %[[IDX_OK:.*]] = icmp ult i64 %{{.*}}, 4
+  // CHECK: br i1 %[[IDX_OK]]
+  // CHECK: call void @__ubsan_handle_out_of_bounds(
+  return v[n];
+}
+
+// CHECK: @_Z12string_index
+char string_index(int n) {
+  // CHECK: %[[IDX_OK:.*]] = icmp ult i64 %{{.*}}, 6
+  // CHECK: br i1 %[[IDX_OK]]
+  // CHECK: call void @__ubsan_handle_out_of_bounds(
+  return "Hello"[n];
+}
+
 // CHECK: attributes [[NR_NUW]] = { noreturn nounwind }

Modified: cfe/trunk/test/Driver/darwin-sanitizer-ld.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/darwin-sanitizer-ld.c?rev=175949&r1=175948&r2=175949&view=diff
==============================================================================
--- cfe/trunk/test/Driver/darwin-sanitizer-ld.c (original)
+++ cfe/trunk/test/Driver/darwin-sanitizer-ld.c Fri Feb 22 20:53:19 2013
@@ -28,7 +28,8 @@
 // CHECK-UBSAN: stdc++
 
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \
-// RUN:   -fsanitize=bounds %s -o %t.o 2>&1 \
+// RUN:   -fsanitize=bounds -fsanitize-undefined-trap-on-error \
+// RUN:   %s -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-BOUNDS %s
 
 // CHECK-BOUNDS: "{{.*}}ld{{(.exe)?}}"
@@ -43,7 +44,8 @@
 // CHECK-DYN-UBSAN: libclang_rt.ubsan_osx.a
 
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-darwin \
-// RUN:   -fPIC -shared -fsanitize=bounds %s -o %t.so 2>&1 \
+// RUN:   -fsanitize=bounds -fsanitize-undefined-trap-on-error \
+// RUN:   %s -o %t.so -fPIC -shared 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-DYN-BOUNDS %s
 
 // CHECK-DYN-BOUNDS: "{{.*}}ld{{(.exe)?}}"

Modified: cfe/trunk/test/Driver/ubsan-ld.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/ubsan-ld.c?rev=175949&r1=175948&r2=175949&view=diff
==============================================================================
--- cfe/trunk/test/Driver/ubsan-ld.c (original)
+++ cfe/trunk/test/Driver/ubsan-ld.c Fri Feb 22 20:53:19 2013
@@ -18,11 +18,3 @@
 // CHECK-LINUX-SHARED-NOT: "-lc"
 // CHECK-LINUX-SHARED: libclang_rt.ubsan-i386.a"
 // CHECK-LINUX-SHARED: "-lpthread"
-
-// RUN: %clang -fsanitize=bounds %s -### -o %t.o 2>&1 \
-// RUN:     -target i386-unknown-linux \
-// RUN:     --sysroot=%S/Inputs/basic_linux_tree \
-// RUN:   | FileCheck --check-prefix=CHECK-LINUX1 %s
-// CHECK-LINUX1: "{{.*}}ld{{(.exe)?}}"
-// CHECK-LINUX1-NOT: libclang_rt.ubsan-i386.a"
-// CHECK-LINUX1-NOT: "-lpthread"





More information about the cfe-commits mailing list