[compiler-rt] 8c4a65b - [ubsan] Check implicit casts in ObjC for-in statements

Vedant Kumar via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 13 15:11:29 PDT 2020


Author: Vedant Kumar
Date: 2020-07-13T15:11:18-07:00
New Revision: 8c4a65b9b2ca6961139beca92de37eea479f00fa

URL: https://github.com/llvm/llvm-project/commit/8c4a65b9b2ca6961139beca92de37eea479f00fa
DIFF: https://github.com/llvm/llvm-project/commit/8c4a65b9b2ca6961139beca92de37eea479f00fa.diff

LOG: [ubsan] Check implicit casts in ObjC for-in statements

Check that the implicit cast from `id` used to construct the element
variable in an ObjC for-in statement is valid.

This check is included as part of a new `objc-cast` sanitizer, outside
of the main 'undefined' group, as (IIUC) the behavior it's checking for
is not technically UB.

The check can be extended to cover other kinds of invalid casts in ObjC.

Partially addresses: rdar://12903059, rdar://9542496

Differential Revision: https://reviews.llvm.org/D71491

Added: 
    compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m

Modified: 
    clang/docs/UndefinedBehaviorSanitizer.rst
    clang/include/clang/Basic/Sanitizers.def
    clang/lib/CodeGen/CGObjC.cpp
    clang/lib/CodeGen/CodeGenFunction.h
    clang/lib/Driver/SanitizerArgs.cpp
    clang/lib/Driver/ToolChains/Darwin.cpp
    clang/test/CodeGenObjC/for-in.m
    compiler-rt/lib/ubsan/ubsan_checks.inc
    compiler-rt/lib/ubsan/ubsan_handlers.cpp
    compiler-rt/lib/ubsan/ubsan_handlers.h
    compiler-rt/lib/ubsan/ubsan_value.cpp
    compiler-rt/lib/ubsan/ubsan_value.h
    compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst
index 0a27810150db..76676dfce95b 100644
--- a/clang/docs/UndefinedBehaviorSanitizer.rst
+++ b/clang/docs/UndefinedBehaviorSanitizer.rst
@@ -127,6 +127,10 @@ Available checks are:
      is annotated with ``_Nonnull``.
   -  ``-fsanitize=nullability-return``: Returning null from a function with
      a return type annotated with ``_Nonnull``.
+  -  ``-fsanitize=objc-cast``: Invalid implicit cast of an ObjC object pointer
+     to an incompatible type. This is often unintentional, but is not undefined
+     behavior, therefore the check is not a part of the ``undefined`` group.
+     Currently only supported on Darwin.
   -  ``-fsanitize=object-size``: An attempt to potentially use bytes which
      the optimizer can determine are not part of the object being accessed.
      This will also detect some types of undefined behavior that may not

diff  --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def
index 0037cc2146f2..2912bdd44b2d 100644
--- a/clang/include/clang/Basic/Sanitizers.def
+++ b/clang/include/clang/Basic/Sanitizers.def
@@ -156,6 +156,8 @@ SANITIZER_GROUP("implicit-integer-arithmetic-value-change",
                 ImplicitIntegerArithmeticValueChange,
                 ImplicitIntegerSignChange | ImplicitSignedIntegerTruncation)
 
+SANITIZER("objc-cast", ObjCCast)
+
 // FIXME:
 //SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion,
 //                ImplicitIntegerArithmeticValueChange |

diff  --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 158a548e66c1..cd2b84f5dd20 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -1836,6 +1836,40 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
   llvm::Value *CurrentItem =
     Builder.CreateAlignedLoad(CurrentItemPtr, getPointerAlign());
 
+  if (SanOpts.has(SanitizerKind::ObjCCast)) {
+    // Before using an item from the collection, check that the implicit cast
+    // from id to the element type is valid. This is done with instrumentation
+    // roughly corresponding to:
+    //
+    //   if (![item isKindOfClass:expectedCls]) { /* emit diagnostic */ }
+    const ObjCObjectPointerType *ObjPtrTy =
+        elementType->getAsObjCInterfacePointerType();
+    const ObjCInterfaceType *InterfaceTy =
+        ObjPtrTy ? ObjPtrTy->getInterfaceType() : nullptr;
+    if (InterfaceTy) {
+      SanitizerScope SanScope(this);
+      auto &C = CGM.getContext();
+      assert(InterfaceTy->getDecl() && "No decl for ObjC interface type");
+      Selector IsKindOfClassSel = GetUnarySelector("isKindOfClass", C);
+      CallArgList IsKindOfClassArgs;
+      llvm::Value *Cls =
+          CGM.getObjCRuntime().GetClass(*this, InterfaceTy->getDecl());
+      IsKindOfClassArgs.add(RValue::get(Cls), C.getObjCClassType());
+      llvm::Value *IsClass =
+          CGM.getObjCRuntime()
+              .GenerateMessageSend(*this, ReturnValueSlot(), C.BoolTy,
+                                   IsKindOfClassSel, CurrentItem,
+                                   IsKindOfClassArgs)
+              .getScalarVal();
+      llvm::Constant *StaticData[] = {
+          EmitCheckSourceLocation(S.getBeginLoc()),
+          EmitCheckTypeDescriptor(QualType(InterfaceTy, 0))};
+      EmitCheck({{IsClass, SanitizerKind::ObjCCast}},
+                SanitizerHandler::InvalidObjCCast,
+                ArrayRef<llvm::Constant *>(StaticData), CurrentItem);
+    }
+  }
+
   // Cast that value to the right type.
   CurrentItem = Builder.CreateBitCast(CurrentItem, convertedElementType,
                                       "currentitem");

diff  --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 1fc2ed76ca9e..d794f4f0fa81 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -124,6 +124,7 @@ enum TypeEvaluationKind {
   SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 1)             \
   SANITIZER_CHECK(ImplicitConversion, implicit_conversion, 0)                  \
   SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0)                          \
+  SANITIZER_CHECK(InvalidObjCCast, invalid_objc_cast, 0)                       \
   SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0)                     \
   SANITIZER_CHECK(MissingReturn, missing_return, 0)                            \
   SANITIZER_CHECK(MulOverflow, mul_overflow, 0)                                \

diff  --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index 0b81152d57f6..bcc9ffc7ff8f 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -27,7 +27,8 @@ using namespace llvm::opt;
 static const SanitizerMask NeedsUbsanRt =
     SanitizerKind::Undefined | SanitizerKind::Integer |
     SanitizerKind::ImplicitConversion | SanitizerKind::Nullability |
-    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero;
+    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero |
+    SanitizerKind::ObjCCast;
 static const SanitizerMask NeedsUbsanCxxRt =
     SanitizerKind::Vptr | SanitizerKind::CFI;
 static const SanitizerMask NotAllowedWithTrap = SanitizerKind::Vptr;
@@ -48,11 +49,11 @@ static const SanitizerMask SupportsCoverage =
     SanitizerKind::DataFlow | SanitizerKind::Fuzzer |
     SanitizerKind::FuzzerNoLink | SanitizerKind::FloatDivideByZero |
     SanitizerKind::SafeStack | SanitizerKind::ShadowCallStack |
-    SanitizerKind::Thread;
+    SanitizerKind::Thread | SanitizerKind::ObjCCast;
 static const SanitizerMask RecoverableByDefault =
     SanitizerKind::Undefined | SanitizerKind::Integer |
     SanitizerKind::ImplicitConversion | SanitizerKind::Nullability |
-    SanitizerKind::FloatDivideByZero;
+    SanitizerKind::FloatDivideByZero | SanitizerKind::ObjCCast;
 static const SanitizerMask Unrecoverable =
     SanitizerKind::Unreachable | SanitizerKind::Return;
 static const SanitizerMask AlwaysRecoverable =
@@ -62,7 +63,8 @@ static const SanitizerMask TrappingSupported =
     (SanitizerKind::Undefined & ~SanitizerKind::Vptr) |
     SanitizerKind::UnsignedIntegerOverflow | SanitizerKind::ImplicitConversion |
     SanitizerKind::Nullability | SanitizerKind::LocalBounds |
-    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero;
+    SanitizerKind::CFI | SanitizerKind::FloatDivideByZero |
+    SanitizerKind::ObjCCast;
 static const SanitizerMask TrappingDefault = SanitizerKind::CFI;
 static const SanitizerMask CFIClasses =
     SanitizerKind::CFIVCall | SanitizerKind::CFINVCall |

diff  --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index 2e1190c34ea7..7b879f8cb652 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -2721,6 +2721,7 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
   Res |= SanitizerKind::Fuzzer;
   Res |= SanitizerKind::FuzzerNoLink;
   Res |= SanitizerKind::Function;
+  Res |= SanitizerKind::ObjCCast;
 
   // Prior to 10.9, macOS shipped a version of the C++ standard library without
   // C++11 support. The same is true of iOS prior to version 5. These OS'es are

diff  --git a/clang/test/CodeGenObjC/for-in.m b/clang/test/CodeGenObjC/for-in.m
index 26fe7922aee9..20e89b33affa 100644
--- a/clang/test/CodeGenObjC/for-in.m
+++ b/clang/test/CodeGenObjC/for-in.m
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -emit-llvm %s -o %t
+// RUN: %clang_cc1 %s -verify -o /dev/null
+// RUN: %clang_cc1 %s -triple x86_64-apple-darwin -emit-llvm -fsanitize=objc-cast -o - | FileCheck %s
 
 void p(const char*, ...);
 
@@ -18,12 +19,26 @@ -(const char*) cString;
 #define L5(n) L4(n+0),L4(n+16)
 #define L6(n) L5(n+0),L5(n+32)
 
+// CHECK-LABEL: define void @t0
 void t0() {
   NSArray *array = [NSArray arrayWithObjects: L1(0), (void*)0];
 
   p("array.length: %d\n", [array count]);
   unsigned index = 0;
   for (NSString *i in array) {	// expected-warning {{collection expression type 'NSArray *' may not respond}}
+
+    // CHECK:      [[expectedCls:%.*]] = load %struct._class_t*, {{.*}}, !nosanitize
+    // CHECK-NEXT: [[kindOfClassSel:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES{{.*}}, !nosanitize
+    // CHECK-NEXT: [[expectedClsI8:%.*]] = bitcast %struct._class_t* [[expectedCls]] to i8*, !nosanitize
+    // CHECK-NEXT: [[isCls:%.*]] = call zeroext i1 bitcast {{.*}}@objc_msgSend to i1 (i8*, i8*, {{.*}})(i8* [[theItem:%.*]], i8* [[kindOfClassSel]], i8* [[expectedClsI8]]), !nosanitize
+    // CHECK: br i1 [[isCls]]
+
+    // CHECK: ptrtoint i8* [[theItem]] to i64, !nosanitize
+    // CHECK-NEXT: call void @__ubsan_handle_invalid_objc_cast
+    // CHECK-NEXT: unreachable, !nosanitize
+
+    // CHECK: bitcast i8* [[theItem]]
+
     p("element %d: %s\n", index++, [i cString]);
   }
 }

diff  --git a/compiler-rt/lib/ubsan/ubsan_checks.inc b/compiler-rt/lib/ubsan/ubsan_checks.inc
index 2c1529a7d92c..846cd89ee19f 100644
--- a/compiler-rt/lib/ubsan/ubsan_checks.inc
+++ b/compiler-rt/lib/ubsan/ubsan_checks.inc
@@ -37,6 +37,7 @@ UBSAN_CHECK(IntegerDivideByZero, "integer-divide-by-zero",
             "integer-divide-by-zero")
 UBSAN_CHECK(FloatDivideByZero, "float-divide-by-zero", "float-divide-by-zero")
 UBSAN_CHECK(InvalidBuiltin, "invalid-builtin-use", "invalid-builtin-use")
+UBSAN_CHECK(InvalidObjCCast, "invalid-objc-cast", "invalid-objc-cast")
 UBSAN_CHECK(ImplicitUnsignedIntegerTruncation,
             "implicit-unsigned-integer-truncation",
             "implicit-unsigned-integer-truncation")

diff  --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp
index 7f6a46fb6cf0..e201e6bba220 100644
--- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp
+++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp
@@ -16,6 +16,7 @@
 #include "ubsan_diag.h"
 #include "ubsan_flags.h"
 #include "ubsan_monitor.h"
+#include "ubsan_value.h"
 
 #include "sanitizer_common/sanitizer_common.h"
 
@@ -640,6 +641,36 @@ void __ubsan::__ubsan_handle_invalid_builtin_abort(InvalidBuiltinData *Data) {
   Die();
 }
 
+static void handleInvalidObjCCast(InvalidObjCCast *Data, ValueHandle Pointer,
+                                  ReportOptions Opts) {
+  SourceLocation Loc = Data->Loc.acquire();
+  ErrorType ET = ErrorType::InvalidObjCCast;
+
+  if (ignoreReport(Loc, Opts, ET))
+    return;
+
+  ScopedReport R(Opts, Loc, ET);
+
+  const char *GivenClass = getObjCClassName(Pointer);
+  const char *GivenClassStr = GivenClass ? GivenClass : "<unknown type>";
+
+  Diag(Loc, DL_Error, ET,
+       "invalid ObjC cast, object is a '%0', but expected a %1")
+      << GivenClassStr << Data->ExpectedType;
+}
+
+void __ubsan::__ubsan_handle_invalid_objc_cast(InvalidObjCCast *Data,
+                                               ValueHandle Pointer) {
+  GET_REPORT_OPTIONS(false);
+  handleInvalidObjCCast(Data, Pointer, Opts);
+}
+void __ubsan::__ubsan_handle_invalid_objc_cast_abort(InvalidObjCCast *Data,
+                                                     ValueHandle Pointer) {
+  GET_REPORT_OPTIONS(true);
+  handleInvalidObjCCast(Data, Pointer, Opts);
+  Die();
+}
+
 static void handleNonNullReturn(NonNullReturnData *Data, SourceLocation *LocPtr,
                                 ReportOptions Opts, bool IsAttr) {
   if (!LocPtr)

diff  --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h
index 22ca96422381..219fb15de55f 100644
--- a/compiler-rt/lib/ubsan/ubsan_handlers.h
+++ b/compiler-rt/lib/ubsan/ubsan_handlers.h
@@ -168,6 +168,14 @@ struct InvalidBuiltinData {
 /// Handle a builtin called in an invalid way.
 RECOVERABLE(invalid_builtin, InvalidBuiltinData *Data)
 
+struct InvalidObjCCast {
+  SourceLocation Loc;
+  const TypeDescriptor &ExpectedType;
+};
+
+/// Handle an invalid ObjC cast.
+RECOVERABLE(invalid_objc_cast, InvalidObjCCast *Data, ValueHandle Pointer)
+
 struct NonNullReturnData {
   SourceLocation AttrLoc;
 };

diff  --git a/compiler-rt/lib/ubsan/ubsan_value.cpp b/compiler-rt/lib/ubsan/ubsan_value.cpp
index 60f0b5c99348..79c3ba991d39 100644
--- a/compiler-rt/lib/ubsan/ubsan_value.cpp
+++ b/compiler-rt/lib/ubsan/ubsan_value.cpp
@@ -16,9 +16,57 @@
 #include "ubsan_value.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+
+// TODO(dliew): Prefer '__APPLE__' here over 'SANITIZER_MAC', as the latter is
+// unclear. rdar://58124919 tracks using a more obviously portable guard.
+#if defined(__APPLE__)
+#include <dlfcn.h>
+#endif
 
 using namespace __ubsan;
 
+typedef const char *(*ObjCGetClassNameTy)(void *);
+
+const char *__ubsan::getObjCClassName(ValueHandle Pointer) {
+#if defined(__APPLE__)
+  // We need to query the ObjC runtime for some information, but do not want
+  // to introduce a static dependency from the ubsan runtime onto ObjC. Try to
+  // grab a handle to the ObjC runtime used by the process.
+  static bool AttemptedDlopen = false;
+  static void *ObjCHandle = nullptr;
+  static void *ObjCObjectGetClassName = nullptr;
+
+  // Prevent threads from racing to dlopen().
+  static __sanitizer::StaticSpinMutex Lock;
+  {
+    __sanitizer::SpinMutexLock Guard(&Lock);
+
+    if (!AttemptedDlopen) {
+      ObjCHandle = dlopen(
+          "/usr/lib/libobjc.A.dylib",
+          RTLD_LAZY         // Only bind symbols when used.
+              | RTLD_LOCAL  // Only make symbols available via the handle.
+              | RTLD_NOLOAD // Do not load the dylib, just grab a handle if the
+                            // image is already loaded.
+              | RTLD_FIRST  // Only search the image pointed-to by the handle.
+      );
+      AttemptedDlopen = true;
+      if (!ObjCHandle)
+        return nullptr;
+      ObjCObjectGetClassName = dlsym(ObjCHandle, "object_getClassName");
+    }
+  }
+
+  if (!ObjCObjectGetClassName)
+    return nullptr;
+
+  return ObjCGetClassNameTy(ObjCObjectGetClassName)((void *)Pointer);
+#else
+  return nullptr;
+#endif
+}
+
 SIntMax Value::getSIntValue() const {
   CHECK(getType().isSignedIntegerTy());
   if (isInlineInt()) {

diff  --git a/compiler-rt/lib/ubsan/ubsan_value.h b/compiler-rt/lib/ubsan/ubsan_value.h
index a216e3a147e9..e0957276dd24 100644
--- a/compiler-rt/lib/ubsan/ubsan_value.h
+++ b/compiler-rt/lib/ubsan/ubsan_value.h
@@ -135,6 +135,9 @@ class TypeDescriptor {
 /// \brief An opaque handle to a value.
 typedef uptr ValueHandle;
 
+/// Returns the class name of the given ObjC object, or null if the name
+/// cannot be found.
+const char *getObjCClassName(ValueHandle Pointer);
 
 /// \brief Representation of an operand value provided by the instrumented code.
 ///

diff  --git a/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp b/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp
index ed62ddd0fa34..8654c705cfbb 100644
--- a/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp
+++ b/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp
@@ -109,6 +109,7 @@ HANDLER(vla_bound_not_positive, "vla-bound-not-positive")
 HANDLER(float_cast_overflow, "float-cast-overflow")
 HANDLER(load_invalid_value, "load-invalid-value")
 HANDLER(invalid_builtin, "invalid-builtin")
+HANDLER(invalid_objc_cast, "invalid-objc-cast")
 HANDLER(function_type_mismatch, "function-type-mismatch")
 HANDLER(implicit_conversion, "implicit-conversion")
 HANDLER(nonnull_arg, "nonnull-arg")

diff  --git a/compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m b/compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m
new file mode 100644
index 000000000000..f502e5f53537
--- /dev/null
+++ b/compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m
@@ -0,0 +1,27 @@
+// REQUIRES: darwin
+//
+// RUN: %clang -framework Foundation -fsanitize=objc-cast %s -O1 -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+//
+// RUN: %clang -framework Foundation -fsanitize=objc-cast -fno-sanitize-recover=objc-cast %s -O1 -o %t.trap
+// RUN: not %run %t.trap 2>&1 | FileCheck %s
+
+#include <Foundation/Foundation.h>
+
+int main() {
+  NSArray *arrayOfInt = [NSArray arrayWithObjects:@1, @2, @3, (void *)0];
+  // CHECK: objc-cast.m:[[@LINE+1]]:{{.*}}: runtime error: invalid ObjC cast, object is a '__NSCFNumber', but expected a 'NSString'
+  for (NSString *str in arrayOfInt) {
+    NSLog(@"%@", str);
+  }
+
+  NSArray *arrayOfStr = [NSArray arrayWithObjects:@"a", @"b", @"c", (void *)0];
+  for (NSString *str in arrayOfStr) {
+    NSLog(@"%@", str);
+  }
+
+  // The diagnostic should only be printed once.
+  // CHECK-NOT: runtime error
+
+  return 0;
+}


        


More information about the llvm-commits mailing list