[clang] -fsanitize=alignment: check memcpy/memmove arguments (PR #67766)

Fangrui Song via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 9 21:04:49 PDT 2023


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/67766

>From 24d675673844f22e8aef8dc183874696216abb1d Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Thu, 28 Sep 2023 15:22:38 -0700
Subject: [PATCH 1/2] -fsanitize=alignment: check memcpy/memmove arguments

Similar to https://reviews.llvm.org/D9673, emit -fsanitize=alignment check for
arguments of builtin memcpy and memmove functions to catch misaligned load like:
```
// Check a
void unaligned_load(int *a, void *b) { memcpy(a, b, sizeof(*a)); }
```

For a reference parameter, we emit a -fsanitize=alignment check as well, which
can be optimized out by InstCombinePass. We rely on the call site
`TCK_ReferenceBinding` check instead.
```
// The check of a will be optimized out.
void unaligned_load(int &a, void *b) { memcpy(&a, b, sizeof(a)); }
```

```
runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *'
```

Technically builtin memset functions can be checked for -fsanitize=alignment as
well, but it does not seem too useful.
---
 clang/include/clang/Basic/Sanitizers.h        |  2 +
 clang/lib/CodeGen/CGBuiltin.cpp               | 32 +++++----
 clang/test/CodeGen/catch-undef-behavior.c     | 68 ++++++++++++++++++-
 .../ubsan/TestCases/TypeCheck/misaligned.cpp  | 23 +++++++
 4 files changed, 111 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Basic/Sanitizers.h b/clang/include/clang/Basic/Sanitizers.h
index 8fdaf2e4ba8ab18..c890242269b3348 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -170,6 +170,8 @@ struct SanitizerSet {
     Mask = Value ? (Mask | K) : (Mask & ~K);
   }
 
+  void set(SanitizerMask K) { Mask = K; }
+
   /// Disable the sanitizers specified in \p K.
   void clear(SanitizerMask K = SanitizerKind::All) { Mask &= ~K; }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3f68aa2c953c74b..b4b0d5cb670f7c1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2730,6 +2730,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     }
   }
 
+  // Check NonnullAttribute/NullabilityArg and Alignment.
+  auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
+                          unsigned ParmNum) {
+    Value *Val = A.getPointer();
+    EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
+                        ParmNum);
+
+    SanitizerSet SkippedChecks;
+    SkippedChecks.set(SanitizerKind::All);
+    SkippedChecks.clear(SanitizerKind::Alignment);
+    EmitTypeCheck(Kind, Arg->getExprLoc(), Val, Arg->getType(),
+                  A.getAlignment(), SkippedChecks);
+  };
+
   switch (BuiltinIDIfNoAsmLabel) {
   default: break;
   case Builtin::BI__builtin___CFStringMakeConstantString:
@@ -3720,10 +3734,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Address Dest = EmitPointerWithAlignment(E->getArg(0));
     Address Src = EmitPointerWithAlignment(E->getArg(1));
     Value *SizeVal = EmitScalarExpr(E->getArg(2));
-    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
-                        E->getArg(0)->getExprLoc(), FD, 0);
-    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-                        E->getArg(1)->getExprLoc(), FD, 1);
+    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
     if (BuiltinID == Builtin::BImempcpy ||
         BuiltinID == Builtin::BI__builtin_mempcpy)
@@ -3738,10 +3750,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Address Src = EmitPointerWithAlignment(E->getArg(1));
     uint64_t Size =
         E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
-    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
-                        E->getArg(0)->getExprLoc(), FD, 0);
-    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-                        E->getArg(1)->getExprLoc(), FD, 1);
+    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
     Builder.CreateMemCpyInline(Dest, Src, Size);
     return RValue::get(nullptr);
   }
@@ -3798,10 +3808,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Address Dest = EmitPointerWithAlignment(E->getArg(0));
     Address Src = EmitPointerWithAlignment(E->getArg(1));
     Value *SizeVal = EmitScalarExpr(E->getArg(2));
-    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
-                        E->getArg(0)->getExprLoc(), FD, 0);
-    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
-                        E->getArg(1)->getExprLoc(), FD, 1);
+    EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+    EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
     Builder.CreateMemMove(Dest, Src, SizeVal, false);
     return RValue::get(Dest.getPointer());
   }
diff --git a/clang/test/CodeGen/catch-undef-behavior.c b/clang/test/CodeGen/catch-undef-behavior.c
index ca0df0f002f8912..3956a475f319ea9 100644
--- a/clang/test/CodeGen/catch-undef-behavior.c
+++ b/clang/test/CodeGen/catch-undef-behavior.c
@@ -354,21 +354,63 @@ void call_decl_nonnull(int *a) {
   decl_nonnull(a);
 }
 
-extern void *memcpy (void *, const void *, unsigned) __attribute__((nonnull(1, 2)));
+extern void *memcpy(void *, const void *, unsigned long) __attribute__((nonnull(1, 2)));
 
 // CHECK-COMMON-LABEL: @call_memcpy_nonnull
 void call_memcpy_nonnull(void *p, void *q, int sz) {
   // CHECK-COMMON: icmp ne ptr {{.*}}, null
   // CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg
   // CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+  // CHECK-COMMON-NOT: call
 
   // CHECK-COMMON: icmp ne ptr {{.*}}, null
   // CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg
   // CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+  // CHECK-COMMON-NOT: call
+
+  // CHECK-COMMON: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %0, ptr align 1 %1, i64 %conv, i1 false)
   memcpy(p, q, sz);
 }
 
-extern void *memmove (void *, const void *, unsigned) __attribute__((nonnull(1, 2)));
+// CHECK-COMMON-LABEL: define{{.*}} void @call_memcpy(
+void call_memcpy(long *p, short *q, int sz) {
+  // CHECK-COMMON: icmp ne ptr {{.*}}, null
+  // CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg(
+  // CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+  // CHECK-COMMON: and i64 %[[#]], 7, !nosanitize
+  // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+  // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+  // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+  // CHECK-COMMON: icmp ne ptr {{.*}}, null
+  // CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg(
+  // CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+  // CHECK-COMMON: and i64 %[[#]], 1, !nosanitize
+  // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+  // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+  // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+  // CHECK-COMMON: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %0, ptr align 2 %1, i64 %conv, i1 false)
+  memcpy(p, q, sz);
+}
+
+// CHECK-COMMON-LABEL: define{{.*}} void @call_memcpy_inline(
+void call_memcpy_inline(long *p, short *q) {
+  // CHECK-COMMON: and i64 %[[#]], 7, !nosanitize
+  // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+  // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+  // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+  // CHECK-COMMON: and i64 %[[#]], 1, !nosanitize
+  // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+  // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+  // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+  // CHECK-COMMON: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %0, ptr align 2 %1, i64 2, i1 false)
+  __builtin_memcpy_inline(p, q, 2);
+}
+
+extern void *memmove(void *, const void *, unsigned long) __attribute__((nonnull(1, 2)));
 
 // CHECK-COMMON-LABEL: @call_memmove_nonnull
 void call_memmove_nonnull(void *p, void *q, int sz) {
@@ -382,6 +424,28 @@ void call_memmove_nonnull(void *p, void *q, int sz) {
   memmove(p, q, sz);
 }
 
+// CHECK-COMMON-LABEL: define{{.*}} void @call_memmove(
+void call_memmove(long *p, short *q, int sz) {
+  // CHECK-COMMON: icmp ne ptr {{.*}}, null
+  // CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg(
+  // CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+  // CHECK-COMMON: and i64 %[[#]], 7, !nosanitize
+  // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+  // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+  // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+  // CHECK-COMMON: icmp ne ptr {{.*}}, null
+  // CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg(
+  // CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+  // CHECK-COMMON: and i64 %[[#]], 1, !nosanitize
+  // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+  // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+  // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+  // CHECK-COMMON: call void @llvm.memmove.p0.p0.i64(ptr align 8 %0, ptr align 2 %1, i64 %conv, i1 false)
+  memmove(p, q, sz);
+}
+
 // CHECK-COMMON-LABEL: @call_nonnull_variadic
 __attribute__((nonnull)) void nonnull_variadic(int a, ...);
 void call_nonnull_variadic(int a, int *b) {
diff --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/misaligned.cpp b/compiler-rt/test/ubsan/TestCases/TypeCheck/misaligned.cpp
index 3cb8cb80feb0621..99a644a93fb0c80 100644
--- a/compiler-rt/test/ubsan/TestCases/TypeCheck/misaligned.cpp
+++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/misaligned.cpp
@@ -1,6 +1,8 @@
 // RUN: %clangxx %gmlt -fsanitize=alignment %s -O3 -o %t
 // RUN: %run %t l0 && %run %t s0 && %run %t r0 && %run %t m0 && %run %t f0 && %run %t n0 && %run %t u0
 // RUN: %run %t l1 2>&1 | FileCheck %s --check-prefix=CHECK-LOAD --strict-whitespace
+// RUN: %run %t L1 2>&1 | FileCheck %s --check-prefix=CHECK-MEMCPY-LOAD
+// RUN: %run %t S1 2>&1 | FileCheck %s --check-prefix=CHECK-MEMCPY-STORE
 // RUN: %run %t r1 2>&1 | FileCheck %s --check-prefix=CHECK-REFERENCE
 // RUN: %run %t m1 2>&1 | FileCheck %s --check-prefix=CHECK-MEMBER
 // RUN: %run %t f1 2>&1 | FileCheck %s --check-prefix=CHECK-MEMFUN
@@ -15,6 +17,7 @@
 // XFAIL: target={{.*openbsd.*}}
 
 #include <new>
+#include <string.h>
 
 struct S {
   S() {}
@@ -47,6 +50,16 @@ int main(int, char **argv) {
     return *p && 0;
     // CHECK-STACK-LOAD: #0 {{.*}}main{{.*}}misaligned.cpp
 
+  case 'L': {
+    int x;
+    // CHECK-MEMCPY-LOAD: misaligned.cpp:[[#@LINE+4]]{{(:16)?}}: runtime error: load of misaligned address [[PTR:0x[0-9a-f]*]] for type 'const void *', which requires 4 byte alignment
+    // CHECK-MEMCPY-LOAD-NEXT: [[PTR]]: note: pointer points here
+    // CHECK-MEMCPY-LOAD-NEXT: {{^ 00 00 00 01 02 03 04  05}}
+    // CHECK-MEMCPY-LOAD-NEXT: {{^             \^}}
+    memcpy(&x, p, sizeof(x));
+    return x && 0;
+  }
+
   case 's':
     // CHECK-STORE: misaligned.cpp:[[@LINE+4]]{{(:5)?}}: runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int', which requires 4 byte alignment
     // CHECK-STORE-NEXT: [[PTR]]: note: pointer points here
@@ -55,6 +68,16 @@ int main(int, char **argv) {
     *p = 1;
     break;
 
+  case 'S': {
+    int x = 1;
+    // CHECK-MEMCPY-STORE: misaligned.cpp:[[#@LINE+4]]{{(:12)?}}: runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'void *', which requires 4 byte alignment
+    // CHECK-MEMCPY-STORE-NEXT: [[PTR]]: note: pointer points here
+    // CHECK-MEMCPY-STORE-NEXT: {{^ 00 00 00 01 02 03 04  05}}
+    // CHECK-MEMCPY-STORE-NEXT: {{^             \^}}
+    memcpy(p, &x, sizeof(x));
+    break;
+  }
+
   case 'r':
     // CHECK-REFERENCE: misaligned.cpp:[[@LINE+4]]{{(:(5|15))?}}: runtime error: reference binding to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int', which requires 4 byte alignment
     // CHECK-REFERENCE-NEXT: [[PTR]]: note: pointer points here

>From c5d2254d76108a43460eab47968f02cced09949d Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Fri, 29 Sep 2023 12:50:42 -0700
Subject: [PATCH 2/2] Decode correct type name with a best effort by detecting
 one level of BitCast

---
 clang/lib/CodeGen/CGBuiltin.cpp                 | 17 ++++++++++++-----
 clang/test/CodeGen/catch-undef-behavior.c       |  6 +++++-
 .../ubsan/TestCases/TypeCheck/misaligned.cpp    |  4 ++--
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b4b0d5cb670f7c1..8cb7943df9a7822 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2737,11 +2737,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
                         ParmNum);
 
-    SanitizerSet SkippedChecks;
-    SkippedChecks.set(SanitizerKind::All);
-    SkippedChecks.clear(SanitizerKind::Alignment);
-    EmitTypeCheck(Kind, Arg->getExprLoc(), Val, Arg->getType(),
-                  A.getAlignment(), SkippedChecks);
+    if (SanOpts.has(SanitizerKind::Alignment)) {
+      SanitizerSet SkippedChecks;
+      SkippedChecks.set(SanitizerKind::All);
+      SkippedChecks.clear(SanitizerKind::Alignment);
+      SourceLocation Loc = Arg->getExprLoc();
+      // Strip an implicit cast.
+      if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
+        if (CE->getCastKind() == CK_BitCast)
+          Arg = CE->getSubExpr();
+      EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
+                    SkippedChecks);
+    }
   };
 
   switch (BuiltinIDIfNoAsmLabel) {
diff --git a/clang/test/CodeGen/catch-undef-behavior.c b/clang/test/CodeGen/catch-undef-behavior.c
index 3956a475f319ea9..7679a380fc994c3 100644
--- a/clang/test/CodeGen/catch-undef-behavior.c
+++ b/clang/test/CodeGen/catch-undef-behavior.c
@@ -27,6 +27,9 @@
 // CHECK-UBSAN: @[[SCHAR:.*]] = private unnamed_addr constant { i16, i16, [14 x i8] } { i16 0, i16 7, [14 x i8] c"'signed char'\00" }
 // CHECK-UBSAN: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 10 {{.*}} @[[FP16]], {{.*}} }
 
+// CHECK-UBSAN: @[[PLONG:.*]] = private unnamed_addr constant { i16, i16, [9 x i8] } { i16 -1, i16 0, [9 x i8] c"'long *'\00" }
+// CHECK-UBSAN: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 10 {{.*}} @[[PLONG]], {{.*}} }
+
 // PR6805
 // CHECK-COMMON-LABEL: @foo
 void foo(void) {
@@ -379,7 +382,7 @@ void call_memcpy(long *p, short *q, int sz) {
   // CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
   // CHECK-COMMON: and i64 %[[#]], 7, !nosanitize
   // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
-  // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+  // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(ptr @[[LINE_1600]]
   // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
 
   // CHECK-COMMON: icmp ne ptr {{.*}}, null
@@ -391,6 +394,7 @@ void call_memcpy(long *p, short *q, int sz) {
   // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
 
   // CHECK-COMMON: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %0, ptr align 2 %1, i64 %conv, i1 false)
+#line 1600
   memcpy(p, q, sz);
 }
 
diff --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/misaligned.cpp b/compiler-rt/test/ubsan/TestCases/TypeCheck/misaligned.cpp
index 99a644a93fb0c80..e39a0ab4e658963 100644
--- a/compiler-rt/test/ubsan/TestCases/TypeCheck/misaligned.cpp
+++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/misaligned.cpp
@@ -52,7 +52,7 @@ int main(int, char **argv) {
 
   case 'L': {
     int x;
-    // CHECK-MEMCPY-LOAD: misaligned.cpp:[[#@LINE+4]]{{(:16)?}}: runtime error: load of misaligned address [[PTR:0x[0-9a-f]*]] for type 'const void *', which requires 4 byte alignment
+    // CHECK-MEMCPY-LOAD: misaligned.cpp:[[#@LINE+4]]{{(:16)?}}: runtime error: load of misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *', which requires 4 byte alignment
     // CHECK-MEMCPY-LOAD-NEXT: [[PTR]]: note: pointer points here
     // CHECK-MEMCPY-LOAD-NEXT: {{^ 00 00 00 01 02 03 04  05}}
     // CHECK-MEMCPY-LOAD-NEXT: {{^             \^}}
@@ -70,7 +70,7 @@ int main(int, char **argv) {
 
   case 'S': {
     int x = 1;
-    // CHECK-MEMCPY-STORE: misaligned.cpp:[[#@LINE+4]]{{(:12)?}}: runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'void *', which requires 4 byte alignment
+    // CHECK-MEMCPY-STORE: misaligned.cpp:[[#@LINE+4]]{{(:12)?}}: runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *', which requires 4 byte alignment
     // CHECK-MEMCPY-STORE-NEXT: [[PTR]]: note: pointer points here
     // CHECK-MEMCPY-STORE-NEXT: {{^ 00 00 00 01 02 03 04  05}}
     // CHECK-MEMCPY-STORE-NEXT: {{^             \^}}



More information about the cfe-commits mailing list