[clang] Modify BoundsSan to improve debuggability (PR #65972)

Oskar Wirga via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 27 10:38:23 PDT 2023


https://github.com/oskarwirga updated https://github.com/llvm/llvm-project/pull/65972

>From 8dd1d0c534faadd65f546a150bbd2cc5a132aa1e Mon Sep 17 00:00:00 2001
From: Oskar Wirga <10386631+oskarwirga at users.noreply.github.com>
Date: Wed, 27 Sep 2023 10:37:49 -0700
Subject: [PATCH 1/2] Modify array-bounds sanitizer to improve debuggability

---
 clang/lib/CodeGen/CGExpr.cpp         | 34 ++++++++++++++++++----------
 clang/test/CodeGen/bounds-checking.c | 13 +++++++++++
 2 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 3123e278985f210..7b4389c874b3f90 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -51,6 +51,12 @@
 using namespace clang;
 using namespace CodeGen;
 
+// Experiment to make sanitizers easier to debug
+static llvm::cl::opt<bool> ClSanitizeDebugDeoptimization(
+    "ubsan-unique-traps", llvm::cl::Optional,
+    llvm::cl::desc("Deoptimize traps for UBSAN so there is 1 trap per check"),
+    llvm::cl::init(false));
+
 //===--------------------------------------------------------------------===//
 //                        Miscellaneous Helper Methods
 //===--------------------------------------------------------------------===//
@@ -3553,17 +3559,28 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked,
   // check-type per function to save on code size.
   if (TrapBBs.size() <= CheckHandlerID)
     TrapBBs.resize(CheckHandlerID + 1);
+
   llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID];
 
-  if (!CGM.getCodeGenOpts().OptimizationLevel || !TrapBB ||
-      (CurCodeDecl && CurCodeDecl->hasAttr<OptimizeNoneAttr>())) {
+  if (!ClSanitizeDebugDeoptimization &&
+      CGM.getCodeGenOpts().OptimizationLevel && TrapBB &&
+      (!CurCodeDecl || !CurCodeDecl->hasAttr<OptimizeNoneAttr>())) {
+    auto Call = TrapBB->begin();
+    assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB");
+
+    Call->applyMergedLocation(Call->getDebugLoc(),
+                              Builder.getCurrentDebugLocation());
+    Builder.CreateCondBr(Checked, Cont, TrapBB);
+  } else {
     TrapBB = createBasicBlock("trap");
     Builder.CreateCondBr(Checked, Cont, TrapBB);
     EmitBlock(TrapBB);
 
-    llvm::CallInst *TrapCall =
-        Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::ubsantrap),
-                           llvm::ConstantInt::get(CGM.Int8Ty, CheckHandlerID));
+    llvm::CallInst *TrapCall = Builder.CreateCall(
+        CGM.getIntrinsic(llvm::Intrinsic::ubsantrap),
+        llvm::ConstantInt::get(CGM.Int8Ty, ClSanitizeDebugDeoptimization
+                                               ? TrapBB->getParent()->size()
+                                               : CheckHandlerID));
 
     if (!CGM.getCodeGenOpts().TrapFuncName.empty()) {
       auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name",
@@ -3573,13 +3590,6 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked,
     TrapCall->setDoesNotReturn();
     TrapCall->setDoesNotThrow();
     Builder.CreateUnreachable();
-  } else {
-    auto Call = TrapBB->begin();
-    assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB");
-
-    Call->applyMergedLocation(Call->getDebugLoc(),
-                              Builder.getCurrentDebugLocation());
-    Builder.CreateCondBr(Checked, Cont, TrapBB);
   }
 
   EmitBlock(Cont);
diff --git a/clang/test/CodeGen/bounds-checking.c b/clang/test/CodeGen/bounds-checking.c
index 1b9e28915e5d9af..cda208917ea1ba1 100644
--- a/clang/test/CodeGen/bounds-checking.c
+++ b/clang/test/CodeGen/bounds-checking.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fsanitize=local-bounds -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fsanitize=array-bounds -O -fsanitize-trap=array-bounds -emit-llvm -triple x86_64-apple-darwin10 -DNO_DYNAMIC %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=array-bounds -fsanitize-trap=array-bounds -O3 -mllvm -ubsan-unique-traps -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s --check-prefixes=NOOPTARRAY
 //
 // REQUIRES: x86-registered-target
 
@@ -66,3 +67,15 @@ int f7(union U *u, int i) {
   // CHECK-NOT: @llvm.ubsantrap
   return u->c[i];
 }
+
+
+char B[10];
+char B2[10];
+// CHECK-LABEL: @f8
+void f8(int i, int k) {
+  // NOOPTARRAY: call void @llvm.ubsantrap(i8 2)
+  B[i] = '\0';
+
+  // NOOPTARRAY: call void @llvm.ubsantrap(i8 4)
+  B2[k] = '\0';
+}

>From 601856499676b5d5303297ed437f22d40376922e Mon Sep 17 00:00:00 2001
From: Oskar Wirga <10386631+oskarwirga at users.noreply.github.com>
Date: Wed, 27 Sep 2023 10:38:01 -0700
Subject: [PATCH 2/2] Modify local-bounds sanitizer to improve debuggability

---
 clang/test/CodeGen/bounds-checking.c          |  3 +
 .../Instrumentation/BoundsChecking.cpp        | 25 ++++--
 .../BoundsChecking/ubsan-unique-traps.ll      | 45 ++++++++++
 .../MC/AArch64/local-bounds-single-trap.ll    | 83 +++++++++++++++++++
 4 files changed, 149 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll
 create mode 100644 llvm/test/MC/AArch64/local-bounds-single-trap.ll

diff --git a/clang/test/CodeGen/bounds-checking.c b/clang/test/CodeGen/bounds-checking.c
index cda208917ea1ba1..636d4f289e24786 100644
--- a/clang/test/CodeGen/bounds-checking.c
+++ b/clang/test/CodeGen/bounds-checking.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fsanitize=local-bounds -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fsanitize=array-bounds -O -fsanitize-trap=array-bounds -emit-llvm -triple x86_64-apple-darwin10 -DNO_DYNAMIC %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=local-bounds -fsanitize-trap=local-bounds -O3 -mllvm -bounds-checking-unique-traps -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s --check-prefixes=NOOPTLOCAL
 // RUN: %clang_cc1 -fsanitize=array-bounds -fsanitize-trap=array-bounds -O3 -mllvm -ubsan-unique-traps -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s --check-prefixes=NOOPTARRAY
 //
 // REQUIRES: x86-registered-target
@@ -73,9 +74,11 @@ char B[10];
 char B2[10];
 // CHECK-LABEL: @f8
 void f8(int i, int k) {
+  // NOOPTLOCAL: call void @llvm.ubsantrap(i8 3)
   // NOOPTARRAY: call void @llvm.ubsantrap(i8 2)
   B[i] = '\0';
 
+  // NOOPTLOCAL: call void @llvm.ubsantrap(i8 5)
   // NOOPTARRAY: call void @llvm.ubsantrap(i8 4)
   B2[k] = '\0';
 }
diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 709095184af5df5..ee5b819604170eb 100644
--- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -37,6 +37,9 @@ using namespace llvm;
 static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
                                   cl::desc("Use one trap block per function"));
 
+static cl::opt<bool> DebugTrapBB("bounds-checking-unique-traps",
+                                 cl::desc("Always use one trap per check"));
+
 STATISTIC(ChecksAdded, "Bounds checks added");
 STATISTIC(ChecksSkipped, "Bounds checks skipped");
 STATISTIC(ChecksUnable, "Bounds checks unable to add");
@@ -180,19 +183,27 @@ static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
   // will create a fresh block every time it is called.
   BasicBlock *TrapBB = nullptr;
   auto GetTrapBB = [&TrapBB](BuilderTy &IRB) {
-    if (TrapBB && SingleTrapBB)
-      return TrapBB;
-
     Function *Fn = IRB.GetInsertBlock()->getParent();
-    // FIXME: This debug location doesn't make a lot of sense in the
-    // `SingleTrapBB` case.
     auto DebugLoc = IRB.getCurrentDebugLocation();
     IRBuilder<>::InsertPointGuard Guard(IRB);
+
+    if (TrapBB && SingleTrapBB && !DebugTrapBB)
+      return TrapBB;
+
     TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
     IRB.SetInsertPoint(TrapBB);
 
-    auto *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
-    CallInst *TrapCall = IRB.CreateCall(F, {});
+    Intrinsic::ID IntrID = DebugTrapBB ? Intrinsic::ubsantrap : Intrinsic::trap;
+    auto *F = Intrinsic::getDeclaration(Fn->getParent(), IntrID);
+
+    CallInst *TrapCall;
+    if (DebugTrapBB) {
+      TrapCall =
+          IRB.CreateCall(F, ConstantInt::get(IRB.getInt8Ty(), Fn->size()));
+    } else {
+      TrapCall = IRB.CreateCall(F, {});
+    }
+
     TrapCall->setDoesNotReturn();
     TrapCall->setDoesNotThrow();
     TrapCall->setDebugLoc(DebugLoc);
diff --git a/llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll b/llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll
new file mode 100644
index 000000000000000..a3f34007e9b09f8
--- /dev/null
+++ b/llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=bounds-checking -bounds-checking-unique-traps -S | FileCheck %s
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:64:64:64:48-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare noalias ptr @malloc(i64) nounwind allocsize(0)
+
+define void @f() nounwind {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call ptr @malloc(i64 32)
+; CHECK-NEXT:    [[IDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
+; CHECK-NEXT:    br label [[TRAP:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    store i32 3, ptr [[IDX]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call ptr @malloc(i64 32)
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    br label [[TRAP1:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    store i32 3, ptr [[IDX2]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call ptr @malloc(i64 32)
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 8
+; CHECK-NEXT:    br label [[TRAP2:%.*]]
+; CHECK:       6:
+; CHECK-NEXT:    store i32 3, ptr [[IDX3]], align 4
+; CHECK-NEXT:    ret void
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.ubsantrap(i8 3) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       trap1:
+; CHECK-NEXT:    call void @llvm.ubsantrap(i8 5) #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       trap2:
+; CHECK-NEXT:    call void @llvm.ubsantrap(i8 7) #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+;
+  %1 = tail call ptr @malloc(i64 32)
+  %idx = getelementptr inbounds i32, ptr %1, i64 8
+  store i32 3, ptr %idx, align 4
+  %2 = tail call ptr @malloc(i64 32)
+  %idx2 = getelementptr inbounds i32, ptr %2, i64 8
+  store i32 3, ptr %idx2, align 4
+  %3 = tail call ptr @malloc(i64 32)
+  %idx3 = getelementptr inbounds i32, ptr %3, i64 8
+  store i32 3, ptr %idx3, align 4
+  ret void
+}
diff --git a/llvm/test/MC/AArch64/local-bounds-single-trap.ll b/llvm/test/MC/AArch64/local-bounds-single-trap.ll
new file mode 100644
index 000000000000000..53a0e010537f096
--- /dev/null
+++ b/llvm/test/MC/AArch64/local-bounds-single-trap.ll
@@ -0,0 +1,83 @@
+; RUN: llc -O3 -mtriple arm64-linux -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-ASM
+; What this test does is check that even with nomerge, the functions still get merged in
+; compiled code as the ubsantrap call gets lowered to a single instruction: brk.
+
+
+ at B = dso_local global [10 x i8] zeroinitializer, align 1
+ at B2 = dso_local global [10 x i8] zeroinitializer, align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
+entry:
+; CHECK-ASM: 	cmp	x8, #10
+; CHECK-ASM: 	b.hi	.LBB0_5
+; CHECK-ASM: // %bb.1:                               // %entry
+; CHECK-ASM: 	mov	w9, #10                         // =0xa
+; CHECK-ASM: 	sub	x9, x9, x8
+; CHECK-ASM: 	cbz	x9, .LBB0_5
+; CHECK-ASM: // %bb.2:
+; CHECK-ASM: 	ldrsw	x9, [sp, #8]
+; CHECK-ASM: 	adrp	x10, B
+; CHECK-ASM: 	add	x10, x10, :lo12:B
+; CHECK-ASM: 	strb	wzr, [x10, x8]
+; CHECK-ASM: 	cmp	x9, #10
+; CHECK-ASM: 	b.hi	.LBB0_5
+; CHECK-ASM: // %bb.3:
+; CHECK-ASM: 	mov	w8, #10                         // =0xa
+; CHECK-ASM: 	sub	x8, x8, x9
+; CHECK-ASM: 	cbz	x8, .LBB0_5
+; CHECK-ASM: // %bb.4:
+; CHECK-ASM: 	adrp	x8, B2
+; CHECK-ASM: 	add	x8, x8, :lo12:B2
+; CHECK-ASM: 	strb	wzr, [x8, x9]
+; CHECK-ASM: 	add	sp, sp, #16
+; CHECK-ASM: 	.cfi_def_cfa_offset 0
+; CHECK-ASM: 	ret
+; CHECK-ASM: .LBB0_5:                                // %trap3
+; CHECK-ASM: 	.cfi_restore_state
+; CHECK-ASM: 	brk	#0x1
+  %i.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  store i32 %i, ptr %i.addr, align 4
+  store i32 %k, ptr %k.addr, align 4
+  %0 = load i32, ptr %i.addr, align 4
+  %idxprom = sext i32 %0 to i64
+  %1 = add i64 0, %idxprom
+  %arrayidx = getelementptr inbounds [10 x i8], ptr @B, i64 0, i64 %idxprom
+  %2 = sub i64 10, %1
+  %3 = icmp ult i64 10, %1
+  %4 = icmp ult i64 %2, 1
+  %5 = or i1 %3, %4
+  br i1 %5, label %trap, label %6
+
+6:                                                ; preds = %entry
+  store i8 0, ptr %arrayidx, align 1
+  %7 = load i32, ptr %k.addr, align 4
+  %idxprom1 = sext i32 %7 to i64
+  %8 = add i64 0, %idxprom1
+  %arrayidx2 = getelementptr inbounds [10 x i8], ptr @B2, i64 0, i64 %idxprom1
+  %9 = sub i64 10, %8
+  %10 = icmp ult i64 10, %8
+  %11 = icmp ult i64 %9, 1
+  %12 = or i1 %10, %11
+  br i1 %12, label %trap3, label %13
+
+13:                                               ; preds = %6
+  store i8 0, ptr %arrayidx2, align 1
+  ret void
+
+trap:                                             ; preds = %entry
+  call void @llvm.trap() #2
+  unreachable
+
+trap3:                                            ; preds = %6
+  call void @llvm.trap() #2
+  unreachable
+}
+
+; Function Attrs: cold noreturn nounwind memory(inaccessiblemem: write)
+declare void @llvm.trap() #1
+
+attributes #0 = { noinline nounwind uwtable }
+attributes #1 = { cold noreturn nounwind memory(inaccessiblemem: write) }
+attributes #2 = { noreturn nounwind nomerge }



More information about the cfe-commits mailing list