[clang] Modify BoundsSan to improve debuggability (PR #65972)

via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 11 09:06:06 PDT 2023


llvmbot wrote:

@llvm/pr-subscribers-clang

<details>
<summary>Changes</summary>

Context
BoundsSanitizer is a mitigation that is part of UBSAN. It can be enabled in "trap" mode to crash on OOB array accesses.

Problem
BoundsSan has zero false positives meaning every crash is a OOB array access, unfortunately optimizations cause these crashes in production builds to be a bit useless because we only know which function is crashing but not which line of code.

Godbolt example of the optimization: https://godbolt.org/z/6qjax9z1b

This Diff
I wanted to provide a way to know exactly which LOC is responsible for the crash. What we do here is use the size of the basic block as an iterator to an immediate value for the ubsan trap.
--
Full diff: https://github.com/llvm/llvm-project/pull/65972.diff

4 Files Affected:

- (modified) clang/lib/CodeGen/CGExpr.cpp (+22-12) 
- (modified) clang/test/CodeGen/bounds-checking.c (+16) 
- (modified) llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp (+18-7) 
- (added) llvm/test/MC/AArch64/local-bounds-single-trap.ll (+83) 


<pre>
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 76bbeba468db643..9c9a831e6d47ee0 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -51,6 +51,12 @@
 using namespace clang;
 using namespace CodeGen;
 
+// Experiment to make sanitizers easier to debug
+static llvm::cl::opt<bool> ClSanitizeDebugDeoptimization(
+    "ubsan-unique-traps", llvm::cl::Optional,
+    llvm::cl::desc("Deoptimize traps for UBSAN so there is 1 trap per check"),
+    llvm::cl::init(false));
+
 //===--------------------------------------------------------------------===//
 //                        Miscellaneous Helper Methods
 //===--------------------------------------------------------------------===//
@@ -3553,17 +3559,28 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked,
   // check-type per function to save on code size.
   if (TrapBBs.size() <= CheckHandlerID)
     TrapBBs.resize(CheckHandlerID + 1);
+
   llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID];
 
-  if (!CGM.getCodeGenOpts().OptimizationLevel || !TrapBB ||
-      (CurCodeDecl && CurCodeDecl->hasAttr<OptimizeNoneAttr>())) {
+  if (!ClSanitizeDebugDeoptimization &&
+      CGM.getCodeGenOpts().OptimizationLevel && TrapBB &&
+      (!CurCodeDecl || !CurCodeDecl->hasAttr<OptimizeNoneAttr>())) {
+    auto Call = TrapBB->begin();
+    assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB");
+
+    Call->applyMergedLocation(Call->getDebugLoc(),
+                              Builder.getCurrentDebugLocation());
+    Builder.CreateCondBr(Checked, Cont, TrapBB);
+  } else {
     TrapBB = createBasicBlock("trap");
     Builder.CreateCondBr(Checked, Cont, TrapBB);
     EmitBlock(TrapBB);
 
-    llvm::CallInst *TrapCall =
-        Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::ubsantrap),
-                           llvm::ConstantInt::get(CGM.Int8Ty, CheckHandlerID));
+    llvm::CallInst *TrapCall = Builder.CreateCall(
+        CGM.getIntrinsic(llvm::Intrinsic::ubsantrap),
+        llvm::ConstantInt::get(CGM.Int8Ty, ClSanitizeDebugDeoptimization
+                                               ? TrapBB->getParent()->size()
+                                               : CheckHandlerID));
 
     if (!CGM.getCodeGenOpts().TrapFuncName.empty()) {
       auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name",
@@ -3573,13 +3590,6 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked,
     TrapCall->setDoesNotReturn();
     TrapCall->setDoesNotThrow();
     Builder.CreateUnreachable();
-  } else {
-    auto Call = TrapBB->begin();
-    assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB");
-
-    Call->applyMergedLocation(Call->getDebugLoc(),
-                              Builder.getCurrentDebugLocation());
-    Builder.CreateCondBr(Checked, Cont, TrapBB);
   }
 
   EmitBlock(Cont);
diff --git a/clang/test/CodeGen/bounds-checking.c b/clang/test/CodeGen/bounds-checking.c
index 1b9e28915e5d9af..636d4f289e24786 100644
--- a/clang/test/CodeGen/bounds-checking.c
+++ b/clang/test/CodeGen/bounds-checking.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsanitize=local-bounds -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fsanitize=array-bounds -O -fsanitize-trap=array-bounds -emit-llvm -triple x86_64-apple-darwin10 -DNO_DYNAMIC %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=local-bounds -fsanitize-trap=local-bounds -O3 -mllvm -bounds-checking-unique-traps -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s --check-prefixes=NOOPTLOCAL
+// RUN: %clang_cc1 -fsanitize=array-bounds -fsanitize-trap=array-bounds -O3 -mllvm -ubsan-unique-traps -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s --check-prefixes=NOOPTARRAY
 //
 // REQUIRES: x86-registered-target
 
@@ -66,3 +68,17 @@ int f7(union U *u, int i) {
   // CHECK-NOT: @llvm.ubsantrap
   return u->c[i];
 }
+
+
+char B[10];
+char B2[10];
+// CHECK-LABEL: @f8
+void f8(int i, int k) {
+  // NOOPTLOCAL: call void @llvm.ubsantrap(i8 3)
+  // NOOPTARRAY: call void @llvm.ubsantrap(i8 2)
+  B[i] = '\0';
+
+  // NOOPTLOCAL: call void @llvm.ubsantrap(i8 5)
+  // NOOPTARRAY: call void @llvm.ubsantrap(i8 4)
+  B2[k] = '\0';
+}
diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 709095184af5df5..ee5b819604170eb 100644
--- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -37,6 +37,9 @@ using namespace llvm;
 static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
                                   cl::desc("Use one trap block per function"));
 
+static cl::opt<bool> DebugTrapBB("bounds-checking-unique-traps",
+                                 cl::desc("Always use one trap per check"));
+
 STATISTIC(ChecksAdded, "Bounds checks added");
 STATISTIC(ChecksSkipped, "Bounds checks skipped");
 STATISTIC(ChecksUnable, "Bounds checks unable to add");
@@ -180,19 +183,27 @@ static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
   // will create a fresh block every time it is called.
   BasicBlock *TrapBB = nullptr;
   auto GetTrapBB = [&TrapBB](BuilderTy &IRB) {
-    if (TrapBB && SingleTrapBB)
-      return TrapBB;
-
     Function *Fn = IRB.GetInsertBlock()->getParent();
-    // FIXME: This debug location doesn't make a lot of sense in the
-    // `SingleTrapBB` case.
     auto DebugLoc = IRB.getCurrentDebugLocation();
     IRBuilder<>::InsertPointGuard Guard(IRB);
+
+    if (TrapBB && SingleTrapBB && !DebugTrapBB)
+      return TrapBB;
+
     TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
     IRB.SetInsertPoint(TrapBB);
 
-    auto *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
-    CallInst *TrapCall = IRB.CreateCall(F, {});
+    Intrinsic::ID IntrID = DebugTrapBB ? Intrinsic::ubsantrap : Intrinsic::trap;
+    auto *F = Intrinsic::getDeclaration(Fn->getParent(), IntrID);
+
+    CallInst *TrapCall;
+    if (DebugTrapBB) {
+      TrapCall =
+          IRB.CreateCall(F, ConstantInt::get(IRB.getInt8Ty(), Fn->size()));
+    } else {
+      TrapCall = IRB.CreateCall(F, {});
+    }
+
     TrapCall->setDoesNotReturn();
     TrapCall->setDoesNotThrow();
     TrapCall->setDebugLoc(DebugLoc);
diff --git a/llvm/test/MC/AArch64/local-bounds-single-trap.ll b/llvm/test/MC/AArch64/local-bounds-single-trap.ll
new file mode 100644
index 000000000000000..53a0e010537f096
--- /dev/null
+++ b/llvm/test/MC/AArch64/local-bounds-single-trap.ll
@@ -0,0 +1,83 @@
+; RUN: llc -O3 -mtriple arm64-linux -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-ASM
+; What this test does is check that even with nomerge, the functions still get merged in
+; compiled code as the ubsantrap call gets lowered to a single instruction: brk.
+
+
+ at B = dso_local global [10 x i8] zeroinitializer, align 1
+ at B2 = dso_local global [10 x i8] zeroinitializer, align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
+entry:
+; CHECK-ASM: 	cmp	x8, #10
+; CHECK-ASM: 	b.hi	.LBB0_5
+; CHECK-ASM: // %bb.1:                               // %entry
+; CHECK-ASM: 	mov	w9, #10                         // =0xa
+; CHECK-ASM: 	sub	x9, x9, x8
+; CHECK-ASM: 	cbz	x9, .LBB0_5
+; CHECK-ASM: // %bb.2:
+; CHECK-ASM: 	ldrsw	x9, [sp, #8]
+; CHECK-ASM: 	adrp	x10, B
+; CHECK-ASM: 	add	x10, x10, :lo12:B
+; CHECK-ASM: 	strb	wzr, [x10, x8]
+; CHECK-ASM: 	cmp	x9, #10
+; CHECK-ASM: 	b.hi	.LBB0_5
+; CHECK-ASM: // %bb.3:
+; CHECK-ASM: 	mov	w8, #10                         // =0xa
+; CHECK-ASM: 	sub	x8, x8, x9
+; CHECK-ASM: 	cbz	x8, .LBB0_5
+; CHECK-ASM: // %bb.4:
+; CHECK-ASM: 	adrp	x8, B2
+; CHECK-ASM: 	add	x8, x8, :lo12:B2
+; CHECK-ASM: 	strb	wzr, [x8, x9]
+; CHECK-ASM: 	add	sp, sp, #16
+; CHECK-ASM: 	.cfi_def_cfa_offset 0
+; CHECK-ASM: 	ret
+; CHECK-ASM: .LBB0_5:                                // %trap3
+; CHECK-ASM: 	.cfi_restore_state
+; CHECK-ASM: 	brk	#0x1
+  %i.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  store i32 %i, ptr %i.addr, align 4
+  store i32 %k, ptr %k.addr, align 4
+  %0 = load i32, ptr %i.addr, align 4
+  %idxprom = sext i32 %0 to i64
+  %1 = add i64 0, %idxprom
+  %arrayidx = getelementptr inbounds [10 x i8], ptr @B, i64 0, i64 %idxprom
+  %2 = sub i64 10, %1
+  %3 = icmp ult i64 10, %1
+  %4 = icmp ult i64 %2, 1
+  %5 = or i1 %3, %4
+  br i1 %5, label %trap, label %6
+
+6:                                                ; preds = %entry
+  store i8 0, ptr %arrayidx, align 1
+  %7 = load i32, ptr %k.addr, align 4
+  %idxprom1 = sext i32 %7 to i64
+  %8 = add i64 0, %idxprom1
+  %arrayidx2 = getelementptr inbounds [10 x i8], ptr @B2, i64 0, i64 %idxprom1
+  %9 = sub i64 10, %8
+  %10 = icmp ult i64 10, %8
+  %11 = icmp ult i64 %9, 1
+  %12 = or i1 %10, %11
+  br i1 %12, label %trap3, label %13
+
+13:                                               ; preds = %6
+  store i8 0, ptr %arrayidx2, align 1
+  ret void
+
+trap:                                             ; preds = %entry
+  call void @llvm.trap() #2
+  unreachable
+
+trap3:                                            ; preds = %6
+  call void @llvm.trap() #2
+  unreachable
+}
+
+; Function Attrs: cold noreturn nounwind memory(inaccessiblemem: write)
+declare void @llvm.trap() #1
+
+attributes #0 = { noinline nounwind uwtable }
+attributes #1 = { cold noreturn nounwind memory(inaccessiblemem: write) }
+attributes #2 = { noreturn nounwind nomerge }
</pre>

</details>

https://github.com/llvm/llvm-project/pull/65972


More information about the cfe-commits mailing list