[llvm] 832b3b2 - Modify BoundsSan to improve debuggability (#65972)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 29 15:34:38 PDT 2023


Author: Oskar Wirga
Date: 2023-09-29T15:34:31-07:00
New Revision: 832b3b2462c1bb8e2b41ef96fe0ffd3791df0e12

URL: https://github.com/llvm/llvm-project/commit/832b3b2462c1bb8e2b41ef96fe0ffd3791df0e12
DIFF: https://github.com/llvm/llvm-project/commit/832b3b2462c1bb8e2b41ef96fe0ffd3791df0e12.diff

LOG: Modify BoundsSan to improve debuggability (#65972)

Context
BoundsSanitizer is a mitigation that is part of UBSAN. It can be enabled
in "trap" mode to crash on OOB array accesses.

Problem
BoundsSan has zero false positives meaning every crash is a OOB array
access, unfortunately optimizations cause these crashes in production
builds to be a bit useless because we only know which function is
crashing but not which line of code.

Godbolt example of the optimization: https://godbolt.org/z/6qjax9z1b

This Diff
I wanted to provide a way to know exactly which LOC is responsible for
the crash. What we do here is use the size of the basic block as an
iterator to an immediate value for the ubsan trap.

Previous discussion: https://reviews.llvm.org/D148654

Added: 
    llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll
    llvm/test/MC/AArch64/local-bounds-single-trap.ll

Modified: 
    clang/lib/CodeGen/CGExpr.cpp
    clang/test/CodeGen/bounds-checking.c
    llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 3123e278985f210..7b4389c874b3f90 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -51,6 +51,12 @@
 using namespace clang;
 using namespace CodeGen;
 
+// Experiment to make sanitizers easier to debug
+static llvm::cl::opt<bool> ClSanitizeDebugDeoptimization(
+    "ubsan-unique-traps", llvm::cl::Optional,
+    llvm::cl::desc("Deoptimize traps for UBSAN so there is 1 trap per check"),
+    llvm::cl::init(false));
+
 //===--------------------------------------------------------------------===//
 //                        Miscellaneous Helper Methods
 //===--------------------------------------------------------------------===//
@@ -3553,17 +3559,28 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked,
   // check-type per function to save on code size.
   if (TrapBBs.size() <= CheckHandlerID)
     TrapBBs.resize(CheckHandlerID + 1);
+
   llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID];
 
-  if (!CGM.getCodeGenOpts().OptimizationLevel || !TrapBB ||
-      (CurCodeDecl && CurCodeDecl->hasAttr<OptimizeNoneAttr>())) {
+  if (!ClSanitizeDebugDeoptimization &&
+      CGM.getCodeGenOpts().OptimizationLevel && TrapBB &&
+      (!CurCodeDecl || !CurCodeDecl->hasAttr<OptimizeNoneAttr>())) {
+    auto Call = TrapBB->begin();
+    assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB");
+
+    Call->applyMergedLocation(Call->getDebugLoc(),
+                              Builder.getCurrentDebugLocation());
+    Builder.CreateCondBr(Checked, Cont, TrapBB);
+  } else {
     TrapBB = createBasicBlock("trap");
     Builder.CreateCondBr(Checked, Cont, TrapBB);
     EmitBlock(TrapBB);
 
-    llvm::CallInst *TrapCall =
-        Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::ubsantrap),
-                           llvm::ConstantInt::get(CGM.Int8Ty, CheckHandlerID));
+    llvm::CallInst *TrapCall = Builder.CreateCall(
+        CGM.getIntrinsic(llvm::Intrinsic::ubsantrap),
+        llvm::ConstantInt::get(CGM.Int8Ty, ClSanitizeDebugDeoptimization
+                                               ? TrapBB->getParent()->size()
+                                               : CheckHandlerID));
 
     if (!CGM.getCodeGenOpts().TrapFuncName.empty()) {
       auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name",
@@ -3573,13 +3590,6 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked,
     TrapCall->setDoesNotReturn();
     TrapCall->setDoesNotThrow();
     Builder.CreateUnreachable();
-  } else {
-    auto Call = TrapBB->begin();
-    assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB");
-
-    Call->applyMergedLocation(Call->getDebugLoc(),
-                              Builder.getCurrentDebugLocation());
-    Builder.CreateCondBr(Checked, Cont, TrapBB);
   }
 
   EmitBlock(Cont);

diff  --git a/clang/test/CodeGen/bounds-checking.c b/clang/test/CodeGen/bounds-checking.c
index 1b9e28915e5d9af..636d4f289e24786 100644
--- a/clang/test/CodeGen/bounds-checking.c
+++ b/clang/test/CodeGen/bounds-checking.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsanitize=local-bounds -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fsanitize=array-bounds -O -fsanitize-trap=array-bounds -emit-llvm -triple x86_64-apple-darwin10 -DNO_DYNAMIC %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=local-bounds -fsanitize-trap=local-bounds -O3 -mllvm -bounds-checking-unique-traps -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s --check-prefixes=NOOPTLOCAL
+// RUN: %clang_cc1 -fsanitize=array-bounds -fsanitize-trap=array-bounds -O3 -mllvm -ubsan-unique-traps -emit-llvm -triple x86_64-apple-darwin10 %s -o - | FileCheck %s --check-prefixes=NOOPTARRAY
 //
 // REQUIRES: x86-registered-target
 
@@ -66,3 +68,17 @@ int f7(union U *u, int i) {
   // CHECK-NOT: @llvm.ubsantrap
   return u->c[i];
 }
+
+
+char B[10];
+char B2[10];
+// CHECK-LABEL: @f8
+void f8(int i, int k) {
+  // NOOPTLOCAL: call void @llvm.ubsantrap(i8 3)
+  // NOOPTARRAY: call void @llvm.ubsantrap(i8 2)
+  B[i] = '\0';
+
+  // NOOPTLOCAL: call void @llvm.ubsantrap(i8 5)
+  // NOOPTARRAY: call void @llvm.ubsantrap(i8 4)
+  B2[k] = '\0';
+}

diff  --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 709095184af5df5..ee5b819604170eb 100644
--- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -37,6 +37,9 @@ using namespace llvm;
 static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
                                   cl::desc("Use one trap block per function"));
 
+static cl::opt<bool> DebugTrapBB("bounds-checking-unique-traps",
+                                 cl::desc("Always use one trap per check"));
+
 STATISTIC(ChecksAdded, "Bounds checks added");
 STATISTIC(ChecksSkipped, "Bounds checks skipped");
 STATISTIC(ChecksUnable, "Bounds checks unable to add");
@@ -180,19 +183,27 @@ static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
   // will create a fresh block every time it is called.
   BasicBlock *TrapBB = nullptr;
   auto GetTrapBB = [&TrapBB](BuilderTy &IRB) {
-    if (TrapBB && SingleTrapBB)
-      return TrapBB;
-
     Function *Fn = IRB.GetInsertBlock()->getParent();
-    // FIXME: This debug location doesn't make a lot of sense in the
-    // `SingleTrapBB` case.
     auto DebugLoc = IRB.getCurrentDebugLocation();
     IRBuilder<>::InsertPointGuard Guard(IRB);
+
+    if (TrapBB && SingleTrapBB && !DebugTrapBB)
+      return TrapBB;
+
     TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
     IRB.SetInsertPoint(TrapBB);
 
-    auto *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
-    CallInst *TrapCall = IRB.CreateCall(F, {});
+    Intrinsic::ID IntrID = DebugTrapBB ? Intrinsic::ubsantrap : Intrinsic::trap;
+    auto *F = Intrinsic::getDeclaration(Fn->getParent(), IntrID);
+
+    CallInst *TrapCall;
+    if (DebugTrapBB) {
+      TrapCall =
+          IRB.CreateCall(F, ConstantInt::get(IRB.getInt8Ty(), Fn->size()));
+    } else {
+      TrapCall = IRB.CreateCall(F, {});
+    }
+
     TrapCall->setDoesNotReturn();
     TrapCall->setDoesNotThrow();
     TrapCall->setDebugLoc(DebugLoc);

diff  --git a/llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll b/llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll
new file mode 100644
index 000000000000000..a3f34007e9b09f8
--- /dev/null
+++ b/llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=bounds-checking -bounds-checking-unique-traps -S | FileCheck %s
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:64:64:64:48-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare noalias ptr @malloc(i64) nounwind allocsize(0)
+
+define void @f() nounwind {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call ptr @malloc(i64 32)
+; CHECK-NEXT:    [[IDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
+; CHECK-NEXT:    br label [[TRAP:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    store i32 3, ptr [[IDX]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call ptr @malloc(i64 32)
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    br label [[TRAP1:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    store i32 3, ptr [[IDX2]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call ptr @malloc(i64 32)
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 8
+; CHECK-NEXT:    br label [[TRAP2:%.*]]
+; CHECK:       6:
+; CHECK-NEXT:    store i32 3, ptr [[IDX3]], align 4
+; CHECK-NEXT:    ret void
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.ubsantrap(i8 3) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       trap1:
+; CHECK-NEXT:    call void @llvm.ubsantrap(i8 5) #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       trap2:
+; CHECK-NEXT:    call void @llvm.ubsantrap(i8 7) #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+;
+  %1 = tail call ptr @malloc(i64 32)
+  %idx = getelementptr inbounds i32, ptr %1, i64 8
+  store i32 3, ptr %idx, align 4
+  %2 = tail call ptr @malloc(i64 32)
+  %idx2 = getelementptr inbounds i32, ptr %2, i64 8
+  store i32 3, ptr %idx2, align 4
+  %3 = tail call ptr @malloc(i64 32)
+  %idx3 = getelementptr inbounds i32, ptr %3, i64 8
+  store i32 3, ptr %idx3, align 4
+  ret void
+}

diff  --git a/llvm/test/MC/AArch64/local-bounds-single-trap.ll b/llvm/test/MC/AArch64/local-bounds-single-trap.ll
new file mode 100644
index 000000000000000..53a0e010537f096
--- /dev/null
+++ b/llvm/test/MC/AArch64/local-bounds-single-trap.ll
@@ -0,0 +1,83 @@
+; RUN: llc -O3 -mtriple arm64-linux -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-ASM
+; What this test does is check that even with nomerge, the functions still get merged in
+; compiled code as the ubsantrap call gets lowered to a single instruction: brk.
+
+
+ at B = dso_local global [10 x i8] zeroinitializer, align 1
+ at B2 = dso_local global [10 x i8] zeroinitializer, align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
+entry:
+; CHECK-ASM: 	cmp	x8, #10
+; CHECK-ASM: 	b.hi	.LBB0_5
+; CHECK-ASM: // %bb.1:                               // %entry
+; CHECK-ASM: 	mov	w9, #10                         // =0xa
+; CHECK-ASM: 	sub	x9, x9, x8
+; CHECK-ASM: 	cbz	x9, .LBB0_5
+; CHECK-ASM: // %bb.2:
+; CHECK-ASM: 	ldrsw	x9, [sp, #8]
+; CHECK-ASM: 	adrp	x10, B
+; CHECK-ASM: 	add	x10, x10, :lo12:B
+; CHECK-ASM: 	strb	wzr, [x10, x8]
+; CHECK-ASM: 	cmp	x9, #10
+; CHECK-ASM: 	b.hi	.LBB0_5
+; CHECK-ASM: // %bb.3:
+; CHECK-ASM: 	mov	w8, #10                         // =0xa
+; CHECK-ASM: 	sub	x8, x8, x9
+; CHECK-ASM: 	cbz	x8, .LBB0_5
+; CHECK-ASM: // %bb.4:
+; CHECK-ASM: 	adrp	x8, B2
+; CHECK-ASM: 	add	x8, x8, :lo12:B2
+; CHECK-ASM: 	strb	wzr, [x8, x9]
+; CHECK-ASM: 	add	sp, sp, #16
+; CHECK-ASM: 	.cfi_def_cfa_offset 0
+; CHECK-ASM: 	ret
+; CHECK-ASM: .LBB0_5:                                // %trap3
+; CHECK-ASM: 	.cfi_restore_state
+; CHECK-ASM: 	brk	#0x1
+  %i.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  store i32 %i, ptr %i.addr, align 4
+  store i32 %k, ptr %k.addr, align 4
+  %0 = load i32, ptr %i.addr, align 4
+  %idxprom = sext i32 %0 to i64
+  %1 = add i64 0, %idxprom
+  %arrayidx = getelementptr inbounds [10 x i8], ptr @B, i64 0, i64 %idxprom
+  %2 = sub i64 10, %1
+  %3 = icmp ult i64 10, %1
+  %4 = icmp ult i64 %2, 1
+  %5 = or i1 %3, %4
+  br i1 %5, label %trap, label %6
+
+6:                                                ; preds = %entry
+  store i8 0, ptr %arrayidx, align 1
+  %7 = load i32, ptr %k.addr, align 4
+  %idxprom1 = sext i32 %7 to i64
+  %8 = add i64 0, %idxprom1
+  %arrayidx2 = getelementptr inbounds [10 x i8], ptr @B2, i64 0, i64 %idxprom1
+  %9 = sub i64 10, %8
+  %10 = icmp ult i64 10, %8
+  %11 = icmp ult i64 %9, 1
+  %12 = or i1 %10, %11
+  br i1 %12, label %trap3, label %13
+
+13:                                               ; preds = %6
+  store i8 0, ptr %arrayidx2, align 1
+  ret void
+
+trap:                                             ; preds = %entry
+  call void @llvm.trap() #2
+  unreachable
+
+trap3:                                            ; preds = %6
+  call void @llvm.trap() #2
+  unreachable
+}
+
+; Function Attrs: cold noreturn nounwind memory(inaccessiblemem: write)
+declare void @llvm.trap() #1
+
+attributes #0 = { noinline nounwind uwtable }
+attributes #1 = { cold noreturn nounwind memory(inaccessiblemem: write) }
+attributes #2 = { noreturn nounwind nomerge }


        


More information about the llvm-commits mailing list