[llvm] [ASAN][AMDGPU] Make address sanitizer checks more efficient for the divergent target. (PR #72247)

Valery Pykhtin via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 4 01:04:22 PST 2023


https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/72247

>From 8bc22e15698398f280c9d6d017249446aaa77501 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Tue, 14 Nov 2023 11:42:00 +0100
Subject: [PATCH] [ASAN][AMDGPU] Make address sanitizer checks more efficient
 for the divergent target.

---
 .../Instrumentation/AddressSanitizer.cpp      |  38 +++-
 .../asan_instrument_constant_address_space.ll |  74 ++++----
 .../asan_instrument_generic_address_space.ll  | 172 ++++++++++--------
 .../asan_instrument_global_address_space.ll   | 148 ++++++++-------
 4 files changed, 259 insertions(+), 173 deletions(-)
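
In outline, the change below does the following for AMDGCN targets: instead of chaining the fast-path shadow check and the slow-path granule check through two divergent branches, both are combined into a single per-lane condition with an 'and'. In non-recover mode that condition is fed to llvm.amdgcn.ballot.i64 and the branch into the reporting code is taken on the wave-uniform "any lane is poisoned" result, so the outer branch does not diverge; inside the asan.report block a per-lane branch guards the __asan_report_* call, which is now followed by a call to llvm.amdgcn.unreachable rather than a plain unreachable. In recover mode the ballot is skipped and the per-lane condition branches directly to the __asan_report_*_noabort call.

A minimal hand-written IR sketch of the non-recover pattern for a 4-byte store to global memory (the kernel, the value names and the shadow offset are illustrative, not copied from the tests):

target triple = "amdgcn-amd-amdhsa"

define protected amdgpu_kernel void @sketch_store4(ptr addrspace(1) %p) sanitize_address {
entry:
  %addr = ptrtoint ptr addrspace(1) %p to i64
  %shadow.idx = lshr i64 %addr, 3
  %shadow.addr = add i64 %shadow.idx, 2147450880     ; illustrative shadow offset
  %shadow.ptr = inttoptr i64 %shadow.addr to ptr
  %shadow = load i8, ptr %shadow.ptr, align 1
  %fast = icmp ne i8 %shadow, 0                      ; fast path: shadow byte is non-zero
  %low = and i64 %addr, 7
  %last = add i64 %low, 3
  %last.i8 = trunc i64 %last to i8
  %slow = icmp sge i8 %last.i8, %shadow              ; slow path: last byte reaches the poisoned part of the granule
  %bad = and i1 %fast, %slow                         ; per-lane "access is poisoned"
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %bad)
  %any.bad = icmp ne i64 %ballot, 0                  ; wave-uniform: some lane is poisoned
  br i1 %any.bad, label %asan.report, label %cont, !prof !0

asan.report:                                         ; entered uniformly by the whole wave
  br i1 %bad, label %report, label %join             ; per-lane branch to the report call

report:
  call void @__asan_report_store4(i64 %addr)
  call void @llvm.amdgcn.unreachable()
  br label %join

join:
  br label %cont

cont:
  store i32 0, ptr addrspace(1) %p, align 4
  ret void
}

declare i64 @llvm.amdgcn.ballot.i64(i1)
declare void @llvm.amdgcn.unreachable()
declare void @__asan_report_store4(i64)

!0 = !{!"branch_weights", i32 1, i32 100000}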

diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index da157c966bfcb..49e31796e69a3 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -174,6 +174,8 @@ const char kAsanAllocasUnpoison[] = "__asan_allocas_unpoison";
 
 const char kAMDGPUAddressSharedName[] = "llvm.amdgcn.is.shared";
 const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private";
+const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64";
+const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable";
 
 // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
 static const size_t kNumberOfAccessSizes = 5;
@@ -699,6 +701,8 @@ struct AddressSanitizer {
                                        Instruction *InsertBefore, Value *Addr,
                                        uint32_t TypeStoreSize, bool IsWrite,
                                        Value *SizeArgument);
+  Instruction *genAMDGPUReportBlock(IRBuilder<> &IRB, Value *Cond,
+                                    bool Recover);
   void instrumentUnusualSizeOrAlignment(Instruction *I,
                                         Instruction *InsertBefore, Value *Addr,
                                         TypeSize TypeStoreSize, bool IsWrite,
@@ -1721,6 +1725,30 @@ Instruction *AddressSanitizer::instrumentAMDGPUAddress(
   return InsertBefore;
 }
 
+Instruction *AddressSanitizer::genAMDGPUReportBlock(IRBuilder<> &IRB,
+                                                    Value *Cond, bool Recover) {
+  Module &M = *IRB.GetInsertBlock()->getModule();
+  Value *ReportCond = Cond;
+  if (!Recover) {
+    auto Ballot = M.getOrInsertFunction(kAMDGPUBallotName, IRB.getInt64Ty(),
+                                        IRB.getInt1Ty());
+    ReportCond = IRB.CreateIsNotNull(IRB.CreateCall(Ballot, {Cond}));
+  }
+
+  auto *Trm =
+      SplitBlockAndInsertIfThen(ReportCond, &*IRB.GetInsertPoint(), false,
+                                MDBuilder(*C).createBranchWeights(1, 100000));
+  Trm->getParent()->setName("asan.report");
+
+  if (Recover)
+    return Trm;
+
+  Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
+  IRB.SetInsertPoint(Trm);
+  return IRB.CreateCall(
+      M.getOrInsertFunction(kAMDGPUUnreachableName, IRB.getVoidTy()), {});
+}
+
 void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
                                          Instruction *InsertBefore, Value *Addr,
                                          MaybeAlign Alignment,
@@ -1772,7 +1800,15 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
   size_t Granularity = 1ULL << Mapping.Scale;
   Instruction *CrashTerm = nullptr;
 
-  if (ClAlwaysSlowPath || (TypeStoreSize < 8 * Granularity)) {
+  bool GenSlowPath = (ClAlwaysSlowPath || (TypeStoreSize < 8 * Granularity));
+
+  if (TargetTriple.isAMDGCN()) {
+    if (GenSlowPath) {
+      auto *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeStoreSize);
+      Cmp = IRB.CreateAnd(Cmp, Cmp2);
+    }
+    CrashTerm = genAMDGPUReportBlock(IRB, Cmp, Recover);
+  } else if (GenSlowPath) {
     // We use branch weights for the slow path check, to indicate that the slow
     // path is rarely taken. This seems to be the case for SPEC benchmarks.
     Instruction *CheckTerm = SplitBlockAndInsertIfThen(
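
The RECOV-prefixed checks in the tests below correspond to recover mode, where genAMDGPUReportBlock returns before emitting the ballot: the combined per-lane condition branches straight to an asan.report block that calls the _noabort report function and falls through. Assuming %addr and the per-lane condition %bad have already been computed as in the sketch above, the shape is roughly:

define protected void @report_shape_recover(i64 %addr, i1 %bad) sanitize_address {
entry:
  br i1 %bad, label %asan.report, label %cont, !prof !0

asan.report:
  call void @__asan_report_store4_noabort(i64 %addr)
  br label %cont

cont:
  ret void
}

declare void @__asan_report_store4_noabort(i64)

!0 = !{!"branch_weights", i32 1, i32 100000}
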
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll
index 47b289ba32b80..93f8f48f1e613 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll
@@ -18,17 +18,23 @@ define protected amdgpu_kernel void @constant_load(i64 %i) sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF2:![0-9]+]]
-; CHECK:       6:
-; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
-; CHECK:       11:
-; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    unreachable
-; CHECK:       12:
+; CHECK-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; CHECK-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
+; CHECK:       13:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    br label [[TMP15]]
+; CHECK:       15:
 ; CHECK-NEXT:    [[Q:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -42,19 +48,16 @@ define protected amdgpu_kernel void @constant_load(i64 %i) sanitize_address {
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF2:![0-9]+]]
-; RECOV:       6:
-; RECOV-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; RECOV-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; RECOV-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; RECOV-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; RECOV:       11:
+; RECOV-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; RECOV-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; RECOV-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; RECOV-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; RECOV-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; RECOV-NEXT:    br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF2:![0-9]+]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load4_noabort(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
-; RECOV-NEXT:    br label [[TMP12]]
-; RECOV:       12:
-; RECOV-NEXT:    br label [[TMP13]]
-; RECOV:       13:
+; RECOV-NEXT:    br label [[TMP11]]
+; RECOV:       11:
 ; RECOV-NEXT:    [[Q:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -75,11 +78,18 @@ define protected amdgpu_kernel void @constant_load_8(i64 %i) sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; CHECK:       6:
-; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       7:
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[ASAN_REPORT:%.*]], label [[TMP10:%.*]], !prof [[PROF2]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP8:%.*]], label [[TMP9:%.*]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
 ; CHECK-NEXT:    [[Q:%.*]] = load i64, ptr addrspace(4) [[A]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -93,11 +103,11 @@ define protected amdgpu_kernel void @constant_load_8(i64 %i) sanitize_address {
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; RECOV:       6:
+; RECOV-NEXT:    br i1 [[TMP5]], label [[ASAN_REPORT:%.*]], label [[TMP6:%.*]], !prof [[PROF2]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load8_noabort(i64 [[TMP0]]) #[[ATTR3]]
-; RECOV-NEXT:    br label [[TMP7]]
-; RECOV:       7:
+; RECOV-NEXT:    br label [[TMP6]]
+; RECOV:       6:
 ; RECOV-NEXT:    [[Q:%.*]] = load i64, ptr addrspace(4) [[A]], align 8
 ; RECOV-NEXT:    ret void
 ;
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
index 58af1eafa1801..121b429953f3d 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
@@ -13,7 +13,7 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP18:%.*]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP21:%.*]]
 ; CHECK:       4:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -21,19 +21,25 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP17:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK:       11:
-; CHECK-NEXT:    [[TMP12:%.*]] = and i64 [[TMP5]], 7
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 3
-; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i8
-; CHECK-NEXT:    [[TMP15:%.*]] = icmp sge i8 [[TMP14]], [[TMP9]]
-; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17]]
-; CHECK:       16:
-; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP5]]) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    unreachable
-; CHECK:       17:
-; CHECK-NEXT:    br label [[TMP18]]
+; CHECK-NEXT:    [[TMP11:%.*]] = and i64 [[TMP5]], 7
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], 3
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp sge i8 [[TMP13]], [[TMP9]]
+; CHECK-NEXT:    [[TMP15:%.*]] = and i1 [[TMP10]], [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP15]])
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP16]], 0
+; CHECK-NEXT:    br i1 [[TMP17]], label [[ASAN_REPORT:%.*]], label [[TMP20:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP18:%.*]], label [[TMP19:%.*]]
 ; CHECK:       18:
+; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP5]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP19]]
+; CHECK:       19:
+; CHECK-NEXT:    br label [[TMP20]]
+; CHECK:       20:
+; CHECK-NEXT:    br label [[TMP21]]
+; CHECK:       21:
 ; CHECK-NEXT:    store i32 0, ptr [[Q]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -45,7 +51,7 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP19:%.*]]
+; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP17:%.*]]
 ; RECOV:       4:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -53,21 +59,18 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; RECOV-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; RECOV-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; RECOV-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP18:%.*]], !prof [[PROF0:![0-9]+]]
-; RECOV:       11:
-; RECOV-NEXT:    [[TMP12:%.*]] = and i64 [[TMP5]], 7
-; RECOV-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 3
-; RECOV-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i8
-; RECOV-NEXT:    [[TMP15:%.*]] = icmp sge i8 [[TMP14]], [[TMP9]]
-; RECOV-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17:%.*]]
-; RECOV:       16:
+; RECOV-NEXT:    [[TMP11:%.*]] = and i64 [[TMP5]], 7
+; RECOV-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], 3
+; RECOV-NEXT:    [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8
+; RECOV-NEXT:    [[TMP14:%.*]] = icmp sge i8 [[TMP13]], [[TMP9]]
+; RECOV-NEXT:    [[TMP15:%.*]] = and i1 [[TMP10]], [[TMP14]]
+; RECOV-NEXT:    br i1 [[TMP15]], label [[ASAN_REPORT:%.*]], label [[TMP16:%.*]], !prof [[PROF0:![0-9]+]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_store4_noabort(i64 [[TMP5]]) #[[ATTR3:[0-9]+]]
+; RECOV-NEXT:    br label [[TMP16]]
+; RECOV:       16:
 ; RECOV-NEXT:    br label [[TMP17]]
 ; RECOV:       17:
-; RECOV-NEXT:    br label [[TMP18]]
-; RECOV:       18:
-; RECOV-NEXT:    br label [[TMP19]]
-; RECOV:       19:
 ; RECOV-NEXT:    store i32 0, ptr [[Q]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -87,7 +90,7 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP18:%.*]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP21:%.*]]
 ; CHECK:       4:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -95,19 +98,25 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP17:%.*]], !prof [[PROF0]]
-; CHECK:       11:
-; CHECK-NEXT:    [[TMP12:%.*]] = and i64 [[TMP5]], 7
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 3
-; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i8
-; CHECK-NEXT:    [[TMP15:%.*]] = icmp sge i8 [[TMP14]], [[TMP9]]
-; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17]]
-; CHECK:       16:
-; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP5]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       17:
-; CHECK-NEXT:    br label [[TMP18]]
+; CHECK-NEXT:    [[TMP11:%.*]] = and i64 [[TMP5]], 7
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], 3
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp sge i8 [[TMP13]], [[TMP9]]
+; CHECK-NEXT:    [[TMP15:%.*]] = and i1 [[TMP10]], [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP15]])
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP16]], 0
+; CHECK-NEXT:    br i1 [[TMP17]], label [[ASAN_REPORT:%.*]], label [[TMP20:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP18:%.*]], label [[TMP19:%.*]]
 ; CHECK:       18:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP5]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP19]]
+; CHECK:       19:
+; CHECK-NEXT:    br label [[TMP20]]
+; CHECK:       20:
+; CHECK-NEXT:    br label [[TMP21]]
+; CHECK:       21:
 ; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[Q]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -119,7 +128,7 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP19:%.*]]
+; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP17:%.*]]
 ; RECOV:       4:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -127,21 +136,18 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; RECOV-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; RECOV-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; RECOV-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP18:%.*]], !prof [[PROF0]]
-; RECOV:       11:
-; RECOV-NEXT:    [[TMP12:%.*]] = and i64 [[TMP5]], 7
-; RECOV-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 3
-; RECOV-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i8
-; RECOV-NEXT:    [[TMP15:%.*]] = icmp sge i8 [[TMP14]], [[TMP9]]
-; RECOV-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17:%.*]]
-; RECOV:       16:
+; RECOV-NEXT:    [[TMP11:%.*]] = and i64 [[TMP5]], 7
+; RECOV-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], 3
+; RECOV-NEXT:    [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8
+; RECOV-NEXT:    [[TMP14:%.*]] = icmp sge i8 [[TMP13]], [[TMP9]]
+; RECOV-NEXT:    [[TMP15:%.*]] = and i1 [[TMP10]], [[TMP14]]
+; RECOV-NEXT:    br i1 [[TMP15]], label [[ASAN_REPORT:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load4_noabort(i64 [[TMP5]]) #[[ATTR3]]
+; RECOV-NEXT:    br label [[TMP16]]
+; RECOV:       16:
 ; RECOV-NEXT:    br label [[TMP17]]
 ; RECOV:       17:
-; RECOV-NEXT:    br label [[TMP18]]
-; RECOV:       18:
-; RECOV-NEXT:    br label [[TMP19]]
-; RECOV:       19:
 ; RECOV-NEXT:    [[R:%.*]] = load i32, ptr [[Q]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -161,7 +167,7 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP16:%.*]]
 ; CHECK:       4:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -169,13 +175,20 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; CHECK:       11:
-; CHECK-NEXT:    call void @__asan_report_store8(i64 [[TMP5]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       12:
-; CHECK-NEXT:    br label [[TMP13]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
 ; CHECK:       13:
+; CHECK-NEXT:    call void @__asan_report_store8(i64 [[TMP5]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    br label [[TMP15]]
+; CHECK:       15:
+; CHECK-NEXT:    br label [[TMP16]]
+; CHECK:       16:
 ; CHECK-NEXT:    store i64 0, ptr [[Q]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -187,7 +200,7 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
+; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
 ; RECOV:       4:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -195,13 +208,13 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; RECOV-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; RECOV-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; RECOV-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; RECOV:       11:
+; RECOV-NEXT:    br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_store8_noabort(i64 [[TMP5]]) #[[ATTR3]]
+; RECOV-NEXT:    br label [[TMP11]]
+; RECOV:       11:
 ; RECOV-NEXT:    br label [[TMP12]]
 ; RECOV:       12:
-; RECOV-NEXT:    br label [[TMP13]]
-; RECOV:       13:
 ; RECOV-NEXT:    store i64 0, ptr [[Q]], align 8
 ; RECOV-NEXT:    ret void
 ;
@@ -220,7 +233,7 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP16:%.*]]
 ; CHECK:       4:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -228,13 +241,20 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; CHECK:       11:
-; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP5]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       12:
-; CHECK-NEXT:    br label [[TMP13]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
 ; CHECK:       13:
+; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP5]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    br label [[TMP15]]
+; CHECK:       15:
+; CHECK-NEXT:    br label [[TMP16]]
+; CHECK:       16:
 ; CHECK-NEXT:    [[R:%.*]] = load i64, ptr [[Q]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -246,7 +266,7 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
+; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
 ; RECOV:       4:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -254,13 +274,13 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; RECOV-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; RECOV-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; RECOV-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; RECOV:       11:
+; RECOV-NEXT:    br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load8_noabort(i64 [[TMP5]]) #[[ATTR3]]
+; RECOV-NEXT:    br label [[TMP11]]
+; RECOV:       11:
 ; RECOV-NEXT:    br label [[TMP12]]
 ; RECOV:       12:
-; RECOV-NEXT:    br label [[TMP13]]
-; RECOV:       13:
 ; RECOV-NEXT:    [[R:%.*]] = load i64, ptr [[Q]], align 8
 ; RECOV-NEXT:    ret void
 ;
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll
index e792c453a723e..0d88e82a142e2 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll
@@ -14,17 +14,23 @@ define protected amdgpu_kernel void @global_store(ptr addrspace(1) %p, i32 %i) s
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK:       6:
-; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
-; CHECK:       11:
-; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    unreachable
-; CHECK:       12:
+; CHECK-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; CHECK-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
+; CHECK:       13:
+; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP0]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    br label [[TMP15]]
+; CHECK:       15:
 ; CHECK-NEXT:    store i32 0, ptr addrspace(1) [[P]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -37,19 +43,16 @@ define protected amdgpu_kernel void @global_store(ptr addrspace(1) %p, i32 %i) s
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF0:![0-9]+]]
-; RECOV:       6:
-; RECOV-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; RECOV-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; RECOV-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; RECOV-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; RECOV:       11:
+; RECOV-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; RECOV-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; RECOV-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; RECOV-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; RECOV-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; RECOV-NEXT:    br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF0:![0-9]+]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_store4_noabort(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
-; RECOV-NEXT:    br label [[TMP12]]
-; RECOV:       12:
-; RECOV-NEXT:    br label [[TMP13]]
-; RECOV:       13:
+; RECOV-NEXT:    br label [[TMP11]]
+; RECOV:       11:
 ; RECOV-NEXT:    store i32 0, ptr addrspace(1) [[P]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -69,17 +72,23 @@ define protected amdgpu_kernel void @global_load(ptr addrspace(1) %p, i32 %i) sa
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF0]]
-; CHECK:       6:
-; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
-; CHECK:       11:
-; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       12:
+; CHECK-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; CHECK-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
+; CHECK:       13:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    br label [[TMP15]]
+; CHECK:       15:
 ; CHECK-NEXT:    [[Q:%.*]] = load i32, ptr addrspace(1) [[P]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -92,19 +101,16 @@ define protected amdgpu_kernel void @global_load(ptr addrspace(1) %p, i32 %i) sa
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF0]]
-; RECOV:       6:
-; RECOV-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; RECOV-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; RECOV-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; RECOV-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; RECOV:       11:
+; RECOV-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; RECOV-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; RECOV-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; RECOV-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; RECOV-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; RECOV-NEXT:    br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load4_noabort(i64 [[TMP0]]) #[[ATTR3]]
-; RECOV-NEXT:    br label [[TMP12]]
-; RECOV:       12:
-; RECOV-NEXT:    br label [[TMP13]]
-; RECOV:       13:
+; RECOV-NEXT:    br label [[TMP11]]
+; RECOV:       11:
 ; RECOV-NEXT:    [[Q:%.*]] = load i32, ptr addrspace(1) [[P]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -124,11 +130,18 @@ define protected amdgpu_kernel void @global_store_8(ptr addrspace(1) %p) sanitiz
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; CHECK:       6:
-; CHECK-NEXT:    call void @__asan_report_store8(i64 [[TMP0]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       7:
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[ASAN_REPORT:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP8:%.*]], label [[TMP9:%.*]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__asan_report_store8(i64 [[TMP0]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
 ; CHECK-NEXT:    store i64 0, ptr addrspace(1) [[P]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -141,11 +154,11 @@ define protected amdgpu_kernel void @global_store_8(ptr addrspace(1) %p) sanitiz
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; RECOV:       6:
+; RECOV-NEXT:    br i1 [[TMP5]], label [[ASAN_REPORT:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_store8_noabort(i64 [[TMP0]]) #[[ATTR3]]
-; RECOV-NEXT:    br label [[TMP7]]
-; RECOV:       7:
+; RECOV-NEXT:    br label [[TMP6]]
+; RECOV:       6:
 ; RECOV-NEXT:    store i64 0, ptr addrspace(1) [[P]], align 8
 ; RECOV-NEXT:    ret void
 ;
@@ -164,11 +177,18 @@ define protected amdgpu_kernel void @global_load_8(ptr addrspace(1) %p) sanitize
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; CHECK:       6:
-; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       7:
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[ASAN_REPORT:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP8:%.*]], label [[TMP9:%.*]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
 ; CHECK-NEXT:    [[Q:%.*]] = load i64, ptr addrspace(1) [[P]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -181,11 +201,11 @@ define protected amdgpu_kernel void @global_load_8(ptr addrspace(1) %p) sanitize
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; RECOV:       6:
+; RECOV-NEXT:    br i1 [[TMP5]], label [[ASAN_REPORT:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load8_noabort(i64 [[TMP0]]) #[[ATTR3]]
-; RECOV-NEXT:    br label [[TMP7]]
-; RECOV:       7:
+; RECOV-NEXT:    br label [[TMP6]]
+; RECOV:       6:
 ; RECOV-NEXT:    [[Q:%.*]] = load i64, ptr addrspace(1) [[P]], align 8
 ; RECOV-NEXT:    ret void
 ;


