[llvm] cb7fe9a - [ASAN][AMDGPU] Make address sanitizer checks more efficient for the divergent target. (#72247)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 4 13:58:37 PST 2024


Author: Valery Pykhtin
Date: 2024-01-04T13:58:32-08:00
New Revision: cb7fe9ad4c3103d90c20d55819a9e69ab66ab3d0

URL: https://github.com/llvm/llvm-project/commit/cb7fe9ad4c3103d90c20d55819a9e69ab66ab3d0
DIFF: https://github.com/llvm/llvm-project/commit/cb7fe9ad4c3103d90c20d55819a9e69ab66ab3d0.diff

LOG: [ASAN][AMDGPU] Make address sanitizer checks more efficient for the divergent target. (#72247)

Address sanitizer checks for the AMDGPU target in non-recovery mode aren't
very efficient at the moment, which can be illustrated with the following program:
```
instr_before; 
load ptr1; 
instr_in_the_middle; 
load ptr2; 
instr_after; 
```
ASAN generates the following instrumentation:
```
instr_before; 
if (sanity_check_passed(ptr1)) 
  load ptr1; 
  instr_in_the_middle; 
  if (sanity_check_passed(ptr2)) 
     load ptr2; 
     instr_after; 
  else 
     // ASAN report block 2 
     __asan_report(ptr2); // wave terminates   
     unreachable; 
else 
   // ASAN report block 1 
  __asan_report(ptr1); // wave terminates 
  unreachable; 
```
Each sanitizer check is treated as a non-uniform condition (and this is
true because some lanes may pass the check and some may not). This results
in the program above: essentially, normal program flow continues in the
_then_ blocks. Lanes that pass all sanity checks can thereby complete the
program, and the wave then terminates at the first reporting _else_ block.
For each _else_ block the compiler has to keep the exec mask and the pointer
value needed to report the error, consuming tons (megatons!) of registers
that stay live until the end of the program.

This patch changes the behavior on a failing sanity check: instead of
waiting until the passing lanes reach the end of the program, the error is
reported and the wave terminates as soon as any lane has violated the
sanity check. With this approach the sanity-check condition is treated as
uniform, and the resulting program looks much like ordinary CPU code:

```
instr_before; 
if (any_lane_violated(sanity_check_passed(ptr1)))
  // ASAN report block 1 
  __asan_report(ptr1); // abort the program 
  unreachable; 
load ptr1; 
instr_in_the_middle; 
if (any_lane_violated(sanity_check_passed(ptr2))) 
  // ASAN report block 2   
  __asan_report(ptr2); // abort the program 
  unreachable; 
load ptr2; 
instr_after; 
```

However, it has to use a trick to get through the structurizer and some later
passes: the ASAN check is generated as in recovery mode, but the reporting
function aborts; that is, the standard _unreachable_ instruction isn't used:
```
...
if (any_lane_violated(sanity_check_passed(ptr1)))
  // ASAN report block 1 
  __asan_report(ptr1); // abort the program 
  // pretend we're going to continue the program
load ptr1; 
...
```
This may create some undesirable effects:
1. The register allocator generates a lot of register save/restore code
around the asan_report call. This may potentially bloat the code, since we
have a report block for every accessed pointer.
2. Loop-invariant code in report blocks is hoisted into the loop
preheader. I'm not sure, but this can probably be solved using block
frequency information; most likely it isn't a problem at all.

These problems are to be addressed later.

### Flattening address sanitizer check 

In order to simplify divergent CFG this patch also changes the
instrumentation code from:

```
  uint64_t address = ptr; 
  sbyte *shadow_address = MemToShadow(address); 
  sbyte shadow_value = *shadow_address; 
  if (shadow_value) { 
    sbyte last_accessed_byte = (address & 7) + kAccessSize - 1; 
    if (last_accessed_byte >= shadow_value) { 
      ReportError(address, kAccessSize, kIsWrite); 
      abort(); 
    } 
  } 
```
to 
```
  uint64_t address = ptr; 
  sbyte *shadow_address = MemToShadow(address); 
  sbyte shadow_value = *shadow_address; 

  sbyte last_accessed_byte = (address & 7) + kAccessSize - 1; 
  if (shadow_value && last_accessed_byte >= shadow_value) { 
    ReportError(address, kAccessSize, kIsWrite); 
    abort(); 
  } 
```
This saves one _if_, which admittedly avoids only a few instructions, and
their latency can be hidden by the load from shadow memory.

Added: 
    

Modified: 
    llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
    llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll
    llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
    llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index afb0e6cd1548b0..d4f5bf8c393568 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -174,6 +174,8 @@ const char kAsanAllocasUnpoison[] = "__asan_allocas_unpoison";
 
 const char kAMDGPUAddressSharedName[] = "llvm.amdgcn.is.shared";
 const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private";
+const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64";
+const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable";
 
 // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
 static const size_t kNumberOfAccessSizes = 5;
@@ -699,6 +701,8 @@ struct AddressSanitizer {
                                        Instruction *InsertBefore, Value *Addr,
                                        uint32_t TypeStoreSize, bool IsWrite,
                                        Value *SizeArgument);
+  Instruction *genAMDGPUReportBlock(IRBuilder<> &IRB, Value *Cond,
+                                    bool Recover);
   void instrumentUnusualSizeOrAlignment(Instruction *I,
                                         Instruction *InsertBefore, Value *Addr,
                                         TypeSize TypeStoreSize, bool IsWrite,
@@ -1721,6 +1725,30 @@ Instruction *AddressSanitizer::instrumentAMDGPUAddress(
   return InsertBefore;
 }
 
+Instruction *AddressSanitizer::genAMDGPUReportBlock(IRBuilder<> &IRB,
+                                                    Value *Cond, bool Recover) {
+  Module &M = *IRB.GetInsertBlock()->getModule();
+  Value *ReportCond = Cond;
+  if (!Recover) {
+    auto Ballot = M.getOrInsertFunction(kAMDGPUBallotName, IRB.getInt64Ty(),
+                                        IRB.getInt1Ty());
+    ReportCond = IRB.CreateIsNotNull(IRB.CreateCall(Ballot, {Cond}));
+  }
+
+  auto *Trm =
+      SplitBlockAndInsertIfThen(ReportCond, &*IRB.GetInsertPoint(), false,
+                                MDBuilder(*C).createBranchWeights(1, 100000));
+  Trm->getParent()->setName("asan.report");
+
+  if (Recover)
+    return Trm;
+
+  Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
+  IRB.SetInsertPoint(Trm);
+  return IRB.CreateCall(
+      M.getOrInsertFunction(kAMDGPUUnreachableName, IRB.getVoidTy()), {});
+}
+
 void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
                                          Instruction *InsertBefore, Value *Addr,
                                          MaybeAlign Alignment,
@@ -1772,7 +1800,15 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
   size_t Granularity = 1ULL << Mapping.Scale;
   Instruction *CrashTerm = nullptr;
 
-  if (ClAlwaysSlowPath || (TypeStoreSize < 8 * Granularity)) {
+  bool GenSlowPath = (ClAlwaysSlowPath || (TypeStoreSize < 8 * Granularity));
+
+  if (TargetTriple.isAMDGCN()) {
+    if (GenSlowPath) {
+      auto *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeStoreSize);
+      Cmp = IRB.CreateAnd(Cmp, Cmp2);
+    }
+    CrashTerm = genAMDGPUReportBlock(IRB, Cmp, Recover);
+  } else if (GenSlowPath) {
     // We use branch weights for the slow path check, to indicate that the slow
     // path is rarely taken. This seems to be the case for SPEC benchmarks.
     Instruction *CheckTerm = SplitBlockAndInsertIfThen(

diff  --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll
index 4ce337e2b68ef1..cdead5b26c7e54 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll
@@ -18,17 +18,23 @@ define protected amdgpu_kernel void @constant_load(i64 %i) sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF2:![0-9]+]]
-; CHECK:       6:
-; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
-; CHECK:       11:
-; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    unreachable
-; CHECK:       12:
+; CHECK-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; CHECK-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
+; CHECK:       13:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    br label [[TMP15]]
+; CHECK:       15:
 ; CHECK-NEXT:    [[Q:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -42,19 +48,16 @@ define protected amdgpu_kernel void @constant_load(i64 %i) sanitize_address {
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF2:![0-9]+]]
-; RECOV:       6:
-; RECOV-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; RECOV-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; RECOV-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; RECOV-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; RECOV:       11:
+; RECOV-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; RECOV-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; RECOV-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; RECOV-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; RECOV-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; RECOV-NEXT:    br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF2:![0-9]+]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load4_noabort(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
-; RECOV-NEXT:    br label [[TMP12]]
-; RECOV:       12:
-; RECOV-NEXT:    br label [[TMP13]]
-; RECOV:       13:
+; RECOV-NEXT:    br label [[TMP11]]
+; RECOV:       11:
 ; RECOV-NEXT:    [[Q:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -75,11 +78,18 @@ define protected amdgpu_kernel void @constant_load_8(i64 %i) sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; CHECK:       6:
-; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       7:
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[ASAN_REPORT:%.*]], label [[TMP10:%.*]], !prof [[PROF2]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP8:%.*]], label [[TMP9:%.*]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
 ; CHECK-NEXT:    [[Q:%.*]] = load i64, ptr addrspace(4) [[A]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -93,11 +103,11 @@ define protected amdgpu_kernel void @constant_load_8(i64 %i) sanitize_address {
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; RECOV:       6:
+; RECOV-NEXT:    br i1 [[TMP5]], label [[ASAN_REPORT:%.*]], label [[TMP6:%.*]], !prof [[PROF2]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load8_noabort(i64 [[TMP0]]) #[[ATTR3]]
-; RECOV-NEXT:    br label [[TMP7]]
-; RECOV:       7:
+; RECOV-NEXT:    br label [[TMP6]]
+; RECOV:       6:
 ; RECOV-NEXT:    [[Q:%.*]] = load i64, ptr addrspace(4) [[A]], align 8
 ; RECOV-NEXT:    ret void
 ;

diff  --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
index f86a5722c0006d..cb37ba24f1c74e 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
@@ -13,7 +13,7 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP18:%.*]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP21:%.*]]
 ; CHECK:       4:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -21,19 +21,25 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP17:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK:       11:
-; CHECK-NEXT:    [[TMP12:%.*]] = and i64 [[TMP5]], 7
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 3
-; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i8
-; CHECK-NEXT:    [[TMP15:%.*]] = icmp sge i8 [[TMP14]], [[TMP9]]
-; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17]]
-; CHECK:       16:
-; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP5]]) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    unreachable
-; CHECK:       17:
-; CHECK-NEXT:    br label [[TMP18]]
+; CHECK-NEXT:    [[TMP11:%.*]] = and i64 [[TMP5]], 7
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], 3
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp sge i8 [[TMP13]], [[TMP9]]
+; CHECK-NEXT:    [[TMP15:%.*]] = and i1 [[TMP10]], [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP15]])
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP16]], 0
+; CHECK-NEXT:    br i1 [[TMP17]], label [[ASAN_REPORT:%.*]], label [[TMP20:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP18:%.*]], label [[TMP19:%.*]]
 ; CHECK:       18:
+; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP5]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP19]]
+; CHECK:       19:
+; CHECK-NEXT:    br label [[TMP20]]
+; CHECK:       20:
+; CHECK-NEXT:    br label [[TMP21]]
+; CHECK:       21:
 ; CHECK-NEXT:    store i32 0, ptr [[Q]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -45,7 +51,7 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP19:%.*]]
+; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP17:%.*]]
 ; RECOV:       4:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -53,21 +59,18 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; RECOV-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; RECOV-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; RECOV-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP18:%.*]], !prof [[PROF0:![0-9]+]]
-; RECOV:       11:
-; RECOV-NEXT:    [[TMP12:%.*]] = and i64 [[TMP5]], 7
-; RECOV-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 3
-; RECOV-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i8
-; RECOV-NEXT:    [[TMP15:%.*]] = icmp sge i8 [[TMP14]], [[TMP9]]
-; RECOV-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17:%.*]]
-; RECOV:       16:
+; RECOV-NEXT:    [[TMP11:%.*]] = and i64 [[TMP5]], 7
+; RECOV-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], 3
+; RECOV-NEXT:    [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8
+; RECOV-NEXT:    [[TMP14:%.*]] = icmp sge i8 [[TMP13]], [[TMP9]]
+; RECOV-NEXT:    [[TMP15:%.*]] = and i1 [[TMP10]], [[TMP14]]
+; RECOV-NEXT:    br i1 [[TMP15]], label [[ASAN_REPORT:%.*]], label [[TMP16:%.*]], !prof [[PROF0:![0-9]+]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_store4_noabort(i64 [[TMP5]]) #[[ATTR3:[0-9]+]]
+; RECOV-NEXT:    br label [[TMP16]]
+; RECOV:       16:
 ; RECOV-NEXT:    br label [[TMP17]]
 ; RECOV:       17:
-; RECOV-NEXT:    br label [[TMP18]]
-; RECOV:       18:
-; RECOV-NEXT:    br label [[TMP19]]
-; RECOV:       19:
 ; RECOV-NEXT:    store i32 0, ptr [[Q]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -87,7 +90,7 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP18:%.*]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP21:%.*]]
 ; CHECK:       4:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -95,19 +98,25 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP17:%.*]], !prof [[PROF0]]
-; CHECK:       11:
-; CHECK-NEXT:    [[TMP12:%.*]] = and i64 [[TMP5]], 7
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 3
-; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i8
-; CHECK-NEXT:    [[TMP15:%.*]] = icmp sge i8 [[TMP14]], [[TMP9]]
-; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17]]
-; CHECK:       16:
-; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP5]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       17:
-; CHECK-NEXT:    br label [[TMP18]]
+; CHECK-NEXT:    [[TMP11:%.*]] = and i64 [[TMP5]], 7
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], 3
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp sge i8 [[TMP13]], [[TMP9]]
+; CHECK-NEXT:    [[TMP15:%.*]] = and i1 [[TMP10]], [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP15]])
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne i64 [[TMP16]], 0
+; CHECK-NEXT:    br i1 [[TMP17]], label [[ASAN_REPORT:%.*]], label [[TMP20:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP18:%.*]], label [[TMP19:%.*]]
 ; CHECK:       18:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP5]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP19]]
+; CHECK:       19:
+; CHECK-NEXT:    br label [[TMP20]]
+; CHECK:       20:
+; CHECK-NEXT:    br label [[TMP21]]
+; CHECK:       21:
 ; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[Q]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -119,7 +128,7 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP19:%.*]]
+; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP17:%.*]]
 ; RECOV:       4:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -127,21 +136,18 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; RECOV-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; RECOV-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; RECOV-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP18:%.*]], !prof [[PROF0]]
-; RECOV:       11:
-; RECOV-NEXT:    [[TMP12:%.*]] = and i64 [[TMP5]], 7
-; RECOV-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 3
-; RECOV-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i8
-; RECOV-NEXT:    [[TMP15:%.*]] = icmp sge i8 [[TMP14]], [[TMP9]]
-; RECOV-NEXT:    br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17:%.*]]
-; RECOV:       16:
+; RECOV-NEXT:    [[TMP11:%.*]] = and i64 [[TMP5]], 7
+; RECOV-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], 3
+; RECOV-NEXT:    [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8
+; RECOV-NEXT:    [[TMP14:%.*]] = icmp sge i8 [[TMP13]], [[TMP9]]
+; RECOV-NEXT:    [[TMP15:%.*]] = and i1 [[TMP10]], [[TMP14]]
+; RECOV-NEXT:    br i1 [[TMP15]], label [[ASAN_REPORT:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load4_noabort(i64 [[TMP5]]) #[[ATTR3]]
+; RECOV-NEXT:    br label [[TMP16]]
+; RECOV:       16:
 ; RECOV-NEXT:    br label [[TMP17]]
 ; RECOV:       17:
-; RECOV-NEXT:    br label [[TMP18]]
-; RECOV:       18:
-; RECOV-NEXT:    br label [[TMP19]]
-; RECOV:       19:
 ; RECOV-NEXT:    [[R:%.*]] = load i32, ptr [[Q]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -161,7 +167,7 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP16:%.*]]
 ; CHECK:       4:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -169,13 +175,20 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; CHECK:       11:
-; CHECK-NEXT:    call void @__asan_report_store8(i64 [[TMP5]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       12:
-; CHECK-NEXT:    br label [[TMP13]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
 ; CHECK:       13:
+; CHECK-NEXT:    call void @__asan_report_store8(i64 [[TMP5]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    br label [[TMP15]]
+; CHECK:       15:
+; CHECK-NEXT:    br label [[TMP16]]
+; CHECK:       16:
 ; CHECK-NEXT:    store i64 0, ptr [[Q]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -187,7 +200,7 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
+; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
 ; RECOV:       4:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -195,13 +208,13 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; RECOV-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; RECOV-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; RECOV-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; RECOV:       11:
+; RECOV-NEXT:    br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_store8_noabort(i64 [[TMP5]]) #[[ATTR3]]
+; RECOV-NEXT:    br label [[TMP11]]
+; RECOV:       11:
 ; RECOV-NEXT:    br label [[TMP12]]
 ; RECOV:       12:
-; RECOV-NEXT:    br label [[TMP13]]
-; RECOV:       13:
 ; RECOV-NEXT:    store i64 0, ptr [[Q]], align 8
 ; RECOV-NEXT:    ret void
 ;
@@ -220,7 +233,7 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP16:%.*]]
 ; CHECK:       4:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -228,13 +241,20 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; CHECK:       11:
-; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP5]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       12:
-; CHECK-NEXT:    br label [[TMP13]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
 ; CHECK:       13:
+; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP5]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    br label [[TMP15]]
+; CHECK:       15:
+; CHECK-NEXT:    br label [[TMP16]]
+; CHECK:       16:
 ; CHECK-NEXT:    [[R:%.*]] = load i64, ptr [[Q]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -246,7 +266,7 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
 ; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
-; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
+; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
 ; RECOV:       4:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
@@ -254,13 +274,13 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; RECOV-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; RECOV-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
 ; RECOV-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; RECOV:       11:
+; RECOV-NEXT:    br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load8_noabort(i64 [[TMP5]]) #[[ATTR3]]
+; RECOV-NEXT:    br label [[TMP11]]
+; RECOV:       11:
 ; RECOV-NEXT:    br label [[TMP12]]
 ; RECOV:       12:
-; RECOV-NEXT:    br label [[TMP13]]
-; RECOV:       13:
 ; RECOV-NEXT:    [[R:%.*]] = load i64, ptr [[Q]], align 8
 ; RECOV-NEXT:    ret void
 ;

diff  --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll
index 6b39ff6ff8462a..a954b173eb2aa5 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_global_address_space.ll
@@ -14,17 +14,23 @@ define protected amdgpu_kernel void @global_store(ptr addrspace(1) %p, i32 %i) s
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK:       6:
-; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
-; CHECK:       11:
-; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    unreachable
-; CHECK:       12:
+; CHECK-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; CHECK-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
+; CHECK:       13:
+; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP0]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    br label [[TMP15]]
+; CHECK:       15:
 ; CHECK-NEXT:    store i32 0, ptr addrspace(1) [[P]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -37,19 +43,16 @@ define protected amdgpu_kernel void @global_store(ptr addrspace(1) %p, i32 %i) s
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF0:![0-9]+]]
-; RECOV:       6:
-; RECOV-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; RECOV-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; RECOV-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; RECOV-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; RECOV:       11:
+; RECOV-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; RECOV-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; RECOV-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; RECOV-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; RECOV-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; RECOV-NEXT:    br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF0:![0-9]+]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_store4_noabort(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
-; RECOV-NEXT:    br label [[TMP12]]
-; RECOV:       12:
-; RECOV-NEXT:    br label [[TMP13]]
-; RECOV:       13:
+; RECOV-NEXT:    br label [[TMP11]]
+; RECOV:       11:
 ; RECOV-NEXT:    store i32 0, ptr addrspace(1) [[P]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -69,17 +72,23 @@ define protected amdgpu_kernel void @global_load(ptr addrspace(1) %p, i32 %i) sa
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF0]]
-; CHECK:       6:
-; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
-; CHECK:       11:
-; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       12:
+; CHECK-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; CHECK-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
+; CHECK:       13:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    br label [[TMP15]]
+; CHECK:       15:
 ; CHECK-NEXT:    [[Q:%.*]] = load i32, ptr addrspace(1) [[P]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -92,19 +101,16 @@ define protected amdgpu_kernel void @global_load(ptr addrspace(1) %p, i32 %i) sa
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF0]]
-; RECOV:       6:
-; RECOV-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; RECOV-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; RECOV-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; RECOV-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; RECOV:       11:
+; RECOV-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; RECOV-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; RECOV-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; RECOV-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; RECOV-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; RECOV-NEXT:    br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load4_noabort(i64 [[TMP0]]) #[[ATTR3]]
-; RECOV-NEXT:    br label [[TMP12]]
-; RECOV:       12:
-; RECOV-NEXT:    br label [[TMP13]]
-; RECOV:       13:
+; RECOV-NEXT:    br label [[TMP11]]
+; RECOV:       11:
 ; RECOV-NEXT:    [[Q:%.*]] = load i32, ptr addrspace(1) [[P]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -124,11 +130,18 @@ define protected amdgpu_kernel void @global_store_8(ptr addrspace(1) %p) sanitiz
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; CHECK:       6:
-; CHECK-NEXT:    call void @__asan_report_store8(i64 [[TMP0]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       7:
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[ASAN_REPORT:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP8:%.*]], label [[TMP9:%.*]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__asan_report_store8(i64 [[TMP0]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
 ; CHECK-NEXT:    store i64 0, ptr addrspace(1) [[P]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -141,11 +154,11 @@ define protected amdgpu_kernel void @global_store_8(ptr addrspace(1) %p) sanitiz
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; RECOV:       6:
+; RECOV-NEXT:    br i1 [[TMP5]], label [[ASAN_REPORT:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_store8_noabort(i64 [[TMP0]]) #[[ATTR3]]
-; RECOV-NEXT:    br label [[TMP7]]
-; RECOV:       7:
+; RECOV-NEXT:    br label [[TMP6]]
+; RECOV:       6:
 ; RECOV-NEXT:    store i64 0, ptr addrspace(1) [[P]], align 8
 ; RECOV-NEXT:    ret void
 ;
@@ -164,11 +177,18 @@ define protected amdgpu_kernel void @global_load_8(ptr addrspace(1) %p) sanitize
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; CHECK:       6:
-; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       7:
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[ASAN_REPORT:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP8:%.*]], label [[TMP9:%.*]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
 ; CHECK-NEXT:    [[Q:%.*]] = load i64, ptr addrspace(1) [[P]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -181,11 +201,11 @@ define protected amdgpu_kernel void @global_load_8(ptr addrspace(1) %p) sanitize
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; RECOV:       6:
+; RECOV-NEXT:    br i1 [[TMP5]], label [[ASAN_REPORT:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load8_noabort(i64 [[TMP0]]) #[[ATTR3]]
-; RECOV-NEXT:    br label [[TMP7]]
-; RECOV:       7:
+; RECOV-NEXT:    br label [[TMP6]]
+; RECOV:       6:
 ; RECOV-NEXT:    [[Q:%.*]] = load i64, ptr addrspace(1) [[P]], align 8
 ; RECOV-NEXT:    ret void
 ;


        


More information about the llvm-commits mailing list