[llvm] [WIP] Separate Asan instrumentation in amdgpu-sw-lower-lds. (PR #134832)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 8 03:33:19 PDT 2025


https://github.com/skc7 created https://github.com/llvm/llvm-project/pull/134832

None

>From e40c1c95887230518f58b0efcc31c03fb2311f41 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Tue, 31 Dec 2024 15:46:04 +0530
Subject: [PATCH] [AMDGPU] Separate Asan instrumentation from sw lds lowering

---
 llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp   |  92 ++++++----
 ...-lower-lds-dynamic-indirect-access-asan.ll | 108 ++++++------
 ...pu-sw-lower-lds-dynamic-indirect-access.ll |  59 +++----
 ...dgpu-sw-lower-lds-dynamic-lds-test-asan.ll |  47 ++---
 .../amdgpu-sw-lower-lds-dynamic-lds-test.ll   |  45 ++---
 ...lti-static-dynamic-indirect-access-asan.ll | 166 +++++++++---------
 ...ds-multi-static-dynamic-indirect-access.ll |  91 ++++------
 ...w-lower-lds-multiple-blocks-return-asan.ll |  45 ++---
 ...gpu-sw-lower-lds-multiple-blocks-return.ll |  68 +++----
 ...gpu-sw-lower-lds-non-kernel-declaration.ll |   2 +-
 ...lds-static-dynamic-indirect-access-asan.ll | 108 ++++++------
 ...ower-lds-static-dynamic-indirect-access.ll |  59 +++----
 ...-lower-lds-static-dynamic-lds-test-asan.ll |  95 +++++-----
 ...pu-sw-lower-lds-static-dynamic-lds-test.ll |  51 ++----
 ...w-lower-lds-static-indirect-access-asan.ll |   2 +-
 ...tic-indirect-access-function-param-asan.ll |  67 +++----
 ...s-static-indirect-access-function-param.ll |  55 +++---
 ...-lds-static-indirect-access-nested-asan.ll |  23 +--
 ...lower-lds-static-indirect-access-nested.ll | 107 ++++-------
 ...static-indirect-access-no-kernel-lds-id.ll |  41 ++---
 ...gpu-sw-lower-lds-static-indirect-access.ll |  41 ++---
 .../amdgpu-sw-lower-lds-static-lds-O0.ll      |   2 +-
 ...gpu-sw-lower-lds-static-lds-no-heap-ptr.ll |   2 +-
 ...mdgpu-sw-lower-lds-static-lds-test-asan.ll |  76 ++++----
 ...lds-static-lds-test-atomic-cmpxchg-asan.ll |  60 +++----
 ...ower-lds-static-lds-test-atomicrmw-asan.ll | 108 ++++++------
 .../amdgpu-sw-lower-lds-static-lds-test.ll    |  48 ++---
 ...gpu-sw-lower-lds-static-lds-vector-ptrs.ll |  25 +--
 28 files changed, 756 insertions(+), 937 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
index cc0d374c99254..4a4b9e97648f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -353,9 +353,11 @@ void AMDGPUSwLowerLDS::buildSwLDSGlobal(Function *Func) {
       M, IRB.getPtrTy(), false, GlobalValue::InternalLinkage,
       PoisonValue::get(IRB.getPtrTy()), "llvm.amdgcn.sw.lds." + Func->getName(),
       nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, false);
-  GlobalValue::SanitizerMetadata MD;
-  MD.NoAddress = true;
-  LDSParams.SwLDS->setSanitizerMetadata(MD);
+  if (AsanInstrumentLDS) {
+    GlobalValue::SanitizerMetadata MD;
+    MD.NoAddress = true;
+    LDSParams.SwLDS->setSanitizerMetadata(MD);
+  }
 }
 
 void AMDGPUSwLowerLDS::buildSwDynLDSGlobal(Function *Func) {
@@ -371,9 +373,11 @@ void AMDGPUSwLowerLDS::buildSwDynLDSGlobal(Function *Func) {
       "llvm.amdgcn." + Func->getName() + ".dynlds", nullptr,
       GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, false);
   markUsedByKernel(Func, LDSParams.SwDynLDS);
-  GlobalValue::SanitizerMetadata MD;
-  MD.NoAddress = true;
-  LDSParams.SwDynLDS->setSanitizerMetadata(MD);
+  if (AsanInstrumentLDS) {
+    GlobalValue::SanitizerMetadata MD;
+    MD.NoAddress = true;
+    LDSParams.SwDynLDS->setSanitizerMetadata(MD);
+  }
 }
 
 void AMDGPUSwLowerLDS::populateSwLDSAttributeAndMetadata(Function *Func) {
@@ -436,8 +440,8 @@ void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) {
           Constant *ItemStartOffset = ConstantInt::get(Int32Ty, MallocSize);
           Constant *SizeInBytesConst = ConstantInt::get(Int32Ty, SizeInBytes);
           // Get redzone size corresponding a size.
-          const uint64_t RightRedzoneSize =
-              AMDGPU::getRedzoneSizeForGlobal(AsanScale, SizeInBytes);
+          const uint64_t RightRedzoneSize = AsanInstrumentLDS ?
+              AMDGPU::getRedzoneSizeForGlobal(AsanScale, SizeInBytes) : 0;
           // Update MallocSize with current size and redzone size.
           MallocSize += SizeInBytes;
           if (!AMDGPU::isDynamicLDS(*GV))
@@ -489,9 +493,11 @@ void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) {
   LDSParams.SwLDS->setAlignment(MaxAlignment);
   if (LDSParams.SwDynLDS)
     LDSParams.SwDynLDS->setAlignment(MaxAlignment);
-  GlobalValue::SanitizerMetadata MD;
-  MD.NoAddress = true;
-  LDSParams.SwLDSMetadata->setSanitizerMetadata(MD);
+  if (AsanInstrumentLDS) {
+    GlobalValue::SanitizerMetadata MD;
+    MD.NoAddress = true;
+    LDSParams.SwLDSMetadata->setSanitizerMetadata(MD);
+  }
 }
 
 void AMDGPUSwLowerLDS::populateLDSToReplacementIndicesMap(Function *Func) {
@@ -869,22 +875,34 @@ void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
 
   // Create a call to malloc function which does device global memory allocation
   // with size equals to all LDS global accesses size in this kernel.
-  Value *ReturnAddress =
-      IRB.CreateIntrinsic(Intrinsic::returnaddress, {IRB.getInt32(0)});
-  FunctionCallee MallocFunc = M.getOrInsertFunction(
+  Value *MallocPtr;
+  if (AsanInstrumentLDS) {
+    Value *ReturnAddress =
+      IRB.CreateIntrinsic(Intrinsic::returnaddress, {}, {IRB.getInt32(0)});
+    FunctionCallee MallocFunc = M.getOrInsertFunction(
       StringRef("__asan_malloc_impl"),
       FunctionType::get(Int64Ty, {Int64Ty, Int64Ty}, false));
-  Value *RAPtrToInt = IRB.CreatePtrToInt(ReturnAddress, Int64Ty);
-  Value *MallocCall = IRB.CreateCall(MallocFunc, {CurrMallocSize, RAPtrToInt});
-
-  Value *MallocPtr =
+    Value *RAPtrToInt = IRB.CreatePtrToInt(ReturnAddress, Int64Ty);
+    Value *MallocCall = IRB.CreateCall(MallocFunc, {CurrMallocSize, RAPtrToInt});
+    MallocPtr =
+      IRB.CreateIntToPtr(MallocCall, IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS));
+  }
+  else {
+    Type *PtrTy = IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS);
+    FunctionCallee MallocFunc = M.getOrInsertFunction(
+      StringRef("__ockl_dm_alloc"),
+      FunctionType::get(PtrTy, {Int64Ty}, false));
+    Value *MallocCall = IRB.CreateCall(MallocFunc, {CurrMallocSize});
+    MallocPtr =
       IRB.CreateIntToPtr(MallocCall, IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS));
+  }
 
   // Create store of malloc to new global
   IRB.CreateStore(MallocPtr, SwLDS);
 
   // Create calls to __asan_poison_region to poison redzones.
-  poisonRedzones(Func, MallocPtr);
+  if (AsanInstrumentLDS)
+    poisonRedzones(Func, MallocPtr);
 
   // Create branch to PrevEntryBlock
   IRB.CreateBr(PrevEntryBlock);
@@ -932,14 +950,22 @@ void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
   IRB.SetInsertPoint(FreeBlock, FreeBlock->begin());
 
   // Free the previously allocate device global memory.
-  FunctionCallee AsanFreeFunc = M.getOrInsertFunction(
+  Value *MallocPtrToInt = IRB.CreatePtrToInt(LoadMallocPtr, Int64Ty);
+  if (AsanInstrumentLDS) {
+    FunctionCallee AsanFreeFunc = M.getOrInsertFunction(
       StringRef("__asan_free_impl"),
       FunctionType::get(IRB.getVoidTy(), {Int64Ty, Int64Ty}, false));
-  Value *ReturnAddr =
-      IRB.CreateIntrinsic(Intrinsic::returnaddress, IRB.getInt32(0));
-  Value *RAPToInt = IRB.CreatePtrToInt(ReturnAddr, Int64Ty);
-  Value *MallocPtrToInt = IRB.CreatePtrToInt(LoadMallocPtr, Int64Ty);
-  IRB.CreateCall(AsanFreeFunc, {MallocPtrToInt, RAPToInt});
+    Value *ReturnAddr =
+      IRB.CreateIntrinsic(Intrinsic::returnaddress, {}, IRB.getInt32(0));
+    Value *RAPToInt = IRB.CreatePtrToInt(ReturnAddr, Int64Ty);
+    IRB.CreateCall(AsanFreeFunc, {MallocPtrToInt, RAPToInt});
+  }
+  else {
+    FunctionCallee FreeFunc = M.getOrInsertFunction(
+      StringRef("__ockl_dm_dealloc"),
+      FunctionType::get(IRB.getVoidTy(), {Int64Ty}, false));
+    IRB.CreateCall(FreeFunc, {MallocPtrToInt});
+  }
 
   IRB.CreateBr(EndBlock);
 
@@ -1013,9 +1039,11 @@ void AMDGPUSwLowerLDS::buildNonKernelLDSBaseTable(
       M, AllKernelsOffsetsType, true, GlobalValue::InternalLinkage, init,
       "llvm.amdgcn.sw.lds.base.table", nullptr, GlobalValue::NotThreadLocal,
       AMDGPUAS::GLOBAL_ADDRESS);
-  GlobalValue::SanitizerMetadata MD;
-  MD.NoAddress = true;
-  NKLDSParams.LDSBaseTable->setSanitizerMetadata(MD);
+  if (AsanInstrumentLDS) {
+    GlobalValue::SanitizerMetadata MD;
+    MD.NoAddress = true;
+    NKLDSParams.LDSBaseTable->setSanitizerMetadata(MD);
+  }
 }
 
 void AMDGPUSwLowerLDS::buildNonKernelLDSOffsetTable(
@@ -1051,9 +1079,11 @@ void AMDGPUSwLowerLDS::buildNonKernelLDSOffsetTable(
       M, AllKernelsOffsetsType, true, GlobalValue::InternalLinkage, Init,
       "llvm.amdgcn.sw.lds.offset.table", nullptr, GlobalValue::NotThreadLocal,
       AMDGPUAS::GLOBAL_ADDRESS);
-  GlobalValue::SanitizerMetadata MD;
-  MD.NoAddress = true;
-  NKLDSParams.LDSOffsetTable->setSanitizerMetadata(MD);
+  if (AsanInstrumentLDS) {
+    GlobalValue::SanitizerMetadata MD;
+    MD.NoAddress = true;
+    NKLDSParams.LDSOffsetTable->setSanitizerMetadata(MD);
+  }
 }
 
 void AMDGPUSwLowerLDS::lowerNonKernelLDSAccesses(
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll
index 2776b9187724c..f81d1bb3c2591 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check indirect dynamic LDS access through a non-kernel from kernel is lowered correctly.
@@ -43,16 +43,16 @@ define void @use_variables() sanitize_address {
 ; CHECK-NEXT:    [[TMP24:%.*]] = and i1 [[TMP20]], [[TMP23]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP24]])
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp ne i64 [[TMP25]], 0
-; CHECK-NEXT:    br i1 [[TMP26]], label [[ASAN_REPORT:%.*]], label [[TMP29:%.*]], !prof [[PROF3:![0-9]+]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP24]], label [[TMP27:%.*]], label [[TMP28:%.*]]
-; CHECK:       27:
+; CHECK-NEXT:    br i1 [[TMP26]], label %[[ASAN_REPORT:.*]], label %[[BB29:.*]], !prof [[PROF3:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP24]], label %[[BB27:.*]], label %[[BB28:.*]]
+; CHECK:       [[BB27]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP47]]) #[[ATTR7:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP28]]
-; CHECK:       28:
-; CHECK-NEXT:    br label [[TMP29]]
-; CHECK:       29:
+; CHECK-NEXT:    br label %[[BB28]]
+; CHECK:       [[BB28]]:
+; CHECK-NEXT:    br label %[[BB29]]
+; CHECK:       [[BB29]]:
 ; CHECK-NEXT:    store i8 3, ptr addrspace(1) [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32
 ; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP30]]
@@ -68,16 +68,16 @@ define void @use_variables() sanitize_address {
 ; CHECK-NEXT:    [[TMP41:%.*]] = and i1 [[TMP37]], [[TMP40]]
 ; CHECK-NEXT:    [[TMP42:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP41]])
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp ne i64 [[TMP42]], 0
-; CHECK-NEXT:    br i1 [[TMP43]], label [[ASAN_REPORT1:%.*]], label [[TMP46:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report1:
-; CHECK-NEXT:    br i1 [[TMP41]], label [[TMP44:%.*]], label [[TMP45:%.*]]
-; CHECK:       44:
+; CHECK-NEXT:    br i1 [[TMP43]], label %[[ASAN_REPORT1:.*]], label %[[BB46:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP41]], label %[[BB44:.*]], label %[[BB45:.*]]
+; CHECK:       [[BB44]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP45]]
-; CHECK:       45:
-; CHECK-NEXT:    br label [[TMP46]]
-; CHECK:       46:
+; CHECK-NEXT:    br label %[[BB45]]
+; CHECK:       [[BB45]]:
+; CHECK-NEXT:    br label %[[BB46]]
+; CHECK:       [[BB46]]:
 ; CHECK-NEXT:    store i8 3, ptr addrspace(1) [[TMP31]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -89,15 +89,15 @@ define void @use_variables() sanitize_address {
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB32:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]]
@@ -134,9 +134,9 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68
 ; CHECK-NEXT:    [[TMP75:%.*]] = ptrtoint ptr addrspace(1) [[TMP74]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP75]], i64 28)
-; CHECK-NEXT:    br label [[TMP21]]
-; CHECK:       32:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB32]]
+; CHECK:       [[BB32]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP31:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -159,16 +159,16 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP49:%.*]] = and i1 [[TMP45]], [[TMP48]]
 ; CHECK-NEXT:    [[TMP50:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP49]])
 ; CHECK-NEXT:    [[TMP51:%.*]] = icmp ne i64 [[TMP50]], 0
-; CHECK-NEXT:    br i1 [[TMP51]], label [[ASAN_REPORT:%.*]], label [[TMP54:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP49]], label [[TMP52:%.*]], label [[CONDFREE:%.*]]
-; CHECK:       52:
+; CHECK-NEXT:    br i1 [[TMP51]], label %[[ASAN_REPORT:.*]], label %[[BB54:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP49]], label %[[BB52:.*]], label %[[BB53:.*]]
+; CHECK:       [[BB52]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP40]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[CONDFREE]]
-; CHECK:       53:
-; CHECK-NEXT:    br label [[TMP54]]
-; CHECK:       54:
+; CHECK-NEXT:    br label %[[BB53]]
+; CHECK:       [[BB53]]:
+; CHECK-NEXT:    br label %[[BB54]]
+; CHECK:       [[BB54]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP39]], align 1
 ; CHECK-NEXT:    [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP30]] to i32
 ; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP55]]
@@ -187,16 +187,16 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP67:%.*]] = and i1 [[TMP62]], [[TMP66]]
 ; CHECK-NEXT:    [[TMP68:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP67]])
 ; CHECK-NEXT:    [[TMP69:%.*]] = icmp ne i64 [[TMP68]], 0
-; CHECK-NEXT:    br i1 [[TMP69]], label [[ASAN_REPORT1:%.*]], label [[TMP72:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report1:
-; CHECK-NEXT:    br i1 [[TMP67]], label [[TMP70:%.*]], label [[TMP71:%.*]]
-; CHECK:       72:
+; CHECK-NEXT:    br i1 [[TMP69]], label %[[ASAN_REPORT1:.*]], label %[[BB74:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP67]], label %[[BB72:.*]], label %[[BB73:.*]]
+; CHECK:       [[BB72]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP91]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP71]]
-; CHECK:       73:
-; CHECK-NEXT:    br label [[TMP72]]
-; CHECK:       74:
+; CHECK-NEXT:    br label %[[BB73]]
+; CHECK:       [[BB73]]:
+; CHECK-NEXT:    br label %[[BB74]]
+; CHECK:       [[BB74]]:
 ; CHECK-NEXT:    [[TMP92:%.*]] = ptrtoint ptr addrspace(1) [[TMP90]] to i64
 ; CHECK-NEXT:    [[TMP76:%.*]] = lshr i64 [[TMP92]], 3
 ; CHECK-NEXT:    [[TMP77:%.*]] = add i64 [[TMP76]], 2147450880
@@ -209,28 +209,28 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP84:%.*]] = and i1 [[TMP80]], [[TMP83]]
 ; CHECK-NEXT:    [[TMP85:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP84]])
 ; CHECK-NEXT:    [[TMP86:%.*]] = icmp ne i64 [[TMP85]], 0
-; CHECK-NEXT:    br i1 [[TMP86]], label [[ASAN_REPORT2:%.*]], label [[TMP89:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report2:
-; CHECK-NEXT:    br i1 [[TMP84]], label [[TMP87:%.*]], label [[TMP88:%.*]]
-; CHECK:       87:
+; CHECK-NEXT:    br i1 [[TMP86]], label %[[ASAN_REPORT2:.*]], label %[[BB89:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT2]]:
+; CHECK-NEXT:    br i1 [[TMP84]], label %[[BB87:.*]], label %[[BB88:.*]]
+; CHECK:       [[BB87]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP92]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP88]]
-; CHECK:       88:
-; CHECK-NEXT:    br label [[TMP89]]
-; CHECK:       89:
+; CHECK-NEXT:    br label %[[BB88]]
+; CHECK:       [[BB88]]:
+; CHECK-NEXT:    br label %[[BB89]]
+; CHECK:       [[BB89]]:
 ; CHECK-NEXT:    store i32 8, ptr addrspace(1) [[TMP56]], align 2
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP34:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64
 ; CHECK-NEXT:    [[TMP32:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP33:%.*]] = ptrtoint ptr [[TMP32]] to i64
-; CHECK-NEXT:    [[TMP34:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP34]], i64 [[TMP33]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   call void @use_variables()
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access.ll
index 8cbeb80d62335..7a28d60f2319c 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check indirect dynamic LDS access through a non-kernel from kernel is lowered correctly.
@@ -8,11 +8,11 @@
 @lds_4 = external addrspace(3) global [0 x i8], align 8
 
 ;.
-; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], align 8, !absolute_symbol [[META1:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 } }
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0]
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]]
 ;.
 define void @use_variables() sanitize_address {
 ; CHECK-LABEL: define void @use_variables(
@@ -44,16 +44,16 @@ define void @use_variables() sanitize_address {
 
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]]
@@ -76,23 +76,11 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
 ; CHECK-NEXT:    [[TMP28:%.*]] = add i32 [[TMP15]], [[TMP19]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP28]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64
-; CHECK-NEXT:    [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP23]])
-; CHECK-NEXT:    [[TMP20:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP20:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP26]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8
-; CHECK-NEXT:    [[TMP37:%.*]] = ptrtoint ptr addrspace(1) [[TMP36]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP37]], i64 24)
-; CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33
-; CHECK-NEXT:    [[TMP73:%.*]] = ptrtoint ptr addrspace(1) [[TMP53]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP73]], i64 31)
-; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68
-; CHECK-NEXT:    [[TMP75:%.*]] = ptrtoint ptr addrspace(1) [[TMP74]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP75]], i64 28)
-; CHECK-NEXT:    br label [[TMP21]]
-; CHECK:       32:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB23]]
+; CHECK:       [[BB23]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP31:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -107,17 +95,15 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP30]] to i32
 ; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP55]]
 ; CHECK-NEXT:    store i32 8, ptr addrspace(1) [[TMP56]], align 2
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
-; CHECK-NEXT:    [[TMP32:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP33:%.*]] = ptrtoint ptr [[TMP32]] to i64
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
 ; CHECK-NEXT:    [[TMP34:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP34]], i64 [[TMP33]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP34]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   call void @use_variables()
@@ -138,5 +124,6 @@ define amdgpu_kernel void @k0() sanitize_address {
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
 ; CHECK: [[META1]] = !{i32 8, i32 9}
-; CHECK: [[META2]] = !{i32 0}
+; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[META3]] = !{i32 0}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll
index f33b30119754f..ee75cdb7cc2a4 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check if direct access of dynamic LDS in kernel is lowered correctly.
@@ -13,15 +13,15 @@
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = add i32 [[TMP8]], [[TMP9]]
@@ -44,9 +44,9 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
 ; CHECK-NEXT:    [[TMP44:%.*]] = ptrtoint ptr addrspace(1) [[TMP42]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP44]], i64 24)
-; CHECK-NEXT:    br label [[TMP7]]
-; CHECK:       23:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB23]]
+; CHECK:       [[BB23]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP28:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -66,28 +66,28 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP38:%.*]] = and i1 [[TMP34]], [[TMP37]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP38]])
 ; CHECK-NEXT:    [[TMP40:%.*]] = icmp ne i64 [[TMP39]], 0
-; CHECK-NEXT:    br i1 [[TMP40]], label [[ASAN_REPORT:%.*]], label [[TMP43:%.*]], !prof [[PROF2:![0-9]+]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP38]], label [[TMP41:%.*]], label [[CONDFREE:%.*]]
-; CHECK:       41:
+; CHECK-NEXT:    br i1 [[TMP40]], label %[[ASAN_REPORT:.*]], label %[[BB43:.*]], !prof [[PROF3:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP38]], label %[[BB41:.*]], label %[[BB42:.*]]
+; CHECK:       [[BB41]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP29]]) #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[CONDFREE]]
-; CHECK:       42:
-; CHECK-NEXT:    br label [[TMP43]]
-; CHECK:       43:
+; CHECK-NEXT:    br label %[[BB42]]
+; CHECK:       [[BB42]]:
+; CHECK-NEXT:    br label %[[BB43]]
+; CHECK:       [[BB43]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP46]], align 4
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64
 ; CHECK-NEXT:    [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64
-; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   store i8 7, ptr addrspace(3) @lds_1, align 4
@@ -109,5 +109,6 @@ define amdgpu_kernel void @k0() sanitize_address {
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
 ; CHECK: [[META1]] = !{i32 8, i32 9}
-; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1048575}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test.ll
index 5e90eb0b95219..da2c10f5200e8 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check if direct access of dynamic LDS in kernel is lowered correctly.
@@ -6,22 +6,22 @@
 @lds_2 = external addrspace(3) global [0 x i8]
 
 ;.
-; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol [[META1:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, align 1, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], align 1, !absolute_symbol [[META1:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 0, i32 0 } }
 ;.
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
 ; CHECK-NEXT:    [[TMP24:%.*]] = add i32 [[TMP8]], [[TMP9]]
@@ -36,17 +36,11 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    store i32 [[TMP16]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP24]], [[TMP16]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP17]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64
-; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP21]], i64 [[TMP23]])
-; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP6:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP21]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
-; CHECK-NEXT:    [[TMP44:%.*]] = ptrtoint ptr addrspace(1) [[TMP42]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP44]], i64 24)
-; CHECK-NEXT:    br label [[TMP7]]
-; CHECK:       23:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP28:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -55,17 +49,15 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP45:%.*]] = ptrtoint ptr addrspace(3) [[TMP11]] to i32
 ; CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP28]], i32 [[TMP45]]
 ; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP46]], align 4
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
-; CHECK-NEXT:    [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
 ; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP27]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   store i8 7, ptr addrspace(3) @lds_1, align 4
@@ -84,4 +76,5 @@ define amdgpu_kernel void @k0() sanitize_address {
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
 ; CHECK: [[META1]] = !{i32 8, i32 9}
+; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access-asan.ll
index 91e0a9fc5018b..ac1792b894ef8 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check when multiple  kernels access the same non-kernel, LDS accesses are lowere correctly.
@@ -36,16 +36,16 @@ define void @use_variables_1() sanitize_address {
 ; CHECK-NEXT:    [[TMP24:%.*]] = and i1 [[TMP20]], [[TMP23]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP24]])
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp ne i64 [[TMP25]], 0
-; CHECK-NEXT:    br i1 [[TMP26]], label [[ASAN_REPORT:%.*]], label [[TMP29:%.*]], !prof [[PROF3:![0-9]+]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP24]], label [[TMP27:%.*]], label [[TMP28:%.*]]
-; CHECK:       27:
+; CHECK-NEXT:    br i1 [[TMP26]], label %[[ASAN_REPORT:.*]], label %[[BB29:.*]], !prof [[PROF3:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP24]], label %[[BB27:.*]], label %[[BB28:.*]]
+; CHECK:       [[BB27]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP47]]) #[[ATTR7:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP28]]
-; CHECK:       28:
-; CHECK-NEXT:    br label [[TMP29]]
-; CHECK:       29:
+; CHECK-NEXT:    br label %[[BB28]]
+; CHECK:       [[BB28]]:
+; CHECK-NEXT:    br label %[[BB29]]
+; CHECK:       [[BB29]]:
 ; CHECK-NEXT:    store i8 3, ptr addrspace(1) [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32
 ; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP30]]
@@ -61,16 +61,16 @@ define void @use_variables_1() sanitize_address {
 ; CHECK-NEXT:    [[TMP41:%.*]] = and i1 [[TMP37]], [[TMP40]]
 ; CHECK-NEXT:    [[TMP42:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP41]])
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp ne i64 [[TMP42]], 0
-; CHECK-NEXT:    br i1 [[TMP43]], label [[ASAN_REPORT1:%.*]], label [[TMP46:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report1:
-; CHECK-NEXT:    br i1 [[TMP41]], label [[TMP44:%.*]], label [[TMP45:%.*]]
-; CHECK:       44:
+; CHECK-NEXT:    br i1 [[TMP43]], label %[[ASAN_REPORT1:.*]], label %[[BB46:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP41]], label %[[BB44:.*]], label %[[BB45:.*]]
+; CHECK:       [[BB44]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP45]]
-; CHECK:       45:
-; CHECK-NEXT:    br label [[TMP46]]
-; CHECK:       46:
+; CHECK-NEXT:    br label %[[BB45]]
+; CHECK:       [[BB45]]:
+; CHECK-NEXT:    br label %[[BB46]]
+; CHECK:       [[BB46]]:
 ; CHECK-NEXT:    store i8 3, ptr addrspace(1) [[TMP31]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -108,16 +108,16 @@ define void @use_variables_2() sanitize_address {
 ; CHECK-NEXT:    [[TMP24:%.*]] = and i1 [[TMP20]], [[TMP23]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP24]])
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp ne i64 [[TMP25]], 0
-; CHECK-NEXT:    br i1 [[TMP26]], label [[ASAN_REPORT:%.*]], label [[TMP29:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP24]], label [[TMP27:%.*]], label [[TMP28:%.*]]
-; CHECK:       27:
+; CHECK-NEXT:    br i1 [[TMP26]], label %[[ASAN_REPORT:.*]], label %[[BB29:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP24]], label %[[BB27:.*]], label %[[BB28:.*]]
+; CHECK:       [[BB27]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP48]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP28]]
-; CHECK:       28:
-; CHECK-NEXT:    br label [[TMP29]]
-; CHECK:       29:
+; CHECK-NEXT:    br label %[[BB28]]
+; CHECK:       [[BB28]]:
+; CHECK-NEXT:    br label %[[BB29]]
+; CHECK:       [[BB29]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP14]], align 1
 ; CHECK-NEXT:    [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32
 ; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP30]]
@@ -136,16 +136,16 @@ define void @use_variables_2() sanitize_address {
 ; CHECK-NEXT:    [[TMP42:%.*]] = and i1 [[TMP37]], [[TMP41]]
 ; CHECK-NEXT:    [[TMP43:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP42]])
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp ne i64 [[TMP43]], 0
-; CHECK-NEXT:    br i1 [[TMP44]], label [[ASAN_REPORT1:%.*]], label [[TMP47:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report1:
-; CHECK-NEXT:    br i1 [[TMP42]], label [[TMP45:%.*]], label [[TMP46:%.*]]
-; CHECK:       47:
+; CHECK-NEXT:    br i1 [[TMP44]], label %[[ASAN_REPORT1:.*]], label %[[BB49:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP42]], label %[[BB47:.*]], label %[[BB48:.*]]
+; CHECK:       [[BB47]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP65]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP46]]
-; CHECK:       48:
-; CHECK-NEXT:    br label [[TMP47]]
-; CHECK:       49:
+; CHECK-NEXT:    br label %[[BB48]]
+; CHECK:       [[BB48]]:
+; CHECK-NEXT:    br label %[[BB49]]
+; CHECK:       [[BB49]]:
 ; CHECK-NEXT:    [[TMP50:%.*]] = ptrtoint ptr addrspace(1) [[TMP49]] to i64
 ; CHECK-NEXT:    [[TMP51:%.*]] = lshr i64 [[TMP50]], 3
 ; CHECK-NEXT:    [[TMP52:%.*]] = add i64 [[TMP51]], 2147450880
@@ -158,16 +158,16 @@ define void @use_variables_2() sanitize_address {
 ; CHECK-NEXT:    [[TMP59:%.*]] = and i1 [[TMP55]], [[TMP58]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP59]])
 ; CHECK-NEXT:    [[TMP61:%.*]] = icmp ne i64 [[TMP60]], 0
-; CHECK-NEXT:    br i1 [[TMP61]], label [[ASAN_REPORT2:%.*]], label [[TMP64:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report2:
-; CHECK-NEXT:    br i1 [[TMP59]], label [[TMP62:%.*]], label [[TMP63:%.*]]
-; CHECK:       62:
+; CHECK-NEXT:    br i1 [[TMP61]], label %[[ASAN_REPORT2:.*]], label %[[BB64:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT2]]:
+; CHECK-NEXT:    br i1 [[TMP59]], label %[[BB62:.*]], label %[[BB63:.*]]
+; CHECK:       [[BB62]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP50]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP63]]
-; CHECK:       63:
-; CHECK-NEXT:    br label [[TMP64]]
-; CHECK:       64:
+; CHECK-NEXT:    br label %[[BB63]]
+; CHECK:       [[BB63]]:
+; CHECK-NEXT:    br label %[[BB64]]
+; CHECK:       [[BB64]]:
 ; CHECK-NEXT:    store i32 8, ptr addrspace(1) [[TMP31]], align 2
 ; CHECK-NEXT:    ret void
 ;
@@ -179,15 +179,15 @@ define void @use_variables_2() sanitize_address {
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB30:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]]
@@ -221,9 +221,9 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP52:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33
 ; CHECK-NEXT:    [[TMP53:%.*]] = ptrtoint ptr addrspace(1) [[TMP52]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP53]], i64 31)
-; CHECK-NEXT:    br label [[TMP21]]
-; CHECK:       30:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB30]]
+; CHECK:       [[BB30]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -244,28 +244,28 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP45:%.*]] = and i1 [[TMP41]], [[TMP44]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP45]])
 ; CHECK-NEXT:    [[TMP47:%.*]] = icmp ne i64 [[TMP46]], 0
-; CHECK-NEXT:    br i1 [[TMP47]], label [[ASAN_REPORT:%.*]], label [[TMP50:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP45]], label [[TMP48:%.*]], label [[CONDFREE:%.*]]
-; CHECK:       48:
+; CHECK-NEXT:    br i1 [[TMP47]], label %[[ASAN_REPORT:.*]], label %[[BB50:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP45]], label %[[BB48:.*]], label %[[BB49:.*]]
+; CHECK:       [[BB48]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP36]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[CONDFREE]]
-; CHECK:       49:
-; CHECK-NEXT:    br label [[TMP50]]
-; CHECK:       50:
+; CHECK-NEXT:    br label %[[BB49]]
+; CHECK:       [[BB49]]:
+; CHECK-NEXT:    br label %[[BB50]]
+; CHECK:       [[BB50]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP35]], align 1
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64
 ; CHECK-NEXT:    [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
-; CHECK-NEXT:    [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   call void @use_variables_1()
@@ -276,15 +276,15 @@ define amdgpu_kernel void @k0() sanitize_address {
 define amdgpu_kernel void @k1() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k1(
 ; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB32:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]]
@@ -321,9 +321,9 @@ define amdgpu_kernel void @k1() sanitize_address {
 ; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
 ; CHECK-NEXT:    [[TMP57:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP57]], i64 28)
-; CHECK-NEXT:    br label [[TMP14]]
-; CHECK:       32:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB32]]
+; CHECK:       [[BB32]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
 ; CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 3, i32 0), align 4
@@ -345,28 +345,28 @@ define amdgpu_kernel void @k1() sanitize_address {
 ; CHECK-NEXT:    [[TMP47:%.*]] = and i1 [[TMP43]], [[TMP46]]
 ; CHECK-NEXT:    [[TMP48:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP47]])
 ; CHECK-NEXT:    [[TMP49:%.*]] = icmp ne i64 [[TMP48]], 0
-; CHECK-NEXT:    br i1 [[TMP49]], label [[ASAN_REPORT:%.*]], label [[TMP52:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP47]], label [[TMP50:%.*]], label [[CONDFREE:%.*]]
-; CHECK:       50:
+; CHECK-NEXT:    br i1 [[TMP49]], label %[[ASAN_REPORT:.*]], label %[[BB52:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP47]], label %[[BB50:.*]], label %[[BB51:.*]]
+; CHECK:       [[BB50]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP38]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[CONDFREE]]
-; CHECK:       51:
-; CHECK-NEXT:    br label [[TMP52]]
-; CHECK:       52:
+; CHECK-NEXT:    br label %[[BB51]]
+; CHECK:       [[BB51]]:
+; CHECK-NEXT:    br label %[[BB52]]
+; CHECK:       [[BB52]]:
 ; CHECK-NEXT:    store i8 3, ptr addrspace(1) [[TMP59]], align 4
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP37:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64
 ; CHECK-NEXT:    [[TMP35:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP36:%.*]] = ptrtoint ptr [[TMP35]] to i64
-; CHECK-NEXT:    [[TMP37:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP37]], i64 [[TMP36]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   call void @use_variables_1()
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access.ll
index d0caddb7934a7..33412e00a3733 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check when multiple  kernels access the same non-kernel, LDS accesses are lowere correctly.
@@ -65,16 +65,16 @@ define void @use_variables_2() sanitize_address {
 
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]]
@@ -97,20 +97,11 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4
 ; CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[TMP15]], [[TMP19]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP26]] to i64
-; CHECK-NEXT:    [[TMP28:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP33:%.*]] = ptrtoint ptr [[TMP28]] to i64
-; CHECK-NEXT:    [[TMP24:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP27]], i64 [[TMP33]])
-; CHECK-NEXT:    [[TMP20:%.*]] = inttoptr i64 [[TMP24]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP20:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP27]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP49:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8
-; CHECK-NEXT:    [[TMP51:%.*]] = ptrtoint ptr addrspace(1) [[TMP49]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP51]], i64 24)
-; CHECK-NEXT:    [[TMP52:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33
-; CHECK-NEXT:    [[TMP53:%.*]] = ptrtoint ptr addrspace(1) [[TMP52]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP53]], i64 31)
-; CHECK-NEXT:    br label [[TMP21]]
-; CHECK:       30:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB23]]
+; CHECK:       [[BB23]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -120,17 +111,15 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP34:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32
 ; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP29]], i32 [[TMP34]]
 ; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP35]], align 1
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
-; CHECK-NEXT:    [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
 ; CHECK-NEXT:    [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP32]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   call void @use_variables_1()
@@ -140,16 +129,16 @@ define amdgpu_kernel void @k0() sanitize_address {
 
 define amdgpu_kernel void @k1() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k1(
-; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]]
@@ -172,23 +161,11 @@ define amdgpu_kernel void @k1() sanitize_address {
 ; CHECK-NEXT:    store i32 [[TMP24]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 4, i32 2), align 4
 ; CHECK-NEXT:    [[TMP25:%.*]] = add i32 [[TMP20]], [[TMP24]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP25]] to i64
-; CHECK-NEXT:    [[TMP27:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[TMP27]] to i64
-; CHECK-NEXT:    [[TMP34:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP28]])
-; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP34]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP13:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP26]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
-; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
-; CHECK-NEXT:    [[TMP53:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP53]], i64 24)
-; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
-; CHECK-NEXT:    [[TMP55:%.*]] = ptrtoint ptr addrspace(1) [[TMP54]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP55]], i64 31)
-; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
-; CHECK-NEXT:    [[TMP57:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP57]], i64 28)
-; CHECK-NEXT:    br label [[TMP14]]
-; CHECK:       32:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB23]]
+; CHECK:       [[BB23]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
 ; CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 3, i32 0), align 4
@@ -199,17 +176,15 @@ define amdgpu_kernel void @k1() sanitize_address {
 ; CHECK-NEXT:    [[TMP58:%.*]] = ptrtoint ptr addrspace(3) [[TMP32]] to i32
 ; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP29]], i32 [[TMP58]]
 ; CHECK-NEXT:    store i8 3, ptr addrspace(1) [[TMP59]], align 4
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
-; CHECK-NEXT:    [[TMP35:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP36:%.*]] = ptrtoint ptr [[TMP35]] to i64
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
 ; CHECK-NEXT:    [[TMP37:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP37]], i64 [[TMP36]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP37]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   call void @use_variables_1()
@@ -222,6 +197,6 @@ define amdgpu_kernel void @k1() sanitize_address {
 !0 = !{i32 4, !"nosanitize_address", i32 1}
 
 ;.
-; CHECK: [[META2]] = !{i32 0}
-; CHECK: [[META3]] = !{i32 1}
+; CHECK: [[META3]] = !{i32 0}
+; CHECK: [[META4]] = !{i32 1}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return-asan.ll
index 07baf90e370d1..400394c146d2a 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check malloc and free blocks are placed correctly when multiple
@@ -14,15 +14,15 @@
 define amdgpu_kernel void @test_kernel() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @test_kernel(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP15]], [[TMP16]]
@@ -41,9 +41,9 @@ define amdgpu_kernel void @test_kernel() sanitize_address {
 ; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 68
 ; CHECK-NEXT:    [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP32]], i64 28)
-; CHECK-NEXT:    br label [[TMP7]]
-; CHECK:       20:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB20]]
+; CHECK:       [[BB20]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.test_kernel, align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 1, i32 0), align 4
@@ -56,26 +56,26 @@ define amdgpu_kernel void @test_kernel() sanitize_address {
 ; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr addrspace(1) [[TMP13]], align 4
 ; CHECK-NEXT:    [[RESULT:%.*]] = add i32 [[VAL1]], [[VAL2]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[RESULT]], 0
-; CHECK-NEXT:    br i1 [[CMP]], label [[POSITIVE:%.*]], label [[NEGATIVE:%.*]]
-; CHECK:       positive:
-; CHECK-NEXT:    br label [[CONDFREE:%.*]]
-; CHECK:       negative:
+; CHECK-NEXT:    br i1 [[CMP]], label %[[POSITIVE:.*]], label %[[NEGATIVE:.*]]
+; CHECK:       [[POSITIVE]]:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[NEGATIVE]]:
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[VAL1]], 0
-; CHECK-NEXT:    br i1 [[CMP2]], label [[VAL1_POSITIVE:%.*]], label [[VAL1_NEGATIVE:%.*]]
-; CHECK:       val1_positive:
-; CHECK-NEXT:    br label [[CONDFREE]]
-; CHECK:       val1_negative:
-; CHECK-NEXT:    br label [[CONDFREE]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br i1 [[CMP2]], label %[[VAL1_POSITIVE:.*]], label %[[VAL1_NEGATIVE:.*]]
+; CHECK:       [[VAL1_POSITIVE]]:
+; CHECK-NEXT:    br label %[[CONDFREE]]
+; CHECK:       [[VAL1_NEGATIVE]]:
+; CHECK-NEXT:    br label %[[CONDFREE]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
 ; CHECK-NEXT:    [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64
-; CHECK-NEXT:    [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP24]], i64 [[TMP23]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
 %val1 = load i32, ptr addrspace(1) addrspacecast (ptr addrspace(3) @lds_1 to ptr addrspace(1))
@@ -109,4 +109,5 @@ ret void
 ; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return.ll
index 6848e2c06c1e1..f55f65e0abce6 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check malloc and free blocks are placed correctly when multiple
@@ -8,42 +8,30 @@
 @lds_2 = internal addrspace(3) global i32 poison
 
 ;.
-; CHECK: @llvm.amdgcn.sw.lds.test_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.test_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.test_kernel.md.type { %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 32, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.test_kernel = internal addrspace(3) global ptr poison, align 4, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.test_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.test_kernel.md.type { %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 8, i32 4, i32 4 }, %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 12, i32 4, i32 4 } }
 ;.
 define amdgpu_kernel void @test_kernel() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @test_kernel(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB11:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP15]], [[TMP16]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP18]] to i64
-; CHECK-NEXT:    [[TMP14:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr [[TMP14]] to i64
-; CHECK-NEXT:    [[TMP20:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP17]], i64 [[TMP19]])
-; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP20]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP6:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP17]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.test_kernel, align 8
-; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
-; CHECK-NEXT:    [[TMP28:%.*]] = ptrtoint ptr addrspace(1) [[TMP27]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP28]], i64 24)
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 36
-; CHECK-NEXT:    [[TMP30:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP30]], i64 28)
-; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 68
-; CHECK-NEXT:    [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP32]], i64 28)
-; CHECK-NEXT:    br label [[TMP7]]
-; CHECK:       20:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB11]]
+; CHECK:       [[BB11]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.test_kernel, align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 1, i32 0), align 4
@@ -56,26 +44,24 @@ define amdgpu_kernel void @test_kernel() sanitize_address {
 ; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr addrspace(1) [[TMP13]], align 4
 ; CHECK-NEXT:    [[RESULT:%.*]] = add i32 [[VAL1]], [[VAL2]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[RESULT]], 0
-; CHECK-NEXT:    br i1 [[CMP]], label [[POSITIVE:%.*]], label [[NEGATIVE:%.*]]
-; CHECK:       positive:
-; CHECK-NEXT:    br label [[CONDFREE:%.*]]
-; CHECK:       negative:
+; CHECK-NEXT:    br i1 [[CMP]], label %[[POSITIVE:.*]], label %[[NEGATIVE:.*]]
+; CHECK:       [[POSITIVE]]:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[NEGATIVE]]:
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[VAL1]], 0
-; CHECK-NEXT:    br i1 [[CMP2]], label [[VAL1_POSITIVE:%.*]], label [[VAL1_NEGATIVE:%.*]]
-; CHECK:       val1_positive:
-; CHECK-NEXT:    br label [[CONDFREE]]
-; CHECK:       val1_negative:
-; CHECK-NEXT:    br label [[CONDFREE]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br i1 [[CMP2]], label %[[VAL1_POSITIVE:.*]], label %[[VAL1_NEGATIVE:.*]]
+; CHECK:       [[VAL1_POSITIVE]]:
+; CHECK-NEXT:    br label %[[CONDFREE]]
+; CHECK:       [[VAL1_NEGATIVE]]:
+; CHECK-NEXT:    br label %[[CONDFREE]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
-; CHECK-NEXT:    [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
 ; CHECK-NEXT:    [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP24]], i64 [[TMP23]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP24]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
 %val1 = load i32, ptr addrspace(1) addrspacecast (ptr addrspace(3) @lds_1 to ptr addrspace(1))
@@ -105,8 +91,8 @@ ret void
 ;.
 ; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
index a6e6b84bba304..75ac649d0c28d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
@@ -73,9 +73,9 @@ define amdgpu_kernel void @k1() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll
index 40b1305a3b12c..9cd7d8ab8be82 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check if static and dynamic LDS accesses are lowered correctly when a non-kernel
@@ -44,16 +44,16 @@ define void @use_variables() sanitize_address {
 ; CHECK-NEXT:    [[TMP24:%.*]] = and i1 [[TMP20]], [[TMP23]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP24]])
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp ne i64 [[TMP25]], 0
-; CHECK-NEXT:    br i1 [[TMP26]], label [[ASAN_REPORT:%.*]], label [[TMP29:%.*]], !prof [[PROF3:![0-9]+]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP24]], label [[TMP27:%.*]], label [[TMP28:%.*]]
-; CHECK:       27:
+; CHECK-NEXT:    br i1 [[TMP26]], label %[[ASAN_REPORT:.*]], label %[[BB29:.*]], !prof [[PROF3:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP24]], label %[[BB27:.*]], label %[[BB28:.*]]
+; CHECK:       [[BB27]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP47]]) #[[ATTR7:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP28]]
-; CHECK:       28:
-; CHECK-NEXT:    br label [[TMP29]]
-; CHECK:       29:
+; CHECK-NEXT:    br label %[[BB28]]
+; CHECK:       [[BB28]]:
+; CHECK-NEXT:    br label %[[BB29]]
+; CHECK:       [[BB29]]:
 ; CHECK-NEXT:    store i8 3, ptr addrspace(1) [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32
 ; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP30]]
@@ -69,16 +69,16 @@ define void @use_variables() sanitize_address {
 ; CHECK-NEXT:    [[TMP41:%.*]] = and i1 [[TMP37]], [[TMP40]]
 ; CHECK-NEXT:    [[TMP42:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP41]])
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp ne i64 [[TMP42]], 0
-; CHECK-NEXT:    br i1 [[TMP43]], label [[ASAN_REPORT1:%.*]], label [[TMP46:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report1:
-; CHECK-NEXT:    br i1 [[TMP41]], label [[TMP44:%.*]], label [[TMP45:%.*]]
-; CHECK:       44:
+; CHECK-NEXT:    br i1 [[TMP43]], label %[[ASAN_REPORT1:.*]], label %[[BB46:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP41]], label %[[BB44:.*]], label %[[BB45:.*]]
+; CHECK:       [[BB44]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP45]]
-; CHECK:       45:
-; CHECK-NEXT:    br label [[TMP46]]
-; CHECK:       46:
+; CHECK-NEXT:    br label %[[BB45]]
+; CHECK:       [[BB45]]:
+; CHECK-NEXT:    br label %[[BB46]]
+; CHECK:       [[BB46]]:
 ; CHECK-NEXT:    store i8 3, ptr addrspace(1) [[TMP31]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -90,15 +90,15 @@ define void @use_variables() sanitize_address {
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB32:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]]
@@ -135,9 +135,9 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68
 ; CHECK-NEXT:    [[TMP75:%.*]] = ptrtoint ptr addrspace(1) [[TMP74]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP75]], i64 28)
-; CHECK-NEXT:    br label [[TMP21]]
-; CHECK:       32:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB32]]
+; CHECK:       [[BB32]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP31:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -160,16 +160,16 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP49:%.*]] = and i1 [[TMP45]], [[TMP48]]
 ; CHECK-NEXT:    [[TMP50:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP49]])
 ; CHECK-NEXT:    [[TMP51:%.*]] = icmp ne i64 [[TMP50]], 0
-; CHECK-NEXT:    br i1 [[TMP51]], label [[ASAN_REPORT:%.*]], label [[TMP54:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP49]], label [[TMP52:%.*]], label [[CONDFREE:%.*]]
-; CHECK:       52:
+; CHECK-NEXT:    br i1 [[TMP51]], label %[[ASAN_REPORT:.*]], label %[[BB54:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP49]], label %[[BB52:.*]], label %[[BB53:.*]]
+; CHECK:       [[BB52]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP40]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[CONDFREE]]
-; CHECK:       53:
-; CHECK-NEXT:    br label [[TMP54]]
-; CHECK:       54:
+; CHECK-NEXT:    br label %[[BB53]]
+; CHECK:       [[BB53]]:
+; CHECK-NEXT:    br label %[[BB54]]
+; CHECK:       [[BB54]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP39]], align 1
 ; CHECK-NEXT:    [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP30]] to i32
 ; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP55]]
@@ -188,16 +188,16 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP67:%.*]] = and i1 [[TMP62]], [[TMP66]]
 ; CHECK-NEXT:    [[TMP68:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP67]])
 ; CHECK-NEXT:    [[TMP69:%.*]] = icmp ne i64 [[TMP68]], 0
-; CHECK-NEXT:    br i1 [[TMP69]], label [[ASAN_REPORT1:%.*]], label [[TMP72:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report1:
-; CHECK-NEXT:    br i1 [[TMP67]], label [[TMP70:%.*]], label [[TMP71:%.*]]
-; CHECK:       72:
+; CHECK-NEXT:    br i1 [[TMP69]], label %[[ASAN_REPORT1:.*]], label %[[BB74:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP67]], label %[[BB72:.*]], label %[[BB73:.*]]
+; CHECK:       [[BB72]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP91]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP71]]
-; CHECK:       73:
-; CHECK-NEXT:    br label [[TMP72]]
-; CHECK:       74:
+; CHECK-NEXT:    br label %[[BB73]]
+; CHECK:       [[BB73]]:
+; CHECK-NEXT:    br label %[[BB74]]
+; CHECK:       [[BB74]]:
 ; CHECK-NEXT:    [[TMP92:%.*]] = ptrtoint ptr addrspace(1) [[TMP90]] to i64
 ; CHECK-NEXT:    [[TMP76:%.*]] = lshr i64 [[TMP92]], 3
 ; CHECK-NEXT:    [[TMP77:%.*]] = add i64 [[TMP76]], 2147450880
@@ -210,28 +210,28 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP84:%.*]] = and i1 [[TMP80]], [[TMP83]]
 ; CHECK-NEXT:    [[TMP85:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP84]])
 ; CHECK-NEXT:    [[TMP86:%.*]] = icmp ne i64 [[TMP85]], 0
-; CHECK-NEXT:    br i1 [[TMP86]], label [[ASAN_REPORT2:%.*]], label [[TMP89:%.*]], !prof [[PROF3]]
-; CHECK:       asan.report2:
-; CHECK-NEXT:    br i1 [[TMP84]], label [[TMP87:%.*]], label [[TMP88:%.*]]
-; CHECK:       87:
+; CHECK-NEXT:    br i1 [[TMP86]], label %[[ASAN_REPORT2:.*]], label %[[BB89:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT2]]:
+; CHECK-NEXT:    br i1 [[TMP84]], label %[[BB87:.*]], label %[[BB88:.*]]
+; CHECK:       [[BB87]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP92]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP88]]
-; CHECK:       88:
-; CHECK-NEXT:    br label [[TMP89]]
-; CHECK:       89:
+; CHECK-NEXT:    br label %[[BB88]]
+; CHECK:       [[BB88]]:
+; CHECK-NEXT:    br label %[[BB89]]
+; CHECK:       [[BB89]]:
 ; CHECK-NEXT:    store i32 8, ptr addrspace(1) [[TMP56]], align 2
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP34:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64
 ; CHECK-NEXT:    [[TMP32:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP33:%.*]] = ptrtoint ptr [[TMP32]] to i64
-; CHECK-NEXT:    [[TMP34:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP34]], i64 [[TMP33]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   call void @use_variables()
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access.ll
index 0cc49c94e2279..aba2a1d122296 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check if static and dynamic LDS accesses are lowered correctly when a non-kernel
@@ -9,11 +9,11 @@
 @lds_4 = external addrspace(3) global [0 x i8], align 8
 
 ;.
-; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], align 8, !absolute_symbol [[META1:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 } }
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0]
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]]
 ;.
 define void @use_variables() sanitize_address {
 ; CHECK-LABEL: define void @use_variables(
@@ -45,16 +45,16 @@ define void @use_variables() sanitize_address {
 
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]]
@@ -77,23 +77,11 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
 ; CHECK-NEXT:    [[TMP28:%.*]] = add i32 [[TMP15]], [[TMP19]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP28]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64
-; CHECK-NEXT:    [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP23]])
-; CHECK-NEXT:    [[TMP20:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP20:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP26]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8
-; CHECK-NEXT:    [[TMP37:%.*]] = ptrtoint ptr addrspace(1) [[TMP36]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP37]], i64 24)
-; CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33
-; CHECK-NEXT:    [[TMP73:%.*]] = ptrtoint ptr addrspace(1) [[TMP53]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP73]], i64 31)
-; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68
-; CHECK-NEXT:    [[TMP75:%.*]] = ptrtoint ptr addrspace(1) [[TMP74]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP75]], i64 28)
-; CHECK-NEXT:    br label [[TMP21]]
-; CHECK:       32:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB23]]
+; CHECK:       [[BB23]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP31:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -108,17 +96,15 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP30]] to i32
 ; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP55]]
 ; CHECK-NEXT:    store i32 8, ptr addrspace(1) [[TMP56]], align 2
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
-; CHECK-NEXT:    [[TMP32:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP33:%.*]] = ptrtoint ptr [[TMP32]] to i64
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
 ; CHECK-NEXT:    [[TMP34:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP34]], i64 [[TMP33]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP34]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   call void @use_variables()
@@ -139,5 +125,6 @@ define amdgpu_kernel void @k0() sanitize_address {
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
 ; CHECK: [[META1]] = !{i32 8, i32 9}
-; CHECK: [[META2]] = !{i32 0}
+; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[META3]] = !{i32 0}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll
index f2cdc4c812db1..202c4a20f055b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check if static and dynamic LDS accesses are lowered correctly in kernel.
@@ -15,15 +15,15 @@
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB32:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]]
@@ -60,9 +60,9 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP112:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68
 ; CHECK-NEXT:    [[TMP113:%.*]] = ptrtoint ptr addrspace(1) [[TMP112]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP113]], i64 28)
-; CHECK-NEXT:    br label [[TMP21]]
-; CHECK:       32:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB32]]
+; CHECK:       [[BB32]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP35:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -88,16 +88,16 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP53:%.*]] = and i1 [[TMP49]], [[TMP52]]
 ; CHECK-NEXT:    [[TMP54:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP53]])
 ; CHECK-NEXT:    [[TMP55:%.*]] = icmp ne i64 [[TMP54]], 0
-; CHECK-NEXT:    br i1 [[TMP55]], label [[ASAN_REPORT:%.*]], label [[TMP58:%.*]], !prof [[PROF2:![0-9]+]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP53]], label [[TMP56:%.*]], label [[CONDFREE:%.*]]
-; CHECK:       56:
+; CHECK-NEXT:    br i1 [[TMP55]], label %[[ASAN_REPORT:.*]], label %[[BB58:.*]], !prof [[PROF3:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP53]], label %[[BB56:.*]], label %[[BB57:.*]]
+; CHECK:       [[BB56]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP44]]) #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[CONDFREE]]
-; CHECK:       57:
-; CHECK-NEXT:    br label [[TMP58]]
-; CHECK:       58:
+; CHECK-NEXT:    br label %[[BB57]]
+; CHECK:       [[BB57]]:
+; CHECK-NEXT:    br label %[[BB58]]
+; CHECK:       [[BB58]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP43]], align 4
 ; CHECK-NEXT:    [[TMP59:%.*]] = ptrtoint ptr addrspace(3) [[TMP27]] to i32
 ; CHECK-NEXT:    [[TMP60:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP59]]
@@ -114,16 +114,16 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP71:%.*]] = and i1 [[TMP66]], [[TMP70]]
 ; CHECK-NEXT:    [[TMP72:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP71]])
 ; CHECK-NEXT:    [[TMP73:%.*]] = icmp ne i64 [[TMP72]], 0
-; CHECK-NEXT:    br i1 [[TMP73]], label [[ASAN_REPORT1:%.*]], label [[TMP76:%.*]], !prof [[PROF2]]
-; CHECK:       asan.report1:
-; CHECK-NEXT:    br i1 [[TMP71]], label [[TMP74:%.*]], label [[TMP75:%.*]]
-; CHECK:       74:
+; CHECK-NEXT:    br i1 [[TMP73]], label %[[ASAN_REPORT1:.*]], label %[[BB76:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP71]], label %[[BB74:.*]], label %[[BB75:.*]]
+; CHECK:       [[BB74]]:
 ; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP61]]) #[[ATTR6]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP75]]
-; CHECK:       75:
-; CHECK-NEXT:    br label [[TMP76]]
-; CHECK:       76:
+; CHECK-NEXT:    br label %[[BB75]]
+; CHECK:       [[BB75]]:
+; CHECK-NEXT:    br label %[[BB76]]
+; CHECK:       [[BB76]]:
 ; CHECK-NEXT:    store i32 8, ptr addrspace(1) [[TMP60]], align 8
 ; CHECK-NEXT:    [[TMP77:%.*]] = ptrtoint ptr addrspace(3) [[TMP29]] to i32
 ; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP77]]
@@ -139,16 +139,16 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP88:%.*]] = and i1 [[TMP84]], [[TMP87]]
 ; CHECK-NEXT:    [[TMP89:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP88]])
 ; CHECK-NEXT:    [[TMP90:%.*]] = icmp ne i64 [[TMP89]], 0
-; CHECK-NEXT:    br i1 [[TMP90]], label [[ASAN_REPORT2:%.*]], label [[TMP93:%.*]], !prof [[PROF2]]
-; CHECK:       asan.report2:
-; CHECK-NEXT:    br i1 [[TMP88]], label [[TMP91:%.*]], label [[TMP92:%.*]]
-; CHECK:       91:
+; CHECK-NEXT:    br i1 [[TMP90]], label %[[ASAN_REPORT2:.*]], label %[[BB93:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT2]]:
+; CHECK-NEXT:    br i1 [[TMP88]], label %[[BB91:.*]], label %[[BB92:.*]]
+; CHECK:       [[BB91]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP79]]) #[[ATTR6]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP92]]
-; CHECK:       92:
-; CHECK-NEXT:    br label [[TMP93]]
-; CHECK:       93:
+; CHECK-NEXT:    br label %[[BB92]]
+; CHECK:       [[BB92]]:
+; CHECK-NEXT:    br label %[[BB93]]
+; CHECK:       [[BB93]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP78]], align 4
 ; CHECK-NEXT:    [[TMP94:%.*]] = ptrtoint ptr addrspace(3) [[TMP34]] to i32
 ; CHECK-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP94]]
@@ -164,28 +164,28 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP105:%.*]] = and i1 [[TMP101]], [[TMP104]]
 ; CHECK-NEXT:    [[TMP106:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP105]])
 ; CHECK-NEXT:    [[TMP107:%.*]] = icmp ne i64 [[TMP106]], 0
-; CHECK-NEXT:    br i1 [[TMP107]], label [[ASAN_REPORT3:%.*]], label [[TMP110:%.*]], !prof [[PROF2]]
-; CHECK:       asan.report3:
-; CHECK-NEXT:    br i1 [[TMP105]], label [[TMP108:%.*]], label [[TMP109:%.*]]
-; CHECK:       108:
+; CHECK-NEXT:    br i1 [[TMP107]], label %[[ASAN_REPORT3:.*]], label %[[BB110:.*]], !prof [[PROF3]]
+; CHECK:       [[ASAN_REPORT3]]:
+; CHECK-NEXT:    br i1 [[TMP105]], label %[[BB108:.*]], label %[[BB109:.*]]
+; CHECK:       [[BB108]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP96]]) #[[ATTR6]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP109]]
-; CHECK:       109:
-; CHECK-NEXT:    br label [[TMP110]]
-; CHECK:       110:
+; CHECK-NEXT:    br label %[[BB109]]
+; CHECK:       [[BB109]]:
+; CHECK-NEXT:    br label %[[BB110]]
+; CHECK:       [[BB110]]:
 ; CHECK-NEXT:    store i8 8, ptr addrspace(1) [[TMP95]], align 8
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP38:%.*]] = ptrtoint ptr addrspace(1) [[TMP35]] to i64
 ; CHECK-NEXT:    [[TMP36:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP37:%.*]] = ptrtoint ptr [[TMP36]] to i64
-; CHECK-NEXT:    [[TMP38:%.*]] = ptrtoint ptr addrspace(1) [[TMP35]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP38]], i64 [[TMP37]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   store i8 7, ptr addrspace(3) @lds_1, align 4
@@ -209,5 +209,6 @@ define amdgpu_kernel void @k0() sanitize_address {
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
 ; CHECK: [[META1]] = !{i32 8, i32 9}
-; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1048575}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test.ll
index e0bfca0f63ca7..bde5e65c1c732 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check if static and dynamic LDS accesses are lowered correctly in kernel.
@@ -8,22 +8,22 @@
 @lds_4 = external addrspace(3) global [0 x i8], align 8
 
 ;.
-; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], align 8, !absolute_symbol [[META1:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 } }
 ;.
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]]
@@ -46,23 +46,11 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
 ; CHECK-NEXT:    [[TMP32:%.*]] = add i32 [[TMP15]], [[TMP19]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = zext i32 [[TMP32]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64
-; CHECK-NEXT:    [[TMP39:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP30]], i64 [[TMP23]])
-; CHECK-NEXT:    [[TMP20:%.*]] = inttoptr i64 [[TMP39]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP20:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP30]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8
-; CHECK-NEXT:    [[TMP41:%.*]] = ptrtoint ptr addrspace(1) [[TMP40]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP41]], i64 24)
-; CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33
-; CHECK-NEXT:    [[TMP111:%.*]] = ptrtoint ptr addrspace(1) [[TMP57]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP111]], i64 31)
-; CHECK-NEXT:    [[TMP112:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68
-; CHECK-NEXT:    [[TMP113:%.*]] = ptrtoint ptr addrspace(1) [[TMP112]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP113]], i64 28)
-; CHECK-NEXT:    br label [[TMP21]]
-; CHECK:       32:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB23]]
+; CHECK:       [[BB23]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP35:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -86,17 +74,15 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP94:%.*]] = ptrtoint ptr addrspace(3) [[TMP34]] to i32
 ; CHECK-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP94]]
 ; CHECK-NEXT:    store i8 8, ptr addrspace(1) [[TMP95]], align 8
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
-; CHECK-NEXT:    [[TMP36:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP37:%.*]] = ptrtoint ptr [[TMP36]] to i64
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
 ; CHECK-NEXT:    [[TMP38:%.*]] = ptrtoint ptr addrspace(1) [[TMP35]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP38]], i64 [[TMP37]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP38]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   store i8 7, ptr addrspace(3) @lds_1, align 4
@@ -117,4 +103,5 @@ define amdgpu_kernel void @k0() sanitize_address {
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
 ; CHECK: [[META1]] = !{i32 8, i32 9}
+; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll
index b9b4c90daea87..04af557084208 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll
@@ -194,9 +194,9 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll
index a70db2259cc3f..75398005346c2 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check if LDS accesses are lowered correctly when LDS is passed as function
@@ -33,16 +33,16 @@ define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address {
 ; CHECK-NEXT:    [[TMP17:%.*]] = and i1 [[TMP12]], [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP17]])
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp ne i64 [[TMP18]], 0
-; CHECK-NEXT:    br i1 [[TMP19]], label [[ASAN_REPORT:%.*]], label [[TMP22:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP17]], label [[TMP20:%.*]], label [[TMP21:%.*]]
-; CHECK:       20:
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[ASAN_REPORT:.*]], label %[[BB22:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP17]], label %[[BB20:.*]], label %[[BB21:.*]]
+; CHECK:       [[BB20]]:
 ; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP7]]) #[[ATTR7:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP21]]
-; CHECK:       21:
-; CHECK-NEXT:    br label [[TMP22]]
-; CHECK:       22:
+; CHECK-NEXT:    br label %[[BB21]]
+; CHECK:       [[BB21]]:
+; CHECK-NEXT:    br label %[[BB22]]
+; CHECK:       [[BB22]]:
 ; CHECK-NEXT:    [[LDS_VAL:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
 ; CHECK-NEXT:    [[NEW_LDS_VAL:%.*]] = add i32 [[LDS_VAL]], 1
 ; CHECK-NEXT:    [[TMP24:%.*]] = ptrtoint ptr addrspace(3) [[LDS_ARG]] to i32
@@ -60,16 +60,16 @@ define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address {
 ; CHECK-NEXT:    [[TMP36:%.*]] = and i1 [[TMP31]], [[TMP35]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP36]])
 ; CHECK-NEXT:    [[TMP38:%.*]] = icmp ne i64 [[TMP37]], 0
-; CHECK-NEXT:    br i1 [[TMP38]], label [[ASAN_REPORT1:%.*]], label [[TMP41:%.*]], !prof [[PROF1]]
-; CHECK:       asan.report1:
-; CHECK-NEXT:    br i1 [[TMP36]], label [[TMP39:%.*]], label [[TMP40:%.*]]
-; CHECK:       39:
+; CHECK-NEXT:    br i1 [[TMP38]], label %[[ASAN_REPORT1:.*]], label %[[BB41:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP36]], label %[[BB39:.*]], label %[[BB40:.*]]
+; CHECK:       [[BB39]]:
 ; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP26]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP40]]
-; CHECK:       40:
-; CHECK-NEXT:    br label [[TMP41]]
-; CHECK:       41:
+; CHECK-NEXT:    br label %[[BB40]]
+; CHECK:       [[BB40]]:
+; CHECK-NEXT:    br label %[[BB41]]
+; CHECK:       [[BB41]]:
 ; CHECK-NEXT:    store i32 [[NEW_LDS_VAL]], ptr addrspace(1) [[TMP25]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -81,16 +81,16 @@ define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address {
 
 define amdgpu_kernel void @my_kernel() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @my_kernel(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[TMP11]], [[TMP12]]
@@ -106,26 +106,26 @@ define amdgpu_kernel void @my_kernel() sanitize_address {
 ; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 4128
 ; CHECK-NEXT:    [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP23]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP24]], i64 1024)
-; CHECK-NEXT:    br label [[TMP7]]
-; CHECK:       18:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP17:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 [[TMP8]]
 ; CHECK-NEXT:    [[LDS_PTR:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) [[TMP9]], i32 0, i32 0
 ; CHECK-NEXT:    call void @my_function(ptr addrspace(3) [[LDS_PTR]])
-; CHECK-NEXT:    br label [[CONDFREE:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = ptrtoint ptr addrspace(1) [[TMP17]] to i64
 ; CHECK-NEXT:    [[TMP18:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64
-; CHECK-NEXT:    [[TMP20:%.*]] = ptrtoint ptr addrspace(1) [[TMP17]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP20]], i64 [[TMP19]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   %lds_ptr = getelementptr [1024 x i32], ptr addrspace(3) @lds_var, i32 0, i32 0
@@ -147,6 +147,7 @@ define amdgpu_kernel void @my_kernel() sanitize_address {
 ; CHECK: attributes #[[ATTR7]] = { nomerge }
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
-; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
-; CHECK: [[META2]] = !{i32 0}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[META3]] = !{i32 0}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param.ll
index 55a36f85dc73a..fa5888e88d022 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check if LDS accesses are lowered correctly when LDS is passed as function
@@ -7,9 +7,9 @@
 @lds_var = internal addrspace(3) global [1024 x i32] poison, align 4
 
 ;.
-; CHECK: @llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.my_kernel.md.type { %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 32, i32 4096, i32 5120 } }, no_sanitize_address
-; CHECK: @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, align 4, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.my_kernel.md.type { %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 8, i32 4096, i32 4096 } }
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel]
 ;.
 define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address {
 ; CHECK-LABEL: define void @my_function(
@@ -35,51 +35,40 @@ define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address {
 
 define amdgpu_kernel void @my_kernel() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @my_kernel(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META1:![0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB11:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[TMP11]], [[TMP12]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP14]] to i64
-; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP15:%.*]] = ptrtoint ptr [[TMP10]] to i64
-; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP13]], i64 [[TMP15]])
-; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP16]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP6:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP13]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
-; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP22]], i64 24)
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 4128
-; CHECK-NEXT:    [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP23]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP24]], i64 1024)
-; CHECK-NEXT:    br label [[TMP7]]
-; CHECK:       18:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB11]]
+; CHECK:       [[BB11]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP17:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 [[TMP8]]
 ; CHECK-NEXT:    [[LDS_PTR:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) [[TMP9]], i32 0, i32 0
 ; CHECK-NEXT:    call void @my_function(ptr addrspace(3) [[LDS_PTR]])
-; CHECK-NEXT:    br label [[CONDFREE:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
-; CHECK-NEXT:    [[TMP18:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
 ; CHECK-NEXT:    [[TMP20:%.*]] = ptrtoint ptr addrspace(1) [[TMP17]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP20]], i64 [[TMP19]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP20]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   %lds_ptr = getelementptr [1024 x i32], ptr addrspace(3) @lds_var, i32 0, i32 0
@@ -94,9 +83,9 @@ define amdgpu_kernel void @my_kernel() sanitize_address {
 ; CHECK: attributes #[[ATTR0]] = { sanitize_address }
 ; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8" }
 ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
-; CHECK: [[META1]] = !{i32 0}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[META2]] = !{i32 0}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll
index 255dda562c1ea..466af26b7400d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll
@@ -7,13 +7,13 @@
 @B = external addrspace(3) global [0 x i32]
 
 ;.
-; @llvm.amdgcn.sw.lds.kernel_0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
-; @llvm.amdgcn.sw.lds.kernel_0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_0.md.type { %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.kernel_2 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0]]
-; @llvm.amdgcn.sw.lds.kernel_2.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_2.md.type { %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.kernel_1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
+; @llvm.amdgcn.sw.lds.kernel_1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]]
 ; @llvm.amdgcn.kernel_1.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1:![0-9]+]]
 ; @llvm.amdgcn.sw.lds.kernel_1.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_1.md.type { %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.kernel_2 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0]]
+; @llvm.amdgcn.sw.lds.kernel_2.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_2.md.type { %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.kernel_0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0]]
+; @llvm.amdgcn.sw.lds.kernel_0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_0.md.type { %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
 ; @llvm.amdgcn.sw.lds.kernel_3 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
 ; @llvm.amdgcn.kernel_3.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1]]
 ; @llvm.amdgcn.sw.lds.kernel_3.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_3.md.type { %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
@@ -58,9 +58,9 @@ define amdgpu_kernel void @kernel_0() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
@@ -116,9 +116,9 @@ define amdgpu_kernel void @kernel_1() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP24]] to i64
 ; CHECK-NEXT:    [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64
-; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP24]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
@@ -166,9 +166,9 @@ define amdgpu_kernel void @kernel_2() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
@@ -224,9 +224,9 @@ define amdgpu_kernel void @kernel_3() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP24]] to i64
 ; CHECK-NEXT:    [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64
-; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP24]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
@@ -288,8 +288,3 @@ define private ptr @get_B_ptr() sanitize_address {
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 4, !"nosanitize_address", i32 1}
-
-;.
-; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
-; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8,8" }
-;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll
index 7184ebbb8faa3..4a8b09ddfd886 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll
@@ -7,18 +7,18 @@
 @B = external addrspace(3) global [0 x i32]
 
 ;.
-; @llvm.amdgcn.sw.lds.kernel_2 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
-; @llvm.amdgcn.sw.lds.kernel_2.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_2.md.type { %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.kernel_1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
-; @llvm.amdgcn.kernel_1.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1:![0-9]+]]
-; @llvm.amdgcn.sw.lds.kernel_1.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_1.md.type { %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.kernel_3 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
-; @llvm.amdgcn.kernel_3.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1]]
-; @llvm.amdgcn.sw.lds.kernel_3.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_3.md.type { %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.kernel_0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0]]
-; @llvm.amdgcn.sw.lds.kernel_0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_0.md.type { %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [4 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [4 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_1.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0)], [2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_2.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_3.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0)]], no_sanitize_address
+; @llvm.amdgcn.sw.lds.kernel_0 = internal addrspace(3) global ptr poison, align 8, !absolute_symbol [[META0:![0-9]+]]
+; @llvm.amdgcn.sw.lds.kernel_0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_0.md.type { %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 8, i32 64, i32 64 } }
+; @llvm.amdgcn.sw.lds.kernel_2 = internal addrspace(3) global ptr poison, align 8, !absolute_symbol [[META0]]
+; @llvm.amdgcn.sw.lds.kernel_2.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_2.md.type { %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 8, i32 64, i32 64 } }
+; @llvm.amdgcn.sw.lds.kernel_1 = internal addrspace(3) global ptr poison, align 4, !absolute_symbol [[META0]]
+; @llvm.amdgcn.kernel_1.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META1:![0-9]+]]
+; @llvm.amdgcn.sw.lds.kernel_1.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_1.md.type { %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 8, i32 0, i32 0 } }
+; @llvm.amdgcn.sw.lds.kernel_3 = internal addrspace(3) global ptr poison, align 4, !absolute_symbol [[META0]]
+; @llvm.amdgcn.kernel_3.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol [[META1]]
+; @llvm.amdgcn.sw.lds.kernel_3.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_3.md.type { %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 8, i32 0, i32 0 } }
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [4 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3]
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [4 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_1.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0)], [2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_2.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_3.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0)]]
 ;.
 define amdgpu_kernel void @kernel_0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_0(
@@ -30,25 +30,16 @@ define amdgpu_kernel void @kernel_0() sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB11:.*]]
 ; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[TMP13:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
-; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]])
-; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP6:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP11]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
-; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP21]], i64 24)
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96
-; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 32)
-; CHECK-NEXT:    br label %[[BB18]]
-; CHECK:       [[BB18]]:
+; CHECK-NEXT:    br label %[[BB11]]
+; CHECK:       [[BB11]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
@@ -58,10 +49,8 @@ define amdgpu_kernel void @kernel_0() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
-; CHECK-NEXT:    [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
 ; CHECK-NEXT:    [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]])
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP18]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
@@ -80,7 +69,7 @@ define amdgpu_kernel void @kernel_1() sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
 ; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, align 4
 ; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 0, i32 2), align 4
@@ -96,16 +85,10 @@ define amdgpu_kernel void @kernel_1() sanitize_address {
 ; CHECK-NEXT:    store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP21]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
-; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]])
-; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP13:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP16]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
-; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP27]], i64 24)
-; CHECK-NEXT:    br label %[[BB23]]
-; CHECK:       [[BB23]]:
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
@@ -116,10 +99,8 @@ define amdgpu_kernel void @kernel_1() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
-; CHECK-NEXT:    [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64
 ; CHECK-NEXT:    [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]])
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP25]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
@@ -138,25 +119,16 @@ define amdgpu_kernel void @kernel_2() sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB11:.*]]
 ; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[TMP13:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
-; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]])
-; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP6:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP11]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
-; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP21]], i64 24)
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96
-; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 32)
-; CHECK-NEXT:    br label %[[BB18]]
-; CHECK:       [[BB18]]:
+; CHECK-NEXT:    br label %[[BB11]]
+; CHECK:       [[BB11]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
@@ -166,10 +138,8 @@ define amdgpu_kernel void @kernel_2() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
-; CHECK-NEXT:    [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
 ; CHECK-NEXT:    [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]])
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP18]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
@@ -188,7 +158,7 @@ define amdgpu_kernel void @kernel_3() sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
 ; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, align 4
 ; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 0, i32 2), align 4
@@ -204,16 +174,10 @@ define amdgpu_kernel void @kernel_3() sanitize_address {
 ; CHECK-NEXT:    store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP21]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
-; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]])
-; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP13:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP16]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
-; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP27]], i64 24)
-; CHECK-NEXT:    br label %[[BB23]]
-; CHECK:       [[BB23]]:
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
@@ -224,10 +188,8 @@ define amdgpu_kernel void @kernel_3() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
-; CHECK-NEXT:    [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64
 ; CHECK-NEXT:    [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]])
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP25]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
@@ -288,8 +250,3 @@ define private ptr @get_B_ptr() sanitize_address {
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 4, !"nosanitize_address", i32 1}
-
-;.
-; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
-; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8,8" }
-;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll
index 704bc9e635294..176ad0286bcb3 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll
@@ -9,10 +9,10 @@
 @lds_4 = external addrspace(3) global [4 x i8], align 8
 
 ;.
-; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 3, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 4, i32 32 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 3, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 4, i32 8 } }
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0]
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]]
 ;.
 define void @use_variables() sanitize_address {
 ; CHECK-LABEL: define void @use_variables(
@@ -57,34 +57,16 @@ define amdgpu_kernel void @k0() sanitize_address #1 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB24:.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB11:.*]]
 ; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
-; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP13:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP9]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
-; CHECK-NEXT:    [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP15]], i64 24)
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
-; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 31)
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
-; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP19]], i64 28)
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 99
-; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP21]], i64 29)
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 132
-; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 28)
-; CHECK-NEXT:    br label %[[BB24]]
-; CHECK:       [[BB24]]:
+; CHECK-NEXT:    br label %[[BB11]]
+; CHECK:       [[BB11]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP25:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
@@ -104,10 +86,8 @@ define amdgpu_kernel void @k0() sanitize_address #1 {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
-; CHECK-NEXT:    [[TMP34:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP35:%.*]] = ptrtoint ptr [[TMP34]] to i64
 ; CHECK-NEXT:    [[TMP36:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP36]], i64 [[TMP35]])
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP36]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
@@ -125,8 +105,7 @@ attributes #1 = { "amdgpu-no-lds-kernel-id" }
 ; CHECK: attributes #[[ATTR0]] = { sanitize_address }
 ; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8" }
 ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
 ; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll
index 8f5abe962f8eb..ad1ed3f00b959 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll
@@ -8,10 +8,10 @@
 @lds_4 = external addrspace(3) global [4 x i8], align 8
 
 ;.
-; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 3, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 4, i32 32 } }, no_sanitize_address
-; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 3, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 4, i32 8 } }
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0]
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]]
 ;.
 define void @use_variables() sanitize_address {
 ; CHECK-LABEL: define void @use_variables(
@@ -56,34 +56,16 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB24:.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB11:.*]]
 ; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP16]] to i64
-; CHECK-NEXT:    [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP15]], i64 [[TMP24]])
-; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP6:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP15]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
-; CHECK-NEXT:    [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP26]], i64 24)
-; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 33
-; CHECK-NEXT:    [[TMP28:%.*]] = ptrtoint ptr addrspace(1) [[TMP27]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP28]], i64 31)
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 68
-; CHECK-NEXT:    [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP45]], i64 28)
-; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 99
-; CHECK-NEXT:    [[TMP66:%.*]] = ptrtoint ptr addrspace(1) [[TMP65]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP66]], i64 29)
-; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 132
-; CHECK-NEXT:    [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP68]], i64 28)
-; CHECK-NEXT:    br label %[[BB24]]
-; CHECK:       [[BB24]]:
+; CHECK-NEXT:    br label %[[BB11]]
+; CHECK:       [[BB11]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
@@ -103,10 +85,8 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
-; CHECK-NEXT:    [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
 ; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP22]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
@@ -124,8 +104,7 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK: attributes #[[ATTR0]] = { sanitize_address }
 ; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8" }
 ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
 ; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-O0.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-O0.ll
index 1973a0acf4659..f72690cd5e567 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-O0.ll
@@ -50,9 +50,9 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64
-; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll
index 73ffcdd783ded..818f2085ec9d9 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-no-heap-ptr.ll
@@ -129,9 +129,9 @@ define amdgpu_kernel void @k0() sanitize_address #1 {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP80:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
 ; CHECK-NEXT:    [[TMP78:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP79:%.*]] = ptrtoint ptr [[TMP78]] to i64
-; CHECK-NEXT:    [[TMP80:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP80]], i64 [[TMP79]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll
index 301bda7e0086e..165b523e133c8 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check if static LDS accesses in kernel are lowered correctly.
@@ -12,15 +12,15 @@
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]]
@@ -39,9 +39,9 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP63:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 68
 ; CHECK-NEXT:    [[TMP64:%.*]] = ptrtoint ptr addrspace(1) [[TMP63]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP64]], i64 28)
-; CHECK-NEXT:    br label [[TMP7]]
-; CHECK:       20:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB20]]
+; CHECK:       [[BB20]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -62,16 +62,16 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP37:%.*]] = and i1 [[TMP33]], [[TMP36]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP37]])
 ; CHECK-NEXT:    [[TMP39:%.*]] = icmp ne i64 [[TMP38]], 0
-; CHECK-NEXT:    br i1 [[TMP39]], label [[ASAN_REPORT:%.*]], label [[TMP42:%.*]], !prof [[PROF2:![0-9]+]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP37]], label [[TMP40:%.*]], label [[CONDFREE:%.*]]
-; CHECK:       40:
+; CHECK-NEXT:    br i1 [[TMP39]], label %[[ASAN_REPORT:.*]], label %[[BB42:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP37]], label %[[BB40:.*]], label %[[BB41:.*]]
+; CHECK:       [[BB40]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP28]]) #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[CONDFREE]]
-; CHECK:       41:
-; CHECK-NEXT:    br label [[TMP42]]
-; CHECK:       42:
+; CHECK-NEXT:    br label %[[BB41]]
+; CHECK:       [[BB41]]:
+; CHECK-NEXT:    br label %[[BB42]]
+; CHECK:       [[BB42]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP27]], align 4
 ; CHECK-NEXT:    [[TMP43:%.*]] = ptrtoint ptr addrspace(3) [[TMP24]] to i32
 ; CHECK-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP43]]
@@ -90,16 +90,16 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP55:%.*]] = and i1 [[TMP50]], [[TMP54]]
 ; CHECK-NEXT:    [[TMP56:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP55]])
 ; CHECK-NEXT:    [[TMP57:%.*]] = icmp ne i64 [[TMP56]], 0
-; CHECK-NEXT:    br i1 [[TMP57]], label [[ASAN_REPORT1:%.*]], label [[TMP60:%.*]], !prof [[PROF2]]
-; CHECK:       asan.report1:
-; CHECK-NEXT:    br i1 [[TMP55]], label [[TMP58:%.*]], label [[TMP59:%.*]]
-; CHECK:       60:
+; CHECK-NEXT:    br i1 [[TMP57]], label %[[ASAN_REPORT1:.*]], label %[[BB62:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP55]], label %[[BB60:.*]], label %[[BB61:.*]]
+; CHECK:       [[BB60]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP79]]) #[[ATTR6]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP59]]
-; CHECK:       61:
-; CHECK-NEXT:    br label [[TMP60]]
-; CHECK:       62:
+; CHECK-NEXT:    br label %[[BB61]]
+; CHECK:       [[BB61]]:
+; CHECK-NEXT:    br label %[[BB62]]
+; CHECK:       [[BB62]]:
 ; CHECK-NEXT:    [[TMP80:%.*]] = ptrtoint ptr addrspace(1) [[TMP78]] to i64
 ; CHECK-NEXT:    [[TMP81:%.*]] = lshr i64 [[TMP80]], 3
 ; CHECK-NEXT:    [[TMP65:%.*]] = add i64 [[TMP81]], 2147450880
@@ -112,28 +112,28 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP72:%.*]] = and i1 [[TMP68]], [[TMP71]]
 ; CHECK-NEXT:    [[TMP73:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP72]])
 ; CHECK-NEXT:    [[TMP74:%.*]] = icmp ne i64 [[TMP73]], 0
-; CHECK-NEXT:    br i1 [[TMP74]], label [[ASAN_REPORT2:%.*]], label [[TMP77:%.*]], !prof [[PROF2]]
-; CHECK:       asan.report2:
-; CHECK-NEXT:    br i1 [[TMP72]], label [[TMP75:%.*]], label [[TMP76:%.*]]
-; CHECK:       75:
+; CHECK-NEXT:    br i1 [[TMP74]], label %[[ASAN_REPORT2:.*]], label %[[BB77:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT2]]:
+; CHECK-NEXT:    br i1 [[TMP72]], label %[[BB75:.*]], label %[[BB76:.*]]
+; CHECK:       [[BB75]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP80]]) #[[ATTR6]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP76]]
-; CHECK:       76:
-; CHECK-NEXT:    br label [[TMP77]]
-; CHECK:       77:
+; CHECK-NEXT:    br label %[[BB76]]
+; CHECK:       [[BB76]]:
+; CHECK-NEXT:    br label %[[BB77]]
+; CHECK:       [[BB77]]:
 ; CHECK-NEXT:    store i32 8, ptr addrspace(1) [[TMP44]], align 2
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   store i8 7, ptr addrspace(3) @lds_1, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll
index 02a241f947748..f500a3fe98c6b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 @lds_1 = internal addrspace(3) global [1 x i32] poison, align 4
@@ -10,15 +10,15 @@
 define amdgpu_kernel void @atomic_xchg_kernel(ptr addrspace(1) %out, [8 x i32], [8 x i32], i32 %swap) sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @atomic_xchg_kernel(
 ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], [8 x i32] [[TMP0:%.*]], [8 x i32] [[TMP1:%.*]], i32 [[SWAP:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
-; CHECK-NEXT:    br i1 [[TMP7]], label [[MALLOC:%.*]], label [[TMP20:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[MALLOC:.*]], label %[[BB20:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMIC_XCHG_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomic_xchg_kernel.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMIC_XCHG_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomic_xchg_kernel.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
@@ -34,9 +34,9 @@ define amdgpu_kernel void @atomic_xchg_kernel(ptr addrspace(1) %out, [8 x i32],
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP15]], i64 36
 ; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP19]], i64 28)
-; CHECK-NEXT:    br label [[TMP20]]
-; CHECK:       20:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB20]]
+; CHECK:       [[BB20]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.atomic_xchg_kernel, align 8
 ; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMIC_XCHG_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomic_xchg_kernel.md, i32 0, i32 1, i32 0), align 4
@@ -59,16 +59,16 @@ define amdgpu_kernel void @atomic_xchg_kernel(ptr addrspace(1) %out, [8 x i32],
 ; CHECK-NEXT:    [[TMP36:%.*]] = and i1 [[TMP31]], [[TMP35]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP36]])
 ; CHECK-NEXT:    [[TMP38:%.*]] = icmp ne i64 [[TMP37]], 0
-; CHECK-NEXT:    br i1 [[TMP38]], label [[ASAN_REPORT:%.*]], label [[TMP41:%.*]], !prof [[PROF2:![0-9]+]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP36]], label [[TMP39:%.*]], label [[TMP40:%.*]]
-; CHECK:       41:
+; CHECK-NEXT:    br i1 [[TMP38]], label %[[ASAN_REPORT:.*]], label %[[BB43:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP36]], label %[[BB41:.*]], label %[[BB42:.*]]
+; CHECK:       [[BB41]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP60]]) #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP40]]
-; CHECK:       42:
-; CHECK-NEXT:    br label [[TMP41]]
-; CHECK:       43:
+; CHECK-NEXT:    br label %[[BB42]]
+; CHECK:       [[BB42]]:
+; CHECK-NEXT:    br label %[[BB43]]
+; CHECK:       [[BB43]]:
 ; CHECK-NEXT:    [[TMP61:%.*]] = ptrtoint ptr addrspace(1) [[TMP59]] to i64
 ; CHECK-NEXT:    [[TMP62:%.*]] = lshr i64 [[TMP61]], 3
 ; CHECK-NEXT:    [[TMP46:%.*]] = add i64 [[TMP62]], 2147450880
@@ -81,30 +81,30 @@ define amdgpu_kernel void @atomic_xchg_kernel(ptr addrspace(1) %out, [8 x i32],
 ; CHECK-NEXT:    [[TMP53:%.*]] = and i1 [[TMP49]], [[TMP52]]
 ; CHECK-NEXT:    [[TMP54:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP53]])
 ; CHECK-NEXT:    [[TMP55:%.*]] = icmp ne i64 [[TMP54]], 0
-; CHECK-NEXT:    br i1 [[TMP55]], label [[ASAN_REPORT1:%.*]], label [[TMP58:%.*]], !prof [[PROF2]]
-; CHECK:       asan.report1:
-; CHECK-NEXT:    br i1 [[TMP53]], label [[TMP56:%.*]], label [[TMP57:%.*]]
-; CHECK:       56:
+; CHECK-NEXT:    br i1 [[TMP55]], label %[[ASAN_REPORT1:.*]], label %[[BB58:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP53]], label %[[BB56:.*]], label %[[BB57:.*]]
+; CHECK:       [[BB56]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP61]]) #[[ATTR6]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP57]]
-; CHECK:       57:
-; CHECK-NEXT:    br label [[TMP58]]
-; CHECK:       58:
+; CHECK-NEXT:    br label %[[BB57]]
+; CHECK:       [[BB57]]:
+; CHECK-NEXT:    br label %[[BB58]]
+; CHECK:       [[BB58]]:
 ; CHECK-NEXT:    [[TMP42:%.*]] = cmpxchg ptr addrspace(1) [[TMP25]], i32 7, i32 [[SWAP]] seq_cst monotonic, align 4
 ; CHECK-NEXT:    [[RESULT:%.*]] = extractvalue { i32, i1 } [[TMP42]], 0
 ; CHECK-NEXT:    store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4
-; CHECK-NEXT:    br label [[CONDFREE:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
 ; CHECK-NEXT:    [[TMP43:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64
-; CHECK-NEXT:    [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP45]], i64 [[TMP44]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   %gep = getelementptr i32, ptr addrspace(3) @lds_1, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll
index b87b3fd824dd3..85f650106b687 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 @lds_1 = internal addrspace(3) global [1 x i32] poison, align 4
@@ -12,15 +12,15 @@
 define amdgpu_kernel void @atomicrmw_kernel(ptr addrspace(1) %arg0) sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @atomicrmw_kernel(
 ; CHECK-SAME: ptr addrspace(1) [[ARG0:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP26:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP64:%.*]] = or i32 [[TMP0]], [[TMP26]]
 ; CHECK-NEXT:    [[TMP65:%.*]] = or i32 [[TMP64]], [[TMP45]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP65]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP20:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMICRMW_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomicrmw_kernel.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMICRMW_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomicrmw_kernel.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
@@ -39,9 +39,9 @@ define amdgpu_kernel void @atomicrmw_kernel(ptr addrspace(1) %arg0) sanitize_add
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
 ; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP19]], i64 28)
-; CHECK-NEXT:    br label [[TMP20]]
-; CHECK:       20:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB20]]
+; CHECK:       [[BB20]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.atomicrmw_kernel, align 8
 ; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMICRMW_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomicrmw_kernel.md, i32 0, i32 1, i32 0), align 4
@@ -66,16 +66,16 @@ define amdgpu_kernel void @atomicrmw_kernel(ptr addrspace(1) %arg0) sanitize_add
 ; CHECK-NEXT:    [[TMP39:%.*]] = and i1 [[TMP34]], [[TMP38]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP39]])
 ; CHECK-NEXT:    [[TMP41:%.*]] = icmp ne i64 [[TMP40]], 0
-; CHECK-NEXT:    br i1 [[TMP41]], label [[ASAN_REPORT:%.*]], label [[TMP44:%.*]], !prof [[PROF2:![0-9]+]]
-; CHECK:       asan.report:
-; CHECK-NEXT:    br i1 [[TMP39]], label [[TMP42:%.*]], label [[TMP43:%.*]]
-; CHECK:       44:
+; CHECK-NEXT:    br i1 [[TMP41]], label %[[ASAN_REPORT:.*]], label %[[BB46:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP39]], label %[[BB44:.*]], label %[[BB45:.*]]
+; CHECK:       [[BB44]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP99]]) #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP43]]
-; CHECK:       45:
-; CHECK-NEXT:    br label [[TMP44]]
-; CHECK:       46:
+; CHECK-NEXT:    br label %[[BB45]]
+; CHECK:       [[BB45]]:
+; CHECK-NEXT:    br label %[[BB46]]
+; CHECK:       [[BB46]]:
 ; CHECK-NEXT:    [[TMP100:%.*]] = ptrtoint ptr addrspace(1) [[TMP98]] to i64
 ; CHECK-NEXT:    [[TMP101:%.*]] = lshr i64 [[TMP100]], 3
 ; CHECK-NEXT:    [[TMP102:%.*]] = add i64 [[TMP101]], 2147450880
@@ -88,16 +88,16 @@ define amdgpu_kernel void @atomicrmw_kernel(ptr addrspace(1) %arg0) sanitize_add
 ; CHECK-NEXT:    [[TMP108:%.*]] = and i1 [[TMP105]], [[TMP107]]
 ; CHECK-NEXT:    [[TMP109:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP108]])
 ; CHECK-NEXT:    [[TMP110:%.*]] = icmp ne i64 [[TMP109]], 0
-; CHECK-NEXT:    br i1 [[TMP110]], label [[ASAN_REPORT1:%.*]], label [[TMP111:%.*]], !prof [[PROF2]]
-; CHECK:       asan.report1:
-; CHECK-NEXT:    br i1 [[TMP108]], label [[TMP112:%.*]], label [[TMP113:%.*]]
-; CHECK:       59:
+; CHECK-NEXT:    br i1 [[TMP110]], label %[[ASAN_REPORT1:.*]], label %[[BB61:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP108]], label %[[BB59:.*]], label %[[BB60:.*]]
+; CHECK:       [[BB59]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP100]]) #[[ATTR6]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP113]]
-; CHECK:       60:
-; CHECK-NEXT:    br label [[TMP111]]
-; CHECK:       61:
+; CHECK-NEXT:    br label %[[BB60]]
+; CHECK:       [[BB60]]:
+; CHECK-NEXT:    br label %[[BB61]]
+; CHECK:       [[BB61]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw umin ptr addrspace(1) [[TMP28]], i32 [[TMP1]] seq_cst, align 4
 ; CHECK-NEXT:    [[TMP46:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32
 ; CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP46]]
@@ -116,16 +116,16 @@ define amdgpu_kernel void @atomicrmw_kernel(ptr addrspace(1) %arg0) sanitize_add
 ; CHECK-NEXT:    [[TMP58:%.*]] = and i1 [[TMP53]], [[TMP57]]
 ; CHECK-NEXT:    [[TMP59:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP58]])
 ; CHECK-NEXT:    [[TMP60:%.*]] = icmp ne i64 [[TMP59]], 0
-; CHECK-NEXT:    br i1 [[TMP60]], label [[ASAN_REPORT2:%.*]], label [[TMP63:%.*]], !prof [[PROF2]]
-; CHECK:       asan.report2:
-; CHECK-NEXT:    br i1 [[TMP58]], label [[TMP61:%.*]], label [[TMP62:%.*]]
-; CHECK:       80:
+; CHECK-NEXT:    br i1 [[TMP60]], label %[[ASAN_REPORT2:.*]], label %[[BB82:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT2]]:
+; CHECK-NEXT:    br i1 [[TMP58]], label %[[BB80:.*]], label %[[BB81:.*]]
+; CHECK:       [[BB80]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP116]]) #[[ATTR6]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP62]]
-; CHECK:       81:
-; CHECK-NEXT:    br label [[TMP63]]
-; CHECK:       82:
+; CHECK-NEXT:    br label %[[BB81]]
+; CHECK:       [[BB81]]:
+; CHECK-NEXT:    br label %[[BB82]]
+; CHECK:       [[BB82]]:
 ; CHECK-NEXT:    [[TMP117:%.*]] = ptrtoint ptr addrspace(1) [[TMP115]] to i64
 ; CHECK-NEXT:    [[TMP118:%.*]] = lshr i64 [[TMP117]], 3
 ; CHECK-NEXT:    [[TMP119:%.*]] = add i64 [[TMP118]], 2147450880
@@ -138,16 +138,16 @@ define amdgpu_kernel void @atomicrmw_kernel(ptr addrspace(1) %arg0) sanitize_add
 ; CHECK-NEXT:    [[TMP92:%.*]] = and i1 [[TMP88]], [[TMP91]]
 ; CHECK-NEXT:    [[TMP93:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP92]])
 ; CHECK-NEXT:    [[TMP94:%.*]] = icmp ne i64 [[TMP93]], 0
-; CHECK-NEXT:    br i1 [[TMP94]], label [[ASAN_REPORT3:%.*]], label [[TMP97:%.*]], !prof [[PROF2]]
-; CHECK:       asan.report3:
-; CHECK-NEXT:    br i1 [[TMP92]], label [[TMP95:%.*]], label [[TMP96:%.*]]
-; CHECK:       95:
+; CHECK-NEXT:    br i1 [[TMP94]], label %[[ASAN_REPORT3:.*]], label %[[BB97:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT3]]:
+; CHECK-NEXT:    br i1 [[TMP92]], label %[[BB95:.*]], label %[[BB96:.*]]
+; CHECK:       [[BB95]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP117]]) #[[ATTR6]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP96]]
-; CHECK:       96:
-; CHECK-NEXT:    br label [[TMP97]]
-; CHECK:       97:
+; CHECK-NEXT:    br label %[[BB96]]
+; CHECK:       [[BB96]]:
+; CHECK-NEXT:    br label %[[BB97]]
+; CHECK:       [[BB97]]:
 ; CHECK-NEXT:    [[TMP3:%.*]] = atomicrmw umax ptr addrspace(1) [[TMP47]], i32 [[TMP1]] seq_cst, align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP66:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
@@ -165,28 +165,28 @@ define amdgpu_kernel void @atomicrmw_kernel(ptr addrspace(1) %arg0) sanitize_add
 ; CHECK-NEXT:    [[TMP78:%.*]] = and i1 [[TMP73]], [[TMP77]]
 ; CHECK-NEXT:    [[TMP79:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP78]])
 ; CHECK-NEXT:    [[TMP80:%.*]] = icmp ne i64 [[TMP79]], 0
-; CHECK-NEXT:    br i1 [[TMP80]], label [[ASAN_REPORT4:%.*]], label [[TMP83:%.*]], !prof [[PROF2]]
-; CHECK:       asan.report4:
-; CHECK-NEXT:    br i1 [[TMP78]], label [[TMP81:%.*]], label [[TMP82:%.*]]
-; CHECK:       115:
+; CHECK-NEXT:    br i1 [[TMP80]], label %[[ASAN_REPORT4:.*]], label %[[BB117:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT4]]:
+; CHECK-NEXT:    br i1 [[TMP78]], label %[[BB115:.*]], label %[[BB116:.*]]
+; CHECK:       [[BB115]]:
 ; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP68]]) #[[ATTR6]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label [[TMP82]]
-; CHECK:       116:
-; CHECK-NEXT:    br label [[TMP83]]
-; CHECK:       117:
+; CHECK-NEXT:    br label %[[BB116]]
+; CHECK:       [[BB116]]:
+; CHECK-NEXT:    br label %[[BB117]]
+; CHECK:       [[BB117]]:
 ; CHECK-NEXT:    store i32 [[TMP4]], ptr addrspace(1) [[TMP67]], align 4
-; CHECK-NEXT:    br label [[CONDFREE:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP86:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
 ; CHECK-NEXT:    [[TMP84:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP85:%.*]] = ptrtoint ptr [[TMP84]] to i64
-; CHECK-NEXT:    [[TMP86:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP86]], i64 [[TMP85]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   %1 = load volatile i32, ptr addrspace(1) %arg0
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test.ll
index 066b9429425ac..917aa3247c07d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 ; RUN: llc < %s -enable-new-pm -stop-after=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
@@ -7,8 +7,8 @@
 @lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
 
 ;.
-; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 } }
 ;.
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
@@ -20,29 +20,17 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %Malloc, label %[[BB11:.*]]
 ; CHECK:       Malloc:
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP16]] to i64
-; CHECK-NEXT:    [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP23]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP15]], i64 [[TMP11]])
-; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP6:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP15]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
-; CHECK-NEXT:    [[TMP41:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP41]], i64 24)
-; CHECK-NEXT:    [[TMP61:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 33
-; CHECK-NEXT:    [[TMP62:%.*]] = ptrtoint ptr addrspace(1) [[TMP61]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP62]], i64 31)
-; CHECK-NEXT:    [[TMP63:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 68
-; CHECK-NEXT:    [[TMP64:%.*]] = ptrtoint ptr addrspace(1) [[TMP63]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP64]], i64 28)
-; CHECK-NEXT:    br label [[TMP7]]
-; CHECK:       20:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB11]]
+; CHECK:       [[BB11]]: {{.*}}
+; CHECK:         [[XYZCOND:%.*]] = phi i1 [ false, %WId ], [ true, %Malloc ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -55,17 +43,15 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP43:%.*]] = ptrtoint ptr addrspace(3) [[TMP24]] to i32
 ; CHECK-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP43]]
 ; CHECK-NEXT:    store i32 8, ptr addrspace(1) [[TMP44]], align 2
-; CHECK-NEXT:    br label [[CONDFREE1:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
-; CHECK-NEXT:    [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
 ; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP22]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   store i8 7, ptr addrspace(3) @lds_1, align 4
@@ -79,8 +65,8 @@ define amdgpu_kernel void @k0() sanitize_address {
 ;.
 ; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-vector-ptrs.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-vector-ptrs.ll
index 34caf91def933..53fd8b0683bed 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-vector-ptrs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-vector-ptrs.ll
@@ -6,8 +6,8 @@
 @lds_var2 = internal addrspace(3) global i32 poison
 
 ;.
-; CHECK: @llvm.amdgcn.sw.lds.example = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.example.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.example.md.type { %llvm.amdgcn.sw.lds.example.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.example.md.item { i32 32, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.example.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.example = internal addrspace(3) global ptr poison, align 4, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.example.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.example.md.type { %llvm.amdgcn.sw.lds.example.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.example.md.item { i32 8, i32 4, i32 4 }, %llvm.amdgcn.sw.lds.example.md.item { i32 12, i32 4, i32 4 } }
 ;.
 define amdgpu_kernel void @example() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @example(
@@ -25,20 +25,8 @@ define amdgpu_kernel void @example() sanitize_address {
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_EXAMPLE_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.example.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
-; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT:    [[TMP13:%.*]] = call ptr addrspace(1) @__ockl_dm_alloc(i64 [[TMP9]])
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.example, align 8
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
-; CHECK-NEXT:    [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP15]], i64 24)
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
-; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 28)
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
-; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP19]], i64 28)
 ; CHECK-NEXT:    br label %[[ENTRY]]
 ; CHECK:       [[ENTRY]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
@@ -62,10 +50,8 @@ define amdgpu_kernel void @example() sanitize_address {
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
 ; CHECK:       [[FREE]]:
-; CHECK-NEXT:    [[TMP33:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT:    [[TMP34:%.*]] = ptrtoint ptr [[TMP33]] to i64
 ; CHECK-NEXT:    [[TMP35:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
-; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP35]], i64 [[TMP34]])
+; CHECK-NEXT:    call void @__ockl_dm_dealloc(i64 [[TMP35]])
 ; CHECK-NEXT:    br label %[[END]]
 ; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
@@ -87,8 +73,7 @@ entry:
 ;.
 ; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
 ;.
 ; CHECK: [[META0]] = !{i32 0, i32 1}
 ; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}



More information about the llvm-commits mailing list