[llvm] [AMDGPU] Introduce address sanitizer instrumentation for LDS lowered by amdgpu-sw-lower-lds pass (PR #89208)

via llvm-commits llvm-commits at lists.llvm.org
Sat May 25 07:00:54 PDT 2024


https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/89208

>From 65dd67a36141133510c9e5c2c71e30069524e0d7 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 8 Mar 2024 16:43:57 +0530
Subject: [PATCH 1/2] [AMDGPU] Enable asan LDS instrumentation

---
 .../Instrumentation/AddressSanitizer.cpp      | 387 +++++++++++--
 .../asan-dynamic-lds-indirect-access.ll       | 529 ++++++++++++++++++
 .../AMDGPU/asan-dynamic-lds-test.ll           | 231 ++++++++
 .../AMDGPU/asan-static-indirect-access.ll     | 476 ++++++++++++++++
 ...atic-lds-indirect-access-function-param.ll | 203 +++++++
 .../AMDGPU/asan-static-lds-test.ll            | 249 +++++++++
 .../AMDGPU/asan_do_not_instrument_lds.ll      |  27 -
 .../asan_instrument_generic_address_space.ll  |  96 ++--
 8 files changed, 2071 insertions(+), 127 deletions(-)
 create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
 create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
 create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
 create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
 create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
 delete mode 100644 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll

diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 9cc978dc6c16e..a312f1d50fe9a 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -176,6 +176,7 @@ const char kAsanAllocasUnpoison[] = "__asan_allocas_unpoison";
 const char kAMDGPUAddressSharedName[] = "llvm.amdgcn.is.shared";
 const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private";
 const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64";
+const char kAMDGPULDSKernelId[] = "llvm.amdgcn.lds.kernel.id";
 const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable";
 
 // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
@@ -628,12 +629,6 @@ ASanAccessInfo::ASanAccessInfo(bool IsWrite, bool CompileKernel,
 
 } // namespace llvm
 
-static uint64_t getRedzoneSizeForScale(int MappingScale) {
-  // Redzone used for stack and globals is at least 32 bytes.
-  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
-  return std::max(32U, 1U << MappingScale);
-}
-
 static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) {
   if (TargetTriple.isOSEmscripten()) {
     return kAsanEmscriptenCtorAndDtorPriority;
@@ -939,10 +934,7 @@ class ModuleAddressSanitizer {
   StringRef getGlobalMetadataSection() const;
   void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName);
   void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
-  uint64_t getMinRedzoneSizeForGlobal() const {
-    return getRedzoneSizeForScale(Mapping.Scale);
-  }
-  uint64_t getRedzoneSizeForGlobal(uint64_t SizeInBytes) const;
+
   int GetAsanVersion(const Module &M) const;
 
   bool CompileKernel;
@@ -1239,6 +1231,290 @@ void AddressSanitizerPass::printPipeline(
   OS << '>';
 }
 
+static uint64_t getRedzoneSizeForScale(int MappingScale) {
+  // Redzone used for stack and globals is at least 32 bytes.
+  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+  return std::max(32U, 1U << MappingScale);
+}
+
+static uint64_t getMinRedzoneSizeForGlobal(int Scale) {
+  return getRedzoneSizeForScale(Scale);
+}
+
+static uint64_t getRedzoneSizeForGlobal(int Scale, uint64_t SizeInBytes) {
+  constexpr uint64_t kMaxRZ = 1 << 18;
+  const uint64_t MinRZ = getMinRedzoneSizeForGlobal(Scale);
+
+  uint64_t RZ = 0;
+  if (SizeInBytes <= MinRZ / 2) {
+    // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
+    // at least 32 bytes, optimize when SizeInBytes is less than or equal to
+    // half of MinRZ.
+    RZ = MinRZ - SizeInBytes;
+  } else {
+    // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
+    RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);
+
+    // Round up to multiple of MinRZ.
+    if (SizeInBytes % MinRZ)
+      RZ += MinRZ - (SizeInBytes % MinRZ);
+  }
+
+  assert((RZ + SizeInBytes) % MinRZ == 0);
+
+  return RZ;
+}
+
+static GlobalVariable *getKernelSwLDSGlobal(Module &M, Function &F) {
+  SmallString<64> KernelLDSName("llvm.amdgcn.sw.lds.");
+  KernelLDSName += F.getName();
+  return M.getNamedGlobal(KernelLDSName);
+}
+
+static GlobalVariable *getKernelSwLDSMetadataGlobal(Module &M, Function &F) {
+  SmallString<64> KernelLDSName("llvm.amdgcn.sw.lds.");
+  KernelLDSName += F.getName();
+  KernelLDSName += ".md";
+  return M.getNamedGlobal(KernelLDSName);
+}
+
+static GlobalVariable *getKernelSwDynLDSGlobal(Module &M, Function &F) {
+  SmallString<64> KernelLDSName("llvm.amdgcn.");
+  KernelLDSName += F.getName();
+  KernelLDSName += ".dynlds";
+  return M.getNamedGlobal(KernelLDSName);
+}
+
+static GlobalVariable *getKernelSwLDSBaseGlobal(Module &M) {
+  SmallString<64> KernelLDSName("llvm.amdgcn.sw.lds.base.table");
+  return M.getNamedGlobal(KernelLDSName);
+}
+
+static void updateLDSSizeFnAttr(Function *Func, uint32_t Offset,
+                                bool UsesDynLDS) {
+  if (Offset != 0) {
+    std::string Buffer;
+    raw_string_ostream SS{Buffer};
+    SS << format("%u", Offset);
+    if (UsesDynLDS)
+      SS << format(",%u", Offset);
+    Func->addFnAttr("amdgpu-lds-size", Buffer);
+  }
+}
+
+static void recordLDSAbsoluteAddress(Module &M, GlobalVariable *GV,
+                                     uint32_t Address) {
+  LLVMContext &Ctx = M.getContext();
+  auto *IntTy = M.getDataLayout().getIntPtrType(Ctx, 3);
+  auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address));
+  auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address + 1));
+  GV->setMetadata(LLVMContext::MD_absolute_symbol,
+                  MDNode::get(Ctx, {MinC, MaxC}));
+}
+
+static void UpdateSwLDSMetadataWithRedzoneInfo(Function &F, int Scale) {
+  Module *M = F.getParent();
+  GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F);
+  GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F);
+  if (!SwLDSMetadataGlobal || !SwLDSGlobal)
+    return;
+
+  LLVMContext &Ctx = M->getContext();
+  Type *Int32Ty = Type::getInt32Ty(Ctx);
+
+  Constant *MdInit = SwLDSMetadataGlobal->getInitializer();
+  Align MdAlign = Align(SwLDSMetadataGlobal->getAlign().valueOrOne());
+  Align LDSAlign = Align(SwLDSGlobal->getAlign().valueOrOne());
+
+  StructType *MDStructType =
+      cast<StructType>(SwLDSMetadataGlobal->getValueType());
+  assert(MDStructType);
+  unsigned NumStructs = MDStructType->getNumElements();
+
+  std::vector<Type *> Items;
+  std::vector<Constant *> Initializers;
+  uint32_t MallocSize = 0;
+  //{GV.start, GV.size, Align(GV.size + Redzone.size), Redzone.start, Redzone.size}
+  StructType *LDSItemTy = StructType::create(
+      Ctx, {Int32Ty, Int32Ty, Int32Ty, Int32Ty, Int32Ty}, "");
+  for (unsigned i = 0; i < NumStructs; i++) {
+    Items.push_back(LDSItemTy);
+    ConstantStruct *member =
+        dyn_cast<ConstantStruct>(MdInit->getAggregateElement(i));
+    Constant *NewInitItem;
+    if (member) {
+      ConstantInt *GlobalSize =
+          cast<ConstantInt>(member->getAggregateElement(1U));
+      unsigned GlobalSizeValue = GlobalSize->getZExtValue();
+      Constant *NewItemStartOffset = ConstantInt::get(Int32Ty, MallocSize);
+      if (GlobalSizeValue) {
+        Constant *NewItemGlobalSizeConst =
+            ConstantInt::get(Int32Ty, GlobalSizeValue);
+        const uint64_t RightRedzoneSize =
+            getRedzoneSizeForGlobal(Scale, GlobalSizeValue);
+        MallocSize += GlobalSizeValue;
+        Constant *NewItemRedzoneStartOffset =
+            ConstantInt::get(Int32Ty, MallocSize);
+        MallocSize += RightRedzoneSize;
+        Constant *NewItemRedzoneSize =
+            ConstantInt::get(Int32Ty, RightRedzoneSize);
+
+        unsigned NewItemAlignGlobalPlusRedzoneSize =
+            alignTo(GlobalSizeValue + RightRedzoneSize, LDSAlign);
+        Constant *NewItemAlignGlobalPlusRedzoneSizeConst =
+            ConstantInt::get(Int32Ty, NewItemAlignGlobalPlusRedzoneSize);
+        NewInitItem = ConstantStruct::get(
+            LDSItemTy, {NewItemStartOffset, NewItemGlobalSizeConst,
+                        NewItemAlignGlobalPlusRedzoneSizeConst,
+                        NewItemRedzoneStartOffset, NewItemRedzoneSize});
+        MallocSize = alignTo(MallocSize, LDSAlign);
+      } else {
+        Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize);
+        Constant *zero = ConstantInt::get(Int32Ty, 0);
+        NewInitItem = ConstantStruct::get(
+            LDSItemTy, {CurrMallocSize, zero, zero, zero, zero});
+      }
+    } else {
+      Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize);
+      Constant *zero = ConstantInt::get(Int32Ty, 0);
+      NewInitItem = ConstantStruct::get(
+          LDSItemTy, {CurrMallocSize, zero, zero, zero, zero});
+    }
+    Initializers.push_back(NewInitItem);
+  }
+  GlobalVariable *SwDynLDS = getKernelSwDynLDSGlobal(*M, F);
+  bool usesDynLDS = SwDynLDS ? true : false;
+  updateLDSSizeFnAttr(&F, MallocSize, usesDynLDS);
+  if (usesDynLDS)
+    recordLDSAbsoluteAddress(*M, SwDynLDS, MallocSize);
+
+  StructType *MetadataStructType = StructType::create(Ctx, Items, "");
+
+  GlobalVariable *NewSwLDSMetadataGlobal = new GlobalVariable(
+      *M, MetadataStructType, false, GlobalValue::InternalLinkage,
+      PoisonValue::get(MetadataStructType), "", nullptr,
+      GlobalValue::NotThreadLocal, 1, false);
+  Constant *Data = ConstantStruct::get(MetadataStructType, Initializers);
+  NewSwLDSMetadataGlobal->setInitializer(Data);
+  NewSwLDSMetadataGlobal->setAlignment(MdAlign);
+  GlobalValue::SanitizerMetadata MD;
+  MD.NoAddress = true;
+  NewSwLDSMetadataGlobal->setSanitizerMetadata(MD);
+
+  for (Use &U : make_early_inc_range(SwLDSMetadataGlobal->uses())) {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(U.getUser())) {
+      SmallVector<Constant *> Indices;
+      for (Use &Idx : GEP->indices()) {
+        Indices.push_back(cast<Constant>(Idx));
+      }
+      Constant *NewGEP = ConstantExpr::getGetElementPtr(
+          MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
+      GEP->replaceAllUsesWith(NewGEP);
+    } else if (LoadInst *Load = dyn_cast<LoadInst>(U.getUser())) {
+      Constant *zero = ConstantInt::get(Int32Ty, 0);
+      SmallVector<Constant *> Indices{zero, zero, zero};
+      Constant *NewGEP = ConstantExpr::getGetElementPtr(
+          MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
+      IRBuilder<> IRB(Load);
+      LoadInst *NewLoad = IRB.CreateLoad(Load->getType(), NewGEP);
+      Load->replaceAllUsesWith(NewLoad);
+      Load->eraseFromParent();
+    } else if (StoreInst *Store = dyn_cast<StoreInst>(U.getUser())) {
+      Constant *zero = ConstantInt::get(Int32Ty, 0);
+      SmallVector<Constant *> Indices{zero, zero, zero};
+      Constant *NewGEP = ConstantExpr::getGetElementPtr(
+          MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
+      IRBuilder<> IRB(Store);
+      StoreInst *NewStore = IRB.CreateStore(Store->getValueOperand(), NewGEP);
+      Store->replaceAllUsesWith(NewStore);
+      Store->eraseFromParent();
+    } else
+      report_fatal_error("AMDGPU Sw LDS Metadata User instruction not handled");
+  }
+  SwLDSMetadataGlobal->replaceAllUsesWith(NewSwLDSMetadataGlobal);
+  NewSwLDSMetadataGlobal->takeName(SwLDSMetadataGlobal);
+  SwLDSMetadataGlobal->eraseFromParent();
+  return;
+}
+
+static void poisonRedzonesForSwLDS(Function &F) {
+  Module *M = F.getParent();
+  GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F);
+  GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F);
+
+  if (!SwLDSGlobal || !SwLDSMetadataGlobal)
+    return;
+
+  LLVMContext &Ctx = M->getContext();
+  Type *Int64Ty = Type::getInt64Ty(Ctx);
+  Type *VoidTy = Type::getVoidTy(Ctx);
+  FunctionCallee AsanPoisonRegion = M->getOrInsertFunction(
+      StringRef("__asan_poison_region"),
+      FunctionType::get(VoidTy, {Int64Ty, Int64Ty}, false));
+  Constant *MdInit = SwLDSMetadataGlobal->getInitializer();
+
+  for (User *U : SwLDSGlobal->users()) {
+    StoreInst *SI = dyn_cast<StoreInst>(U);
+    if (!SI)
+      continue;
+
+    Type *PtrTy =
+        cast<PointerType>(SI->getValueOperand()->getType()->getScalarType());
+    unsigned int AddrSpace = PtrTy->getPointerAddressSpace();
+    if (AddrSpace != 1)
+      report_fatal_error("AMDGPU illegal store to SW LDS");
+
+    StructType *MDStructType =
+        cast<StructType>(SwLDSMetadataGlobal->getValueType());
+    assert(MDStructType);
+    unsigned NumStructs = MDStructType->getNumElements();
+    Value *StoreMallocPointer = SI->getValueOperand();
+
+    for (unsigned i = 0; i < NumStructs; i++) {
+      ConstantStruct *member =
+          dyn_cast<ConstantStruct>(MdInit->getAggregateElement(i));
+      if (!member)
+        continue;
+
+      ConstantInt *GlobalSize =
+          cast<ConstantInt>(member->getAggregateElement(1U));
+      unsigned GlobalSizeValue = GlobalSize->getZExtValue();
+
+      if (!GlobalSizeValue)
+        continue;
+      IRBuilder<> IRB(SI);
+      IRB.SetInsertPoint(SI->getNextNode());
+
+      auto *GEPForOffset = IRB.CreateInBoundsGEP(
+          MDStructType, SwLDSMetadataGlobal,
+          {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(3)});
+
+      auto *GEPForSize = IRB.CreateInBoundsGEP(
+          MDStructType, SwLDSMetadataGlobal,
+          {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(4)});
+
+      Value *RedzoneOffset = IRB.CreateLoad(IRB.getInt32Ty(), GEPForOffset);
+      RedzoneOffset = IRB.CreateZExt(RedzoneOffset, IRB.getInt64Ty());
+      Value *RedzoneAddrOffset = IRB.CreateInBoundsGEP(
+          IRB.getInt8Ty(), StoreMallocPointer, {RedzoneOffset});
+      Value *RedzoneAddress =
+          IRB.CreatePtrToInt(RedzoneAddrOffset, IRB.getInt64Ty());
+      Value *RedzoneSize = IRB.CreateLoad(IRB.getInt32Ty(), GEPForSize);
+      RedzoneSize = IRB.CreateZExt(RedzoneSize, IRB.getInt64Ty());
+      IRB.CreateCall(AsanPoisonRegion, {RedzoneAddress, RedzoneSize});
+    }
+  }
+  return;
+}
+
+static void preProcessAMDGPULDSAccesses(Module &M, int Scale) {
+  for (Function &F : M) {
+    UpdateSwLDSMetadataWithRedzoneInfo(F, Scale);
+    poisonRedzonesForSwLDS(F);
+  }
+  return;
+}
+
 AddressSanitizerPass::AddressSanitizerPass(
     const AddressSanitizerOptions &Options, bool UseGlobalGC,
     bool UseOdrIndicator, AsanDtorKind DestructorKind,
@@ -1249,6 +1525,13 @@ AddressSanitizerPass::AddressSanitizerPass(
 
 PreservedAnalyses AddressSanitizerPass::run(Module &M,
                                             ModuleAnalysisManager &MAM) {
+  Triple TargetTriple = Triple(M.getTargetTriple());
+
+  if (TargetTriple.isAMDGPU()) {
+    unsigned LongSize = M.getDataLayout().getPointerSizeInBits();
+    ShadowMapping Mapping = getShadowMapping(TargetTriple, LongSize, false);
+    preProcessAMDGPULDSAccesses(M, Mapping.Scale);
+  }
   ModuleAddressSanitizer ModuleSanitizer(
       M, Options.InsertVersionCheck, Options.CompileKernel, Options.Recover,
       UseGlobalGC, UseOdrIndicator, DestructorKind, ConstructorKind);
@@ -1304,7 +1587,15 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
 static bool isUnsupportedAMDGPUAddrspace(Value *Addr) {
   Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
   unsigned int AddrSpace = PtrTy->getPointerAddressSpace();
-  if (AddrSpace == 3 || AddrSpace == 5)
+  if (AddrSpace == 5)
+    return true;
+  return false;
+}
+
+static bool isGlobalInAMDGPULdsAddrspace(Value *Addr) {
+  Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
+  unsigned int AddrSpace = PtrTy->getPointerAddressSpace();
+  if (AddrSpace == 3)
     return true;
   return false;
 }
@@ -1795,10 +2086,8 @@ Instruction *AddressSanitizer::instrumentAMDGPUAddress(
     return InsertBefore;
   // Instrument generic addresses in supported addressspaces.
   IRBuilder<> IRB(InsertBefore);
-  Value *IsShared = IRB.CreateCall(AMDGPUAddressShared, {Addr});
   Value *IsPrivate = IRB.CreateCall(AMDGPUAddressPrivate, {Addr});
-  Value *IsSharedOrPrivate = IRB.CreateOr(IsShared, IsPrivate);
-  Value *Cmp = IRB.CreateNot(IsSharedOrPrivate);
+  Value *Cmp = IRB.CreateNot(IsPrivate);
   Value *AddrSpaceZeroLanding =
       SplitBlockAndInsertIfThen(Cmp, InsertBefore, false);
   InsertBefore = cast<Instruction>(AddrSpaceZeroLanding);
@@ -1857,7 +2146,38 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
     return;
   }
 
-  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+  Value *AddrLong;
+  if (TargetTriple.isAMDGCN()) {
+    Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
+    if (PtrTy->getPointerAddressSpace() == 3) {
+      Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+      Function *Func = IRB.GetInsertBlock()->getParent();
+      Value *SwLDS;
+      if (Func->getCallingConv() == CallingConv::AMDGPU_KERNEL) {
+        SwLDS = getKernelSwLDSGlobal(*M, *Func);
+      } else {
+        GlobalVariable *LDSBaseTable = getKernelSwLDSBaseGlobal(*M);
+        if (LDSBaseTable) {
+          auto *KernelId = IRB.CreateCall(
+              M->getOrInsertFunction(kAMDGPULDSKernelId, IRB.getInt32Ty()), {});
+          Value *BaseGEP =
+              IRB.CreateInBoundsGEP(LDSBaseTable->getValueType(), LDSBaseTable,
+                                    {IRB.getInt32(0), KernelId});
+          SwLDS = IRB.CreateLoad(IRB.getPtrTy(3), BaseGEP);
+        } else {
+          SwLDS = IRB.CreateIntToPtr(IRB.getInt32(0), IRB.getPtrTy(3));
+        }
+      }
+      Value *PtrToInt = IRB.CreatePtrToInt(Addr, IRB.getInt32Ty());
+      Value *LoadMallocPtr = IRB.CreateLoad(IRB.getPtrTy(1), SwLDS);
+      Value *GEP =
+          IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {PtrToInt});
+      AddrLong = IRB.CreatePointerCast(GEP, IntptrTy);
+    } else
+      AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+  } else
+    AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+
   if (UseCalls) {
     if (Exp == 0)
       RTCI.createRuntimeCall(
@@ -2021,7 +2341,8 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
   if (!G->hasInitializer()) return false;
   // Globals in address space 1 and 4 are supported for AMDGPU.
   if (G->getAddressSpace() &&
-      !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G)))
+      (!(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G)) ||
+       !(TargetTriple.isAMDGPU() && !isGlobalInAMDGPULdsAddrspace(G))))
     return false;
   if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals.
   // Two problems with thread-locals:
@@ -2029,7 +2350,9 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
   //   - Need to poison all copies, not just the main thread's one.
   if (G->isThreadLocal()) return false;
   // For now, just ignore this Global if the alignment is large.
-  if (G->getAlign() && *G->getAlign() > getMinRedzoneSizeForGlobal()) return false;
+  if (G->getAlign() &&
+      *G->getAlign() > getMinRedzoneSizeForGlobal(Mapping.Scale))
+    return false;
 
   // For non-COFF targets, only instrument globals known to be defined by this
   // TU.
@@ -2552,7 +2875,8 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
 
     Type *Ty = G->getValueType();
     const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
-    const uint64_t RightRedzoneSize = getRedzoneSizeForGlobal(SizeInBytes);
+    const uint64_t RightRedzoneSize =
+        getRedzoneSizeForGlobal(Mapping.Scale, SizeInBytes);
     Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
 
     StructType *NewTy = StructType::get(Ty, RightRedZoneTy);
@@ -2568,7 +2892,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
         G->getThreadLocalMode(), G->getAddressSpace());
     NewGlobal->copyAttributesFrom(G);
     NewGlobal->setComdat(G->getComdat());
-    NewGlobal->setAlignment(Align(getMinRedzoneSizeForGlobal()));
+    NewGlobal->setAlignment(Align(getMinRedzoneSizeForGlobal(Mapping.Scale)));
     // Don't fold globals with redzones. ODR violation detector and redzone
     // poisoning implicitly creates a dependence on the global's address, so it
     // is no longer valid for it to be marked unnamed_addr.
@@ -2688,31 +3012,6 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
   LLVM_DEBUG(dbgs() << M);
 }
 
-uint64_t
-ModuleAddressSanitizer::getRedzoneSizeForGlobal(uint64_t SizeInBytes) const {
-  constexpr uint64_t kMaxRZ = 1 << 18;
-  const uint64_t MinRZ = getMinRedzoneSizeForGlobal();
-
-  uint64_t RZ = 0;
-  if (SizeInBytes <= MinRZ / 2) {
-    // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
-    // at least 32 bytes, optimize when SizeInBytes is less than or equal to
-    // half of MinRZ.
-    RZ = MinRZ - SizeInBytes;
-  } else {
-    // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
-    RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);
-
-    // Round up to multiple of MinRZ.
-    if (SizeInBytes % MinRZ)
-      RZ += MinRZ - (SizeInBytes % MinRZ);
-  }
-
-  assert((RZ + SizeInBytes) % MinRZ == 0);
-
-  return RZ;
-}
-
 int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const {
   int LongSize = M.getDataLayout().getPointerSizeInBits();
   bool isAndroid = Triple(M.getTargetTriple()).isAndroid();
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
new file mode 100755
index 0000000000000..f37fbf350ffde
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
@@ -0,0 +1,529 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+
+%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
+%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
+
+ at llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol !0
+ at llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol !1
+ at llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 } }, no_sanitize_address
+ at llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+ at llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+
+; Function Attrs: sanitize_address
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]]
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 }, %1 { i32 96, i32 0, i32 0, i32 0, i32 0 }, %1 { i32 96, i32 0, i32 0, i32 0, i32 0 } }, no_sanitize_address, align 1
+; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
+; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
+; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
+;.
+define void @use_variables() #0 {
+; CHECK-LABEL: define void @use_variables(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr addrspace(1) [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i64 [[TMP3]], 3
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], 2147450880
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = and i64 [[TMP3]], 7
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[TMP9]], 3
+; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[TMP10]] to i8
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp sge i8 [[TMP11]], [[TMP7]]
+; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP8]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP13]])
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT:    br i1 [[TMP15]], label %[[ASAN_REPORT:.*]], label %[[BB18:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[BB16:.*]], label %[[BB17:.*]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP3]]) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB17]]
+; CHECK:       [[BB17]]:
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
+; CHECK-NEXT:    [[TMP22:%.*]] = lshr i64 [[TMP21]], 3
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[TMP22]], 2147450880
+; CHECK-NEXT:    [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr
+; CHECK-NEXT:    [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp ne i8 [[TMP25]], 0
+; CHECK-NEXT:    [[TMP27:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP26]])
+; CHECK-NEXT:    [[TMP28:%.*]] = icmp ne i64 [[TMP27]], 0
+; CHECK-NEXT:    br i1 [[TMP28]], label %[[ASAN_REPORT1:.*]], label %[[BB31:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP26]], label %[[BB29:.*]], label %[[BB30:.*]]
+; CHECK:       [[BB29]]:
+; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP21]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB30]]
+; CHECK:       [[BB30]]:
+; CHECK-NEXT:    br label %[[BB31]]
+; CHECK:       [[BB31]]:
+; CHECK-NEXT:    [[TMP32:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP20]], align 8
+; CHECK-NEXT:    [[TMP33:%.*]] = ptrtoint ptr addrspace(1) [[TMP32]] to i64
+; CHECK-NEXT:    [[TMP34:%.*]] = lshr i64 [[TMP33]], 3
+; CHECK-NEXT:    [[TMP35:%.*]] = add i64 [[TMP34]], 2147450880
+; CHECK-NEXT:    [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr
+; CHECK-NEXT:    [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
+; CHECK-NEXT:    [[TMP38:%.*]] = icmp ne i8 [[TMP37]], 0
+; CHECK-NEXT:    [[TMP39:%.*]] = and i64 [[TMP33]], 7
+; CHECK-NEXT:    [[TMP40:%.*]] = add i64 [[TMP39]], 3
+; CHECK-NEXT:    [[TMP41:%.*]] = trunc i64 [[TMP40]] to i8
+; CHECK-NEXT:    [[TMP42:%.*]] = icmp sge i8 [[TMP41]], [[TMP37]]
+; CHECK-NEXT:    [[TMP43:%.*]] = and i1 [[TMP38]], [[TMP42]]
+; CHECK-NEXT:    [[TMP44:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP43]])
+; CHECK-NEXT:    [[TMP45:%.*]] = icmp ne i64 [[TMP44]], 0
+; CHECK-NEXT:    br i1 [[TMP45]], label %[[ASAN_REPORT2:.*]], label %[[BB48:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT2]]:
+; CHECK-NEXT:    br i1 [[TMP43]], label %[[BB46:.*]], label %[[BB47:.*]]
+; CHECK:       [[BB46]]:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP33]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB47]]
+; CHECK:       [[BB47]]:
+; CHECK-NEXT:    br label %[[BB48]]
+; CHECK:       [[BB48]]:
+; CHECK-NEXT:    [[TMP49:%.*]] = load i32, ptr addrspace(1) [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP49]]
+; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP52:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64
+; CHECK-NEXT:    [[TMP53:%.*]] = lshr i64 [[TMP52]], 3
+; CHECK-NEXT:    [[TMP54:%.*]] = add i64 [[TMP53]], 2147450880
+; CHECK-NEXT:    [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr
+; CHECK-NEXT:    [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1
+; CHECK-NEXT:    [[TMP57:%.*]] = icmp ne i8 [[TMP56]], 0
+; CHECK-NEXT:    [[TMP58:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP57]])
+; CHECK-NEXT:    [[TMP59:%.*]] = icmp ne i64 [[TMP58]], 0
+; CHECK-NEXT:    br i1 [[TMP59]], label %[[ASAN_REPORT3:.*]], label %[[BB62:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT3]]:
+; CHECK-NEXT:    br i1 [[TMP57]], label %[[BB60:.*]], label %[[BB61:.*]]
+; CHECK:       [[BB60]]:
+; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP52]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB61]]
+; CHECK:       [[BB61]]:
+; CHECK-NEXT:    br label %[[BB62]]
+; CHECK:       [[BB62]]:
+; CHECK-NEXT:    [[TMP63:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP51]], align 8
+; CHECK-NEXT:    [[TMP64:%.*]] = ptrtoint ptr addrspace(1) [[TMP63]] to i64
+; CHECK-NEXT:    [[TMP65:%.*]] = lshr i64 [[TMP64]], 3
+; CHECK-NEXT:    [[TMP66:%.*]] = add i64 [[TMP65]], 2147450880
+; CHECK-NEXT:    [[TMP67:%.*]] = inttoptr i64 [[TMP66]] to ptr
+; CHECK-NEXT:    [[TMP68:%.*]] = load i8, ptr [[TMP67]], align 1
+; CHECK-NEXT:    [[TMP69:%.*]] = icmp ne i8 [[TMP68]], 0
+; CHECK-NEXT:    [[TMP70:%.*]] = and i64 [[TMP64]], 7
+; CHECK-NEXT:    [[TMP71:%.*]] = add i64 [[TMP70]], 3
+; CHECK-NEXT:    [[TMP72:%.*]] = trunc i64 [[TMP71]] to i8
+; CHECK-NEXT:    [[TMP73:%.*]] = icmp sge i8 [[TMP72]], [[TMP68]]
+; CHECK-NEXT:    [[TMP74:%.*]] = and i1 [[TMP69]], [[TMP73]]
+; CHECK-NEXT:    [[TMP75:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP74]])
+; CHECK-NEXT:    [[TMP76:%.*]] = icmp ne i64 [[TMP75]], 0
+; CHECK-NEXT:    br i1 [[TMP76]], label %[[ASAN_REPORT4:.*]], label %[[BB79:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT4]]:
+; CHECK-NEXT:    br i1 [[TMP74]], label %[[BB77:.*]], label %[[BB78:.*]]
+; CHECK:       [[BB77]]:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP64]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB78]]
+; CHECK:       [[BB78]]:
+; CHECK-NEXT:    br label %[[BB79]]
+; CHECK:       [[BB79]]:
+; CHECK-NEXT:    [[TMP80:%.*]] = load i32, ptr addrspace(1) [[TMP63]], align 4
+; CHECK-NEXT:    [[TMP81:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP80]]
+; CHECK-NEXT:    [[TMP82:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP82]]
+; CHECK-NEXT:    [[TMP84:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP83]], align 4
+; CHECK-NEXT:    [[TMP85:%.*]] = ptrtoint ptr addrspace(3) [[TMP50]] to i32
+; CHECK-NEXT:    [[TMP86:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP84]], align 8
+; CHECK-NEXT:    [[TMP87:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP86]], i32 [[TMP85]]
+; CHECK-NEXT:    [[TMP88:%.*]] = ptrtoint ptr addrspace(1) [[TMP87]] to i64
+; CHECK-NEXT:    [[TMP89:%.*]] = lshr i64 [[TMP88]], 3
+; CHECK-NEXT:    [[TMP90:%.*]] = add i64 [[TMP89]], 2147450880
+; CHECK-NEXT:    [[TMP91:%.*]] = inttoptr i64 [[TMP90]] to ptr
+; CHECK-NEXT:    [[TMP92:%.*]] = load i8, ptr [[TMP91]], align 1
+; CHECK-NEXT:    [[TMP93:%.*]] = icmp ne i8 [[TMP92]], 0
+; CHECK-NEXT:    [[TMP94:%.*]] = and i64 [[TMP88]], 7
+; CHECK-NEXT:    [[TMP95:%.*]] = trunc i64 [[TMP94]] to i8
+; CHECK-NEXT:    [[TMP96:%.*]] = icmp sge i8 [[TMP95]], [[TMP92]]
+; CHECK-NEXT:    [[TMP97:%.*]] = and i1 [[TMP93]], [[TMP96]]
+; CHECK-NEXT:    [[TMP98:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP97]])
+; CHECK-NEXT:    [[TMP99:%.*]] = icmp ne i64 [[TMP98]], 0
+; CHECK-NEXT:    br i1 [[TMP99]], label %[[ASAN_REPORT5:.*]], label %[[BB102:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT5]]:
+; CHECK-NEXT:    br i1 [[TMP97]], label %[[BB100:.*]], label %[[BB101:.*]]
+; CHECK:       [[BB100]]:
+; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP88]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB101]]
+; CHECK:       [[BB101]]:
+; CHECK-NEXT:    br label %[[BB102]]
+; CHECK:       [[BB102]]:
+; CHECK-NEXT:    store i8 3, ptr addrspace(3) [[TMP50]], align 4
+; CHECK-NEXT:    [[TMP103:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP103]]
+; CHECK-NEXT:    [[TMP105:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP104]], align 4
+; CHECK-NEXT:    [[TMP106:%.*]] = ptrtoint ptr addrspace(3) [[TMP81]] to i32
+; CHECK-NEXT:    [[TMP107:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP105]], align 8
+; CHECK-NEXT:    [[TMP108:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP107]], i32 [[TMP106]]
+; CHECK-NEXT:    [[TMP109:%.*]] = ptrtoint ptr addrspace(1) [[TMP108]] to i64
+; CHECK-NEXT:    [[TMP110:%.*]] = lshr i64 [[TMP109]], 3
+; CHECK-NEXT:    [[TMP111:%.*]] = add i64 [[TMP110]], 2147450880
+; CHECK-NEXT:    [[TMP112:%.*]] = inttoptr i64 [[TMP111]] to ptr
+; CHECK-NEXT:    [[TMP113:%.*]] = load i8, ptr [[TMP112]], align 1
+; CHECK-NEXT:    [[TMP114:%.*]] = icmp ne i8 [[TMP113]], 0
+; CHECK-NEXT:    [[TMP115:%.*]] = and i64 [[TMP109]], 7
+; CHECK-NEXT:    [[TMP116:%.*]] = trunc i64 [[TMP115]] to i8
+; CHECK-NEXT:    [[TMP117:%.*]] = icmp sge i8 [[TMP116]], [[TMP113]]
+; CHECK-NEXT:    [[TMP118:%.*]] = and i1 [[TMP114]], [[TMP117]]
+; CHECK-NEXT:    [[TMP119:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP118]])
+; CHECK-NEXT:    [[TMP120:%.*]] = icmp ne i64 [[TMP119]], 0
+; CHECK-NEXT:    br i1 [[TMP120]], label %[[ASAN_REPORT6:.*]], label %[[BB123:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT6]]:
+; CHECK-NEXT:    br i1 [[TMP118]], label %[[BB121:.*]], label %[[BB122:.*]]
+; CHECK:       [[BB121]]:
+; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP109]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB122]]
+; CHECK:       [[BB122]]:
+; CHECK-NEXT:    br label %[[BB123]]
+; CHECK:       [[BB123]]:
+; CHECK-NEXT:    store i8 3, ptr addrspace(3) [[TMP81]], align 8
+; CHECK-NEXT:    ret void
+;
+  %1 = call i32 @llvm.amdgcn.lds.kernel.id()
+  %2 = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 %1
+  %3 = load ptr addrspace(3), ptr addrspace(1) %2, align 4
+  %4 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 0
+  %5 = load ptr addrspace(1), ptr addrspace(1) %4, align 8
+  %6 = load i32, ptr addrspace(1) %5, align 4
+  %7 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %6
+  %8 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 1
+  %9 = load ptr addrspace(1), ptr addrspace(1) %8, align 8
+  %10 = load i32, ptr addrspace(1) %9, align 4
+  %11 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %10
+  store i8 3, ptr addrspace(3) %7, align 4
+  store i8 3, ptr addrspace(3) %11, align 8
+  ret void
+}
+
+; Function Attrs: sanitize_address
+define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB60:.*]]
+; CHECK:       [[MALLOC]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
+; CHECK-NEXT:    store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP11]], 3
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880
+; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT:    [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i8 [[TMP15]], 0
+; CHECK-NEXT:    [[TMP17:%.*]] = and i64 [[TMP11]], 7
+; CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[TMP17]], 3
+; CHECK-NEXT:    [[TMP19:%.*]] = trunc i64 [[TMP18]] to i8
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp sge i8 [[TMP19]], [[TMP15]]
+; CHECK-NEXT:    [[TMP21:%.*]] = and i1 [[TMP16]], [[TMP20]]
+; CHECK-NEXT:    [[TMP22:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP21]])
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp ne i64 [[TMP22]], 0
+; CHECK-NEXT:    br i1 [[TMP23]], label %[[ASAN_REPORT:.*]], label %[[BB26:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP21]], label %[[BB24:.*]], label %[[BB25:.*]]
+; CHECK:       [[BB24]]:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP11]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB25]]
+; CHECK:       [[BB25]]:
+; CHECK-NEXT:    br label %[[BB26]]
+; CHECK:       [[BB26]]:
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
+; CHECK-NEXT:    store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4
+; CHECK-NEXT:    [[TMP28:%.*]] = add i32 [[TMP27]], 7
+; CHECK-NEXT:    [[TMP29:%.*]] = udiv i32 [[TMP28]], 8
+; CHECK-NEXT:    [[TMP30:%.*]] = mul i32 [[TMP29]], 8
+; CHECK-NEXT:    store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4
+; CHECK-NEXT:    [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]]
+; CHECK-NEXT:    store i32 [[TMP31]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+; CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
+; CHECK-NEXT:    store i32 [[TMP32]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4
+; CHECK-NEXT:    [[TMP33:%.*]] = add i32 [[TMP32]], 7
+; CHECK-NEXT:    [[TMP34:%.*]] = udiv i32 [[TMP33]], 8
+; CHECK-NEXT:    [[TMP35:%.*]] = mul i32 [[TMP34]], 8
+; CHECK-NEXT:    store i32 [[TMP35]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+; CHECK-NEXT:    [[TMP36:%.*]] = add i32 [[TMP31]], [[TMP35]]
+; CHECK-NEXT:    [[TMP37:%.*]] = zext i32 [[TMP36]] to i64
+; CHECK-NEXT:    [[TMP38:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64
+; CHECK-NEXT:    [[TMP40:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP37]], i64 [[TMP39]])
+; CHECK-NEXT:    [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr addrspace(1)
+; CHECK-NEXT:    store ptr addrspace(1) [[TMP41]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP42:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
+; CHECK-NEXT:    [[TMP43:%.*]] = zext i32 [[TMP42]] to i64
+; CHECK-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP43]]
+; CHECK-NEXT:    [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP44]] to i64
+; CHECK-NEXT:    [[TMP46:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
+; CHECK-NEXT:    [[TMP47:%.*]] = zext i32 [[TMP46]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP45]], i64 [[TMP47]])
+; CHECK-NEXT:    [[TMP48:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
+; CHECK-NEXT:    [[TMP49:%.*]] = zext i32 [[TMP48]] to i64
+; CHECK-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP49]]
+; CHECK-NEXT:    [[TMP51:%.*]] = ptrtoint ptr addrspace(1) [[TMP50]] to i64
+; CHECK-NEXT:    [[TMP52:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
+; CHECK-NEXT:    [[TMP53:%.*]] = zext i32 [[TMP52]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP51]], i64 [[TMP53]])
+; CHECK-NEXT:    [[TMP54:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
+; CHECK-NEXT:    [[TMP55:%.*]] = zext i32 [[TMP54]] to i64
+; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP55]]
+; CHECK-NEXT:    [[TMP57:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64
+; CHECK-NEXT:    [[TMP58:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
+; CHECK-NEXT:    [[TMP59:%.*]] = zext i32 [[TMP58]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP57]], i64 [[TMP59]])
+; CHECK-NEXT:    br label %[[BB60]]
+; CHECK:       [[BB60]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ]
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    [[TMP61:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP62:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP61]]
+; CHECK-NEXT:    [[TMP63:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP63]]
+; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
+; CHECK-NEXT:    call void @use_variables()
+; CHECK-NEXT:    [[TMP65:%.*]] = ptrtoint ptr addrspace(3) [[TMP62]] to i32
+; CHECK-NEXT:    [[TMP66:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP66]], i32 [[TMP65]]
+; CHECK-NEXT:    [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64
+; CHECK-NEXT:    [[TMP69:%.*]] = lshr i64 [[TMP68]], 3
+; CHECK-NEXT:    [[TMP70:%.*]] = add i64 [[TMP69]], 2147450880
+; CHECK-NEXT:    [[TMP71:%.*]] = inttoptr i64 [[TMP70]] to ptr
+; CHECK-NEXT:    [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1
+; CHECK-NEXT:    [[TMP73:%.*]] = icmp ne i8 [[TMP72]], 0
+; CHECK-NEXT:    [[TMP74:%.*]] = and i64 [[TMP68]], 7
+; CHECK-NEXT:    [[TMP75:%.*]] = trunc i64 [[TMP74]] to i8
+; CHECK-NEXT:    [[TMP76:%.*]] = icmp sge i8 [[TMP75]], [[TMP72]]
+; CHECK-NEXT:    [[TMP77:%.*]] = and i1 [[TMP73]], [[TMP76]]
+; CHECK-NEXT:    [[TMP78:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP77]])
+; CHECK-NEXT:    [[TMP79:%.*]] = icmp ne i64 [[TMP78]], 0
+; CHECK-NEXT:    br i1 [[TMP79]], label %[[ASAN_REPORT1:.*]], label %[[BB82:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP77]], label %[[BB80:.*]], label %[[BB81:.*]]
+; CHECK:       [[BB80]]:
+; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP68]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB81]]
+; CHECK:       [[BB81]]:
+; CHECK-NEXT:    br label %[[BB82]]
+; CHECK:       [[BB82]]:
+; CHECK-NEXT:    store i8 7, ptr addrspace(3) [[TMP62]], align 1
+; CHECK-NEXT:    [[TMP83:%.*]] = ptrtoint ptr addrspace(3) [[TMP64]] to i64
+; CHECK-NEXT:    [[TMP84:%.*]] = add i64 [[TMP83]], 3
+; CHECK-NEXT:    [[TMP85:%.*]] = inttoptr i64 [[TMP84]] to ptr addrspace(3)
+; CHECK-NEXT:    [[TMP86:%.*]] = ptrtoint ptr addrspace(3) [[TMP64]] to i32
+; CHECK-NEXT:    [[TMP87:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP88:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP87]], i32 [[TMP86]]
+; CHECK-NEXT:    [[TMP89:%.*]] = ptrtoint ptr addrspace(1) [[TMP88]] to i64
+; CHECK-NEXT:    [[TMP90:%.*]] = lshr i64 [[TMP89]], 3
+; CHECK-NEXT:    [[TMP91:%.*]] = add i64 [[TMP90]], 2147450880
+; CHECK-NEXT:    [[TMP92:%.*]] = inttoptr i64 [[TMP91]] to ptr
+; CHECK-NEXT:    [[TMP93:%.*]] = load i8, ptr [[TMP92]], align 1
+; CHECK-NEXT:    [[TMP94:%.*]] = icmp ne i8 [[TMP93]], 0
+; CHECK-NEXT:    [[TMP95:%.*]] = and i64 [[TMP89]], 7
+; CHECK-NEXT:    [[TMP96:%.*]] = trunc i64 [[TMP95]] to i8
+; CHECK-NEXT:    [[TMP97:%.*]] = icmp sge i8 [[TMP96]], [[TMP93]]
+; CHECK-NEXT:    [[TMP98:%.*]] = and i1 [[TMP94]], [[TMP97]]
+; CHECK-NEXT:    [[TMP99:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP98]])
+; CHECK-NEXT:    [[TMP100:%.*]] = icmp ne i64 [[TMP99]], 0
+; CHECK-NEXT:    br i1 [[TMP100]], label %[[ASAN_REPORT2:.*]], label %[[BB103:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT2]]:
+; CHECK-NEXT:    br i1 [[TMP98]], label %[[BB101:.*]], label %[[BB102:.*]]
+; CHECK:       [[BB101]]:
+; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP89]], i64 4) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB102]]
+; CHECK:       [[BB102]]:
+; CHECK-NEXT:    br label %[[BB103]]
+; CHECK:       [[BB103]]:
+; CHECK-NEXT:    [[TMP104:%.*]] = ptrtoint ptr addrspace(3) [[TMP85]] to i32
+; CHECK-NEXT:    [[TMP105:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP105]], i32 [[TMP104]]
+; CHECK-NEXT:    [[TMP107:%.*]] = ptrtoint ptr addrspace(1) [[TMP106]] to i64
+; CHECK-NEXT:    [[TMP108:%.*]] = lshr i64 [[TMP107]], 3
+; CHECK-NEXT:    [[TMP109:%.*]] = add i64 [[TMP108]], 2147450880
+; CHECK-NEXT:    [[TMP110:%.*]] = inttoptr i64 [[TMP109]] to ptr
+; CHECK-NEXT:    [[TMP111:%.*]] = load i8, ptr [[TMP110]], align 1
+; CHECK-NEXT:    [[TMP112:%.*]] = icmp ne i8 [[TMP111]], 0
+; CHECK-NEXT:    [[TMP113:%.*]] = and i64 [[TMP107]], 7
+; CHECK-NEXT:    [[TMP114:%.*]] = trunc i64 [[TMP113]] to i8
+; CHECK-NEXT:    [[TMP115:%.*]] = icmp sge i8 [[TMP114]], [[TMP111]]
+; CHECK-NEXT:    [[TMP116:%.*]] = and i1 [[TMP112]], [[TMP115]]
+; CHECK-NEXT:    [[TMP117:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP116]])
+; CHECK-NEXT:    [[TMP118:%.*]] = icmp ne i64 [[TMP117]], 0
+; CHECK-NEXT:    br i1 [[TMP118]], label %[[ASAN_REPORT3:.*]], label %[[BB121:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT3]]:
+; CHECK-NEXT:    br i1 [[TMP116]], label %[[BB119:.*]], label %[[BB120:.*]]
+; CHECK:       [[BB119]]:
+; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP107]], i64 4) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB120]]
+; CHECK:       [[BB120]]:
+; CHECK-NEXT:    br label %[[BB121]]
+; CHECK:       [[BB121]]:
+; CHECK-NEXT:    store i32 8, ptr addrspace(3) [[TMP64]], align 2
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP122:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP123:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP124:%.*]] = ptrtoint ptr [[TMP123]] to i64
+; CHECK-NEXT:    [[TMP125:%.*]] = ptrtoint ptr addrspace(1) [[TMP122]] to i64
+; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP125]], i64 [[TMP124]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+WId:
+  %0 = call i32 @llvm.amdgcn.workitem.id.x()
+  %1 = call i32 @llvm.amdgcn.workitem.id.y()
+  %2 = call i32 @llvm.amdgcn.workitem.id.z()
+  %3 = or i32 %0, %1
+  %4 = or i32 %3, %2
+  %5 = icmp eq i32 %4, 0
+  br i1 %5, label %Malloc, label %26
+
+Malloc:                                           ; preds = %WId
+  %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+  %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+  %8 = add i32 %6, %7
+  %9 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+  %10 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %9, i64 15
+  store i32 %8, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4
+  %11 = load i32, ptr addrspace(4) %10, align 4
+  store i32 %11, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4
+  %12 = add i32 %11, 7
+  %13 = udiv i32 %12, 8
+  %14 = mul i32 %13, 8
+  store i32 %14, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4
+  %15 = add i32 %8, %14
+  store i32 %15, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+  %16 = load i32, ptr addrspace(4) %10, align 4
+  store i32 %16, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4
+  %17 = add i32 %16, 7
+  %18 = udiv i32 %17, 8
+  %19 = mul i32 %18, 8
+  store i32 %19, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+  %20 = add i32 %15, %19
+  %21 = zext i32 %20 to i64
+  %22 = call ptr @llvm.returnaddress(i32 0)
+  %23 = ptrtoint ptr %22 to i64
+  %24 = call i64 @__asan_malloc_impl(i64 %21, i64 %23)
+  %25 = inttoptr i64 %24 to ptr addrspace(1)
+  store ptr addrspace(1) %25, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+  br label %26
+
+26:                                               ; preds = %Malloc, %WId
+  %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
+  call void @llvm.amdgcn.s.barrier()
+  %27 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+  %28 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %27
+  %29 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+  %30 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %29
+  call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
+  call void @use_variables()
+  store i8 7, ptr addrspace(3) %28, align 1
+  store i32 8, ptr addrspace(3) %30, align 2
+  br label %CondFree
+
+CondFree:                                         ; preds = %26
+  call void @llvm.amdgcn.s.barrier()
+  br i1 %xyzCond, label %Free, label %End
+
+Free:                                             ; preds = %CondFree
+  %31 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+  %32 = call ptr @llvm.returnaddress(i32 0)
+  %33 = ptrtoint ptr %32 to i64
+  %34 = ptrtoint ptr addrspace(1) %31 to i64
+  call void @__asan_free_impl(i64 %34, i64 %33)
+  br label %End
+
+End:                                              ; preds = %Free, %CondFree
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare void @llvm.donothing() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() #3
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.y() #3
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.z() #3
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.returnaddress(i32 immarg) #2
+
+declare i64 @__asan_malloc_impl(i64, i64)
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.s.barrier() #4
+
+declare void @__asan_free_impl(i64, i64)
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.lds.kernel.id() #3
+
+attributes #0 = { sanitize_address }
+attributes #1 = { sanitize_address "amdgpu-lds-size"="24,24" }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #4 = { convergent nocallback nofree nounwind willreturn }
+
+!0 = !{i32 0, i32 1}
+!1 = !{i32 24, i32 25}
+!2 = !{i32 0}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address }
+; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="96,96" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR8]] = { nomerge }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1]] = !{i32 96, i32 97}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[META3]] = !{i32 0}
+;.
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
new file mode 100755
index 0000000000000..1be50f48aa6f6
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+
+%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
+%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
+
+ at llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol !0
+ at llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol !1
+ at llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 0, i32 0 } }, no_sanitize_address
+
+; Function Attrs: sanitize_address
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol [[META1:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 0, i32 0, i32 0, i32 0 } }, no_sanitize_address, align 1
+; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
+; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
+; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
+;.
+define amdgpu_kernel void @k0() #0 {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB43:.*]]
+; CHECK:       [[MALLOC]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
+; CHECK-NEXT:    store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP11]], 3
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880
+; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT:    [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i8 [[TMP15]], 0
+; CHECK-NEXT:    [[TMP17:%.*]] = and i64 [[TMP11]], 7
+; CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[TMP17]], 3
+; CHECK-NEXT:    [[TMP19:%.*]] = trunc i64 [[TMP18]] to i8
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp sge i8 [[TMP19]], [[TMP15]]
+; CHECK-NEXT:    [[TMP21:%.*]] = and i1 [[TMP16]], [[TMP20]]
+; CHECK-NEXT:    [[TMP22:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP21]])
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp ne i64 [[TMP22]], 0
+; CHECK-NEXT:    br i1 [[TMP23]], label %[[ASAN_REPORT:.*]], label %[[BB26:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP21]], label %[[BB24:.*]], label %[[BB25:.*]]
+; CHECK:       [[BB24]]:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP11]]) #[[ATTR7:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB25]]
+; CHECK:       [[BB25]]:
+; CHECK-NEXT:    br label %[[BB26]]
+; CHECK:       [[BB26]]:
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
+; CHECK-NEXT:    store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4
+; CHECK-NEXT:    [[TMP28:%.*]] = add i32 [[TMP27]], 0
+; CHECK-NEXT:    [[TMP29:%.*]] = udiv i32 [[TMP28]], 1
+; CHECK-NEXT:    [[TMP30:%.*]] = mul i32 [[TMP29]], 1
+; CHECK-NEXT:    store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT:    [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]]
+; CHECK-NEXT:    [[TMP32:%.*]] = zext i32 [[TMP31]] to i64
+; CHECK-NEXT:    [[TMP33:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP34:%.*]] = ptrtoint ptr [[TMP33]] to i64
+; CHECK-NEXT:    [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP32]], i64 [[TMP34]])
+; CHECK-NEXT:    [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1)
+; CHECK-NEXT:    store ptr addrspace(1) [[TMP36]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP37:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
+; CHECK-NEXT:    [[TMP38:%.*]] = zext i32 [[TMP37]] to i64
+; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP36]], i64 [[TMP38]]
+; CHECK-NEXT:    [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64
+; CHECK-NEXT:    [[TMP41:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
+; CHECK-NEXT:    [[TMP42:%.*]] = zext i32 [[TMP41]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP40]], i64 [[TMP42]])
+; CHECK-NEXT:    br label %[[BB43]]
+; CHECK:       [[BB43]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ]
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    [[TMP44:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP44]]
+; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
+; CHECK-NEXT:    [[TMP46:%.*]] = ptrtoint ptr addrspace(3) [[TMP45]] to i32
+; CHECK-NEXT:    [[TMP47:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP47]], i32 [[TMP46]]
+; CHECK-NEXT:    [[TMP49:%.*]] = ptrtoint ptr addrspace(1) [[TMP48]] to i64
+; CHECK-NEXT:    [[TMP50:%.*]] = lshr i64 [[TMP49]], 3
+; CHECK-NEXT:    [[TMP51:%.*]] = add i64 [[TMP50]], 2147450880
+; CHECK-NEXT:    [[TMP52:%.*]] = inttoptr i64 [[TMP51]] to ptr
+; CHECK-NEXT:    [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1
+; CHECK-NEXT:    [[TMP54:%.*]] = icmp ne i8 [[TMP53]], 0
+; CHECK-NEXT:    [[TMP55:%.*]] = and i64 [[TMP49]], 7
+; CHECK-NEXT:    [[TMP56:%.*]] = trunc i64 [[TMP55]] to i8
+; CHECK-NEXT:    [[TMP57:%.*]] = icmp sge i8 [[TMP56]], [[TMP53]]
+; CHECK-NEXT:    [[TMP58:%.*]] = and i1 [[TMP54]], [[TMP57]]
+; CHECK-NEXT:    [[TMP59:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP58]])
+; CHECK-NEXT:    [[TMP60:%.*]] = icmp ne i64 [[TMP59]], 0
+; CHECK-NEXT:    br i1 [[TMP60]], label %[[ASAN_REPORT1:.*]], label %[[BB63:.*]], !prof [[PROF2]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP58]], label %[[BB61:.*]], label %[[BB62:.*]]
+; CHECK:       [[BB61]]:
+; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP49]]) #[[ATTR7]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB62]]
+; CHECK:       [[BB62]]:
+; CHECK-NEXT:    br label %[[BB63]]
+; CHECK:       [[BB63]]:
+; CHECK-NEXT:    store i8 7, ptr addrspace(3) [[TMP45]], align 4
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP64:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP65:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP66:%.*]] = ptrtoint ptr [[TMP65]] to i64
+; CHECK-NEXT:    [[TMP67:%.*]] = ptrtoint ptr addrspace(1) [[TMP64]] to i64
+; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP67]], i64 [[TMP66]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+WId:
+  %0 = call i32 @llvm.amdgcn.workitem.id.x()
+  %1 = call i32 @llvm.amdgcn.workitem.id.y()
+  %2 = call i32 @llvm.amdgcn.workitem.id.z()
+  %3 = or i32 %0, %1
+  %4 = or i32 %3, %2
+  %5 = icmp eq i32 %4, 0
+  br i1 %5, label %Malloc, label %21
+
+Malloc:                                           ; preds = %WId
+  %6 = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
+  %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
+  %8 = add i32 %6, %7
+  %9 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+  %10 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %9, i64 15
+  store i32 %8, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+  %11 = load i32, ptr addrspace(4) %10, align 4
+  store i32 %11, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4
+  %12 = add i32 %11, 0
+  %13 = udiv i32 %12, 1
+  %14 = mul i32 %13, 1
+  store i32 %14, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
+  %15 = add i32 %8, %14
+  %16 = zext i32 %15 to i64
+  %17 = call ptr @llvm.returnaddress(i32 0)
+  %18 = ptrtoint ptr %17 to i64
+  %19 = call i64 @__asan_malloc_impl(i64 %16, i64 %18)
+  %20 = inttoptr i64 %19 to ptr addrspace(1)
+  store ptr addrspace(1) %20, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+  br label %21
+
+21:                                               ; preds = %Malloc, %WId
+  %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
+  call void @llvm.amdgcn.s.barrier()
+  %22 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+  %23 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %22
+  call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
+  store i8 7, ptr addrspace(3) %23, align 4
+  br label %CondFree
+
+CondFree:                                         ; preds = %21
+  call void @llvm.amdgcn.s.barrier()
+  br i1 %xyzCond, label %Free, label %End
+
+Free:                                             ; preds = %CondFree
+  %24 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+  %25 = call ptr @llvm.returnaddress(i32 0)
+  %26 = ptrtoint ptr %25 to i64
+  %27 = ptrtoint ptr addrspace(1) %24 to i64
+  call void @__asan_free_impl(i64 %27, i64 %26)
+  br label %End
+
+End:                                              ; preds = %Free, %CondFree
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare void @llvm.donothing() #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.y() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.z() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.returnaddress(i32 immarg) #1
+
+declare i64 @__asan_malloc_impl(i64, i64)
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.s.barrier() #3
+
+declare void @__asan_free_impl(i64, i64)
+
+attributes #0 = { sanitize_address "amdgpu-lds-size"="8,8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { convergent nocallback nofree nounwind willreturn }
+
+!0 = !{i32 0, i32 1}
+!1 = !{i32 8, i32 9}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="32,32" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR7]] = { nomerge }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1]] = !{i32 32, i32 33}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
new file mode 100755
index 0000000000000..23f27aa797e73
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
@@ -0,0 +1,476 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+
+%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
+%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
+
+ at llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol !0
+ at llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 3, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 4, i32 8 } }, no_sanitize_address
+ at llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+ at llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+
+; Function Attrs: sanitize_address
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 }, %1 { i32 96, i32 3, i32 32, i32 99, i32 29 }, %1 { i32 128, i32 4, i32 32, i32 132, i32 28 } }, no_sanitize_address, align 1
+; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
+; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
+; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
+;.
+define void @use_variables() #0 {
+; CHECK-LABEL: define void @use_variables(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr addrspace(1) [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i64 [[TMP3]], 3
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], 2147450880
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = and i64 [[TMP3]], 7
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[TMP9]], 3
+; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[TMP10]] to i8
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp sge i8 [[TMP11]], [[TMP7]]
+; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP8]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP13]])
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT:    br i1 [[TMP15]], label %[[ASAN_REPORT:.*]], label %[[BB18:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[BB16:.*]], label %[[BB17:.*]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP3]]) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB17]]
+; CHECK:       [[BB17]]:
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
+; CHECK-NEXT:    [[TMP22:%.*]] = lshr i64 [[TMP21]], 3
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[TMP22]], 2147450880
+; CHECK-NEXT:    [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr
+; CHECK-NEXT:    [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp ne i8 [[TMP25]], 0
+; CHECK-NEXT:    [[TMP27:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP26]])
+; CHECK-NEXT:    [[TMP28:%.*]] = icmp ne i64 [[TMP27]], 0
+; CHECK-NEXT:    br i1 [[TMP28]], label %[[ASAN_REPORT1:.*]], label %[[BB31:.*]], !prof [[PROF1]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP26]], label %[[BB29:.*]], label %[[BB30:.*]]
+; CHECK:       [[BB29]]:
+; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP21]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB30]]
+; CHECK:       [[BB30]]:
+; CHECK-NEXT:    br label %[[BB31]]
+; CHECK:       [[BB31]]:
+; CHECK-NEXT:    [[TMP32:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP20]], align 8
+; CHECK-NEXT:    [[TMP33:%.*]] = ptrtoint ptr addrspace(1) [[TMP32]] to i64
+; CHECK-NEXT:    [[TMP34:%.*]] = lshr i64 [[TMP33]], 3
+; CHECK-NEXT:    [[TMP35:%.*]] = add i64 [[TMP34]], 2147450880
+; CHECK-NEXT:    [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr
+; CHECK-NEXT:    [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
+; CHECK-NEXT:    [[TMP38:%.*]] = icmp ne i8 [[TMP37]], 0
+; CHECK-NEXT:    [[TMP39:%.*]] = and i64 [[TMP33]], 7
+; CHECK-NEXT:    [[TMP40:%.*]] = add i64 [[TMP39]], 3
+; CHECK-NEXT:    [[TMP41:%.*]] = trunc i64 [[TMP40]] to i8
+; CHECK-NEXT:    [[TMP42:%.*]] = icmp sge i8 [[TMP41]], [[TMP37]]
+; CHECK-NEXT:    [[TMP43:%.*]] = and i1 [[TMP38]], [[TMP42]]
+; CHECK-NEXT:    [[TMP44:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP43]])
+; CHECK-NEXT:    [[TMP45:%.*]] = icmp ne i64 [[TMP44]], 0
+; CHECK-NEXT:    br i1 [[TMP45]], label %[[ASAN_REPORT2:.*]], label %[[BB48:.*]], !prof [[PROF1]]
+; CHECK:       [[ASAN_REPORT2]]:
+; CHECK-NEXT:    br i1 [[TMP43]], label %[[BB46:.*]], label %[[BB47:.*]]
+; CHECK:       [[BB46]]:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP33]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB47]]
+; CHECK:       [[BB47]]:
+; CHECK-NEXT:    br label %[[BB48]]
+; CHECK:       [[BB48]]:
+; CHECK-NEXT:    [[TMP49:%.*]] = load i32, ptr addrspace(1) [[TMP32]], align 4
+; CHECK-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP49]]
+; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP52:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64
+; CHECK-NEXT:    [[TMP53:%.*]] = lshr i64 [[TMP52]], 3
+; CHECK-NEXT:    [[TMP54:%.*]] = add i64 [[TMP53]], 2147450880
+; CHECK-NEXT:    [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr
+; CHECK-NEXT:    [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1
+; CHECK-NEXT:    [[TMP57:%.*]] = icmp ne i8 [[TMP56]], 0
+; CHECK-NEXT:    [[TMP58:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP57]])
+; CHECK-NEXT:    [[TMP59:%.*]] = icmp ne i64 [[TMP58]], 0
+; CHECK-NEXT:    br i1 [[TMP59]], label %[[ASAN_REPORT3:.*]], label %[[BB62:.*]], !prof [[PROF1]]
+; CHECK:       [[ASAN_REPORT3]]:
+; CHECK-NEXT:    br i1 [[TMP57]], label %[[BB60:.*]], label %[[BB61:.*]]
+; CHECK:       [[BB60]]:
+; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP52]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB61]]
+; CHECK:       [[BB61]]:
+; CHECK-NEXT:    br label %[[BB62]]
+; CHECK:       [[BB62]]:
+; CHECK-NEXT:    [[TMP63:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP51]], align 8
+; CHECK-NEXT:    [[TMP64:%.*]] = ptrtoint ptr addrspace(1) [[TMP63]] to i64
+; CHECK-NEXT:    [[TMP65:%.*]] = lshr i64 [[TMP64]], 3
+; CHECK-NEXT:    [[TMP66:%.*]] = add i64 [[TMP65]], 2147450880
+; CHECK-NEXT:    [[TMP67:%.*]] = inttoptr i64 [[TMP66]] to ptr
+; CHECK-NEXT:    [[TMP68:%.*]] = load i8, ptr [[TMP67]], align 1
+; CHECK-NEXT:    [[TMP69:%.*]] = icmp ne i8 [[TMP68]], 0
+; CHECK-NEXT:    [[TMP70:%.*]] = and i64 [[TMP64]], 7
+; CHECK-NEXT:    [[TMP71:%.*]] = add i64 [[TMP70]], 3
+; CHECK-NEXT:    [[TMP72:%.*]] = trunc i64 [[TMP71]] to i8
+; CHECK-NEXT:    [[TMP73:%.*]] = icmp sge i8 [[TMP72]], [[TMP68]]
+; CHECK-NEXT:    [[TMP74:%.*]] = and i1 [[TMP69]], [[TMP73]]
+; CHECK-NEXT:    [[TMP75:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP74]])
+; CHECK-NEXT:    [[TMP76:%.*]] = icmp ne i64 [[TMP75]], 0
+; CHECK-NEXT:    br i1 [[TMP76]], label %[[ASAN_REPORT4:.*]], label %[[BB79:.*]], !prof [[PROF1]]
+; CHECK:       [[ASAN_REPORT4]]:
+; CHECK-NEXT:    br i1 [[TMP74]], label %[[BB77:.*]], label %[[BB78:.*]]
+; CHECK:       [[BB77]]:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP64]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB78]]
+; CHECK:       [[BB78]]:
+; CHECK-NEXT:    br label %[[BB79]]
+; CHECK:       [[BB79]]:
+; CHECK-NEXT:    [[TMP80:%.*]] = load i32, ptr addrspace(1) [[TMP63]], align 4
+; CHECK-NEXT:    [[TMP81:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP80]]
+; CHECK-NEXT:    [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP50]] to ptr
+; CHECK-NEXT:    [[TMP82:%.*]] = addrspacecast ptr addrspace(3) [[TMP50]] to ptr
+; CHECK-NEXT:    [[TMP83:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[TMP82]])
+; CHECK-NEXT:    [[TMP84:%.*]] = xor i1 [[TMP83]], true
+; CHECK-NEXT:    br i1 [[TMP84]], label %[[BB85:.*]], label %[[BB101:.*]]
+; CHECK:       [[BB85]]:
+; CHECK-NEXT:    [[TMP86:%.*]] = ptrtoint ptr [[TMP82]] to i64
+; CHECK-NEXT:    [[TMP87:%.*]] = lshr i64 [[TMP86]], 3
+; CHECK-NEXT:    [[TMP88:%.*]] = add i64 [[TMP87]], 2147450880
+; CHECK-NEXT:    [[TMP89:%.*]] = inttoptr i64 [[TMP88]] to ptr
+; CHECK-NEXT:    [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1
+; CHECK-NEXT:    [[TMP91:%.*]] = icmp ne i8 [[TMP90]], 0
+; CHECK-NEXT:    [[TMP92:%.*]] = and i64 [[TMP86]], 7
+; CHECK-NEXT:    [[TMP93:%.*]] = trunc i64 [[TMP92]] to i8
+; CHECK-NEXT:    [[TMP94:%.*]] = icmp sge i8 [[TMP93]], [[TMP90]]
+; CHECK-NEXT:    [[TMP95:%.*]] = and i1 [[TMP91]], [[TMP94]]
+; CHECK-NEXT:    [[TMP96:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP95]])
+; CHECK-NEXT:    [[TMP97:%.*]] = icmp ne i64 [[TMP96]], 0
+; CHECK-NEXT:    br i1 [[TMP97]], label %[[ASAN_REPORT5:.*]], label %[[BB100:.*]], !prof [[PROF1]]
+; CHECK:       [[ASAN_REPORT5]]:
+; CHECK-NEXT:    br i1 [[TMP95]], label %[[BB98:.*]], label %[[BB99:.*]]
+; CHECK:       [[BB98]]:
+; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP86]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB99]]
+; CHECK:       [[BB99]]:
+; CHECK-NEXT:    br label %[[BB100]]
+; CHECK:       [[BB100]]:
+; CHECK-NEXT:    br label %[[BB101]]
+; CHECK:       [[BB101]]:
+; CHECK-NEXT:    store i8 3, ptr [[TMP82]], align 4
+; CHECK-NEXT:    [[TMP102:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP102]]
+; CHECK-NEXT:    [[TMP104:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP103]], align 4
+; CHECK-NEXT:    [[TMP105:%.*]] = ptrtoint ptr addrspace(3) [[TMP81]] to i32
+; CHECK-NEXT:    [[TMP106:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP104]], align 8
+; CHECK-NEXT:    [[TMP107:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP106]], i32 [[TMP105]]
+; CHECK-NEXT:    [[TMP108:%.*]] = ptrtoint ptr addrspace(1) [[TMP107]] to i64
+; CHECK-NEXT:    [[TMP109:%.*]] = lshr i64 [[TMP108]], 3
+; CHECK-NEXT:    [[TMP110:%.*]] = add i64 [[TMP109]], 2147450880
+; CHECK-NEXT:    [[TMP111:%.*]] = inttoptr i64 [[TMP110]] to ptr
+; CHECK-NEXT:    [[TMP112:%.*]] = load i8, ptr [[TMP111]], align 1
+; CHECK-NEXT:    [[TMP113:%.*]] = icmp ne i8 [[TMP112]], 0
+; CHECK-NEXT:    [[TMP114:%.*]] = and i64 [[TMP108]], 7
+; CHECK-NEXT:    [[TMP115:%.*]] = trunc i64 [[TMP114]] to i8
+; CHECK-NEXT:    [[TMP116:%.*]] = icmp sge i8 [[TMP115]], [[TMP112]]
+; CHECK-NEXT:    [[TMP117:%.*]] = and i1 [[TMP113]], [[TMP116]]
+; CHECK-NEXT:    [[TMP118:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP117]])
+; CHECK-NEXT:    [[TMP119:%.*]] = icmp ne i64 [[TMP118]], 0
+; CHECK-NEXT:    br i1 [[TMP119]], label %[[ASAN_REPORT6:.*]], label %[[BB122:.*]], !prof [[PROF1]]
+; CHECK:       [[ASAN_REPORT6]]:
+; CHECK-NEXT:    br i1 [[TMP117]], label %[[BB120:.*]], label %[[BB121:.*]]
+; CHECK:       [[BB120]]:
+; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP108]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB121]]
+; CHECK:       [[BB121]]:
+; CHECK-NEXT:    br label %[[BB122]]
+; CHECK:       [[BB122]]:
+; CHECK-NEXT:    store i8 3, ptr addrspace(3) [[TMP81]], align 8
+; CHECK-NEXT:    ret void
+;
+  %1 = call i32 @llvm.amdgcn.lds.kernel.id()
+  %2 = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 %1
+  %3 = load ptr addrspace(3), ptr addrspace(1) %2, align 4
+  %4 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 0
+  %5 = load ptr addrspace(1), ptr addrspace(1) %4, align 8
+  %6 = load i32, ptr addrspace(1) %5, align 4
+  %7 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %6
+  %8 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 1
+  %9 = load ptr addrspace(1), ptr addrspace(1) %8, align 8
+  %10 = load i32, ptr addrspace(1) %9, align 4
+  %11 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %10
+  %X = addrspacecast ptr addrspace(3) %7 to ptr
+  %12 = addrspacecast ptr addrspace(3) %7 to ptr
+  store i8 3, ptr %12, align 4
+  store i8 3, ptr addrspace(3) %11, align 8
+  ret void
+}
+
+; Function Attrs: sanitize_address
+define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB44:.*]]
+; CHECK:       [[MALLOC]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 3), align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 4), align 4
+; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 3), align 4
+; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 4), align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
+; CHECK-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP26]] to i64
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP27]]
+; CHECK-NEXT:    [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64
+; CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
+; CHECK-NEXT:    [[TMP31:%.*]] = zext i32 [[TMP30]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP29]], i64 [[TMP31]])
+; CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
+; CHECK-NEXT:    [[TMP33:%.*]] = zext i32 [[TMP32]] to i64
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP33]]
+; CHECK-NEXT:    [[TMP35:%.*]] = ptrtoint ptr addrspace(1) [[TMP34]] to i64
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
+; CHECK-NEXT:    [[TMP37:%.*]] = zext i32 [[TMP36]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP35]], i64 [[TMP37]])
+; CHECK-NEXT:    [[TMP38:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
+; CHECK-NEXT:    [[TMP39:%.*]] = zext i32 [[TMP38]] to i64
+; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP39]]
+; CHECK-NEXT:    [[TMP41:%.*]] = ptrtoint ptr addrspace(1) [[TMP40]] to i64
+; CHECK-NEXT:    [[TMP42:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
+; CHECK-NEXT:    [[TMP43:%.*]] = zext i32 [[TMP42]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP41]], i64 [[TMP43]])
+; CHECK-NEXT:    br label %[[BB44]]
+; CHECK:       [[BB44]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    [[TMP45:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP45]]
+; CHECK-NEXT:    [[TMP47:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP47]]
+; CHECK-NEXT:    call void @use_variables()
+; CHECK-NEXT:    [[TMP49:%.*]] = ptrtoint ptr addrspace(3) [[TMP46]] to i32
+; CHECK-NEXT:    [[TMP50:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP50]], i32 [[TMP49]]
+; CHECK-NEXT:    [[TMP52:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64
+; CHECK-NEXT:    [[TMP53:%.*]] = lshr i64 [[TMP52]], 3
+; CHECK-NEXT:    [[TMP54:%.*]] = add i64 [[TMP53]], 2147450880
+; CHECK-NEXT:    [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr
+; CHECK-NEXT:    [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1
+; CHECK-NEXT:    [[TMP57:%.*]] = icmp ne i8 [[TMP56]], 0
+; CHECK-NEXT:    [[TMP58:%.*]] = and i64 [[TMP52]], 7
+; CHECK-NEXT:    [[TMP59:%.*]] = trunc i64 [[TMP58]] to i8
+; CHECK-NEXT:    [[TMP60:%.*]] = icmp sge i8 [[TMP59]], [[TMP56]]
+; CHECK-NEXT:    [[TMP61:%.*]] = and i1 [[TMP57]], [[TMP60]]
+; CHECK-NEXT:    [[TMP62:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP61]])
+; CHECK-NEXT:    [[TMP63:%.*]] = icmp ne i64 [[TMP62]], 0
+; CHECK-NEXT:    br i1 [[TMP63]], label %[[ASAN_REPORT:.*]], label %[[BB66:.*]], !prof [[PROF1]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP61]], label %[[BB64:.*]], label %[[BB65:.*]]
+; CHECK:       [[BB64]]:
+; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP52]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB65]]
+; CHECK:       [[BB65]]:
+; CHECK-NEXT:    br label %[[BB66]]
+; CHECK:       [[BB66]]:
+; CHECK-NEXT:    store i8 7, ptr addrspace(3) [[TMP46]], align 1
+; CHECK-NEXT:    [[TMP67:%.*]] = ptrtoint ptr addrspace(3) [[TMP48]] to i64
+; CHECK-NEXT:    [[TMP68:%.*]] = add i64 [[TMP67]], 3
+; CHECK-NEXT:    [[TMP69:%.*]] = inttoptr i64 [[TMP68]] to ptr addrspace(3)
+; CHECK-NEXT:    [[TMP70:%.*]] = ptrtoint ptr addrspace(3) [[TMP48]] to i32
+; CHECK-NEXT:    [[TMP71:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP71]], i32 [[TMP70]]
+; CHECK-NEXT:    [[TMP73:%.*]] = ptrtoint ptr addrspace(1) [[TMP72]] to i64
+; CHECK-NEXT:    [[TMP74:%.*]] = lshr i64 [[TMP73]], 3
+; CHECK-NEXT:    [[TMP75:%.*]] = add i64 [[TMP74]], 2147450880
+; CHECK-NEXT:    [[TMP76:%.*]] = inttoptr i64 [[TMP75]] to ptr
+; CHECK-NEXT:    [[TMP77:%.*]] = load i8, ptr [[TMP76]], align 1
+; CHECK-NEXT:    [[TMP78:%.*]] = icmp ne i8 [[TMP77]], 0
+; CHECK-NEXT:    [[TMP79:%.*]] = and i64 [[TMP73]], 7
+; CHECK-NEXT:    [[TMP80:%.*]] = trunc i64 [[TMP79]] to i8
+; CHECK-NEXT:    [[TMP81:%.*]] = icmp sge i8 [[TMP80]], [[TMP77]]
+; CHECK-NEXT:    [[TMP82:%.*]] = and i1 [[TMP78]], [[TMP81]]
+; CHECK-NEXT:    [[TMP83:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP82]])
+; CHECK-NEXT:    [[TMP84:%.*]] = icmp ne i64 [[TMP83]], 0
+; CHECK-NEXT:    br i1 [[TMP84]], label %[[ASAN_REPORT1:.*]], label %[[BB87:.*]], !prof [[PROF1]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP82]], label %[[BB85:.*]], label %[[BB86:.*]]
+; CHECK:       [[BB85]]:
+; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP73]], i64 4) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB86]]
+; CHECK:       [[BB86]]:
+; CHECK-NEXT:    br label %[[BB87]]
+; CHECK:       [[BB87]]:
+; CHECK-NEXT:    [[TMP88:%.*]] = ptrtoint ptr addrspace(3) [[TMP69]] to i32
+; CHECK-NEXT:    [[TMP89:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP90:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP89]], i32 [[TMP88]]
+; CHECK-NEXT:    [[TMP91:%.*]] = ptrtoint ptr addrspace(1) [[TMP90]] to i64
+; CHECK-NEXT:    [[TMP92:%.*]] = lshr i64 [[TMP91]], 3
+; CHECK-NEXT:    [[TMP93:%.*]] = add i64 [[TMP92]], 2147450880
+; CHECK-NEXT:    [[TMP94:%.*]] = inttoptr i64 [[TMP93]] to ptr
+; CHECK-NEXT:    [[TMP95:%.*]] = load i8, ptr [[TMP94]], align 1
+; CHECK-NEXT:    [[TMP96:%.*]] = icmp ne i8 [[TMP95]], 0
+; CHECK-NEXT:    [[TMP97:%.*]] = and i64 [[TMP91]], 7
+; CHECK-NEXT:    [[TMP98:%.*]] = trunc i64 [[TMP97]] to i8
+; CHECK-NEXT:    [[TMP99:%.*]] = icmp sge i8 [[TMP98]], [[TMP95]]
+; CHECK-NEXT:    [[TMP100:%.*]] = and i1 [[TMP96]], [[TMP99]]
+; CHECK-NEXT:    [[TMP101:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP100]])
+; CHECK-NEXT:    [[TMP102:%.*]] = icmp ne i64 [[TMP101]], 0
+; CHECK-NEXT:    br i1 [[TMP102]], label %[[ASAN_REPORT2:.*]], label %[[BB105:.*]], !prof [[PROF1]]
+; CHECK:       [[ASAN_REPORT2]]:
+; CHECK-NEXT:    br i1 [[TMP100]], label %[[BB103:.*]], label %[[BB104:.*]]
+; CHECK:       [[BB103]]:
+; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP91]], i64 4) #[[ATTR8]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB104]]
+; CHECK:       [[BB104]]:
+; CHECK-NEXT:    br label %[[BB105]]
+; CHECK:       [[BB105]]:
+; CHECK-NEXT:    store i32 8, ptr addrspace(3) [[TMP48]], align 2
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP106:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP107:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP108:%.*]] = ptrtoint ptr [[TMP107]] to i64
+; CHECK-NEXT:    [[TMP109:%.*]] = ptrtoint ptr addrspace(1) [[TMP106]] to i64
+; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP109]], i64 [[TMP108]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+WId:
+  %0 = call i32 @llvm.amdgcn.workitem.id.x()
+  %1 = call i32 @llvm.amdgcn.workitem.id.y()
+  %2 = call i32 @llvm.amdgcn.workitem.id.z()
+  %3 = or i32 %0, %1
+  %4 = or i32 %3, %2
+  %5 = icmp eq i32 %4, 0
+  br i1 %5, label %Malloc, label %14
+
+Malloc:                                           ; preds = %WId
+  %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+  %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+  %8 = add i32 %6, %7
+  %9 = zext i32 %8 to i64
+  %10 = call ptr @llvm.returnaddress(i32 0)
+  %11 = ptrtoint ptr %10 to i64
+  %12 = call i64 @__asan_malloc_impl(i64 %9, i64 %11)
+  %13 = inttoptr i64 %12 to ptr addrspace(1)
+  store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+  br label %14
+
+14:                                               ; preds = %Malloc, %WId
+  %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
+  call void @llvm.amdgcn.s.barrier()
+  %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+  %16 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %15
+  %17 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+  %18 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %17
+  call void @use_variables()
+  store i8 7, ptr addrspace(3) %16, align 1
+  store i32 8, ptr addrspace(3) %18, align 2
+  br label %CondFree
+
+CondFree:                                         ; preds = %14
+  call void @llvm.amdgcn.s.barrier()
+  br i1 %xyzCond, label %Free, label %End
+
+Free:                                             ; preds = %CondFree
+  %19 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+  %20 = call ptr @llvm.returnaddress(i32 0)
+  %21 = ptrtoint ptr %20 to i64
+  %22 = ptrtoint ptr addrspace(1) %19 to i64
+  call void @__asan_free_impl(i64 %22, i64 %21)
+  br label %End
+
+End:                                              ; preds = %Free, %CondFree
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.y() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.z() #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.returnaddress(i32 immarg) #3
+
+declare i64 @__asan_malloc_impl(i64, i64)
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.s.barrier() #4
+
+declare void @__asan_free_impl(i64, i64)
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.lds.kernel.id() #2
+
+attributes #0 = { sanitize_address }
+attributes #1 = { sanitize_address "amdgpu-lds-size"="40" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #4 = { convergent nocallback nofree nounwind willreturn }
+
+!0 = !{i32 0, i32 1}
+!1 = !{i32 0}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address }
+; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="160" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR8]] = { nomerge }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[META2]] = !{i32 0}
+;.
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
new file mode 100755
index 0000000000000..072174db11f25
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
@@ -0,0 +1,203 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+
+%llvm.amdgcn.sw.lds.my_kernel.md.type = type { %llvm.amdgcn.sw.lds.my_kernel.md.item, %llvm.amdgcn.sw.lds.my_kernel.md.item }
+%llvm.amdgcn.sw.lds.my_kernel.md.item = type { i32, i32, i32 }
+
+ at llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol !0
+ at llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.my_kernel.md.type { %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 8, i32 4096, i32 4096 } }, no_sanitize_address
+
+; Function Attrs: sanitize_address
+;.
+; CHECK: @llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 4096, i32 5120, i32 4128, i32 1024 } }, no_sanitize_address, align 1
+; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
+; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
+; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
+;.
+define void @my_function(ptr addrspace(3) %lds_arg) #0 {
+; CHECK-LABEL: define void @my_function(
+; CHECK-SAME: ptr addrspace(3) [[LDS_ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[LDS_ARG]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(3) null, align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr addrspace(1) [[TMP3]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP4]], 3
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP5]], 2147450880
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 1
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i8 [[TMP8]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = and i64 [[TMP4]], 7
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 3
+; CHECK-NEXT:    [[TMP12:%.*]] = trunc i64 [[TMP11]] to i8
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp sge i8 [[TMP12]], [[TMP8]]
+; CHECK-NEXT:    [[TMP14:%.*]] = and i1 [[TMP9]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP14]])
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[ASAN_REPORT:.*]], label %[[BB19:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP14]], label %[[BB17:.*]], label %[[BB18:.*]]
+; CHECK:       [[BB17]]:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP4]]) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
+; CHECK-NEXT:    br label %[[BB19]]
+; CHECK:       [[BB19]]:
+; CHECK-NEXT:    [[LDS_VAL:%.*]] = load i32, ptr addrspace(3) [[LDS_ARG]], align 4
+; CHECK-NEXT:    [[NEW_LDS_VAL:%.*]] = add i32 [[LDS_VAL]], 1
+; CHECK-NEXT:    store i32 [[NEW_LDS_VAL]], ptr addrspace(3) [[LDS_ARG]], align 4
+; CHECK-NEXT:    ret void
+;
+  %lds_val = load i32, ptr addrspace(3) %lds_arg, align 4
+  %new_lds_val = add i32 %lds_val, 1
+  store i32 %new_lds_val, ptr addrspace(3) %lds_arg, align 4
+  ret void
+}
+
+; Function Attrs: sanitize_address
+define amdgpu_kernel void @my_kernel() #1 {
+; CHECK-LABEL: define amdgpu_kernel void @my_kernel(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB26:.*]]
+; CHECK:       [[MALLOC]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 3), align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 4), align 4
+; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 0, i32 3), align 4
+; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 0, i32 4), align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
+; CHECK-NEXT:    br label %[[BB26]]
+; CHECK:       [[BB26]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 [[TMP27]]
+; CHECK-NEXT:    [[LDS_PTR:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) [[TMP28]], i32 0, i32 0
+; CHECK-NEXT:    call void @my_function(ptr addrspace(3) [[LDS_PTR]])
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
+; CHECK-NEXT:    [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
+; CHECK-NEXT:    [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64
+; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+WId:
+  %0 = call i32 @llvm.amdgcn.workitem.id.x()
+  %1 = call i32 @llvm.amdgcn.workitem.id.y()
+  %2 = call i32 @llvm.amdgcn.workitem.id.z()
+  %3 = or i32 %0, %1
+  %4 = or i32 %3, %2
+  %5 = icmp eq i32 %4, 0
+  br i1 %5, label %Malloc, label %14
+
+Malloc:                                           ; preds = %WId
+  %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+  %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
+  %8 = add i32 %6, %7
+  %9 = zext i32 %8 to i64
+  %10 = call ptr @llvm.returnaddress(i32 0)
+  %11 = ptrtoint ptr %10 to i64
+  %12 = call i64 @__asan_malloc_impl(i64 %9, i64 %11)
+  %13 = inttoptr i64 %12 to ptr addrspace(1)
+  store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
+  br label %14
+
+14:                                               ; preds = %Malloc, %WId
+  %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
+  call void @llvm.amdgcn.s.barrier()
+  %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+  %16 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 %15
+  %lds_ptr = getelementptr [1024 x i32], ptr addrspace(3) %16, i32 0, i32 0
+  call void @my_function(ptr addrspace(3) %lds_ptr)
+  br label %CondFree
+
+CondFree:                                         ; preds = %14
+  call void @llvm.amdgcn.s.barrier()
+  br i1 %xyzCond, label %Free, label %End
+
+Free:                                             ; preds = %CondFree
+  %17 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
+  %18 = call ptr @llvm.returnaddress(i32 0)
+  %19 = ptrtoint ptr %18 to i64
+  %20 = ptrtoint ptr addrspace(1) %17 to i64
+  call void @__asan_free_impl(i64 %20, i64 %19)
+  br label %End
+
+End:                                              ; preds = %Free, %CondFree
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.y() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.z() #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.returnaddress(i32 immarg) #3
+
+declare i64 @__asan_malloc_impl(i64, i64)
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.s.barrier() #4
+
+declare void @__asan_free_impl(i64, i64)
+
+attributes #0 = { sanitize_address }
+attributes #1 = { sanitize_address "amdgpu-lds-size"="4104" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #4 = { convergent nocallback nofree nounwind willreturn }
+
+!0 = !{i32 0, i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address }
+; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="5152" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR8]] = { nomerge }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
new file mode 100755
index 0000000000000..99dbf6c607ab5
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
@@ -0,0 +1,249 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+
+%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
+%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
+
+ at llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol !0
+ at llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 } }, no_sanitize_address
+
+; Function Attrs: sanitize_address
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 } }, no_sanitize_address, align 1
+; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
+; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
+; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
+;.
+define amdgpu_kernel void @k0() #0 {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB32:.*]]
+; CHECK:       [[MALLOC]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
+; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
+; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
+; CHECK-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP26]] to i64
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP27]]
+; CHECK-NEXT:    [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64
+; CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
+; CHECK-NEXT:    [[TMP31:%.*]] = zext i32 [[TMP30]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP29]], i64 [[TMP31]])
+; CHECK-NEXT:    br label %[[BB32]]
+; CHECK:       [[BB32]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP33]]
+; CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP35]]
+; CHECK-NEXT:    [[TMP37:%.*]] = ptrtoint ptr addrspace(3) [[TMP34]] to i32
+; CHECK-NEXT:    [[TMP38:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP38]], i32 [[TMP37]]
+; CHECK-NEXT:    [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64
+; CHECK-NEXT:    [[TMP41:%.*]] = lshr i64 [[TMP40]], 3
+; CHECK-NEXT:    [[TMP42:%.*]] = add i64 [[TMP41]], 2147450880
+; CHECK-NEXT:    [[TMP43:%.*]] = inttoptr i64 [[TMP42]] to ptr
+; CHECK-NEXT:    [[TMP44:%.*]] = load i8, ptr [[TMP43]], align 1
+; CHECK-NEXT:    [[TMP45:%.*]] = icmp ne i8 [[TMP44]], 0
+; CHECK-NEXT:    [[TMP46:%.*]] = and i64 [[TMP40]], 7
+; CHECK-NEXT:    [[TMP47:%.*]] = trunc i64 [[TMP46]] to i8
+; CHECK-NEXT:    [[TMP48:%.*]] = icmp sge i8 [[TMP47]], [[TMP44]]
+; CHECK-NEXT:    [[TMP49:%.*]] = and i1 [[TMP45]], [[TMP48]]
+; CHECK-NEXT:    [[TMP50:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP49]])
+; CHECK-NEXT:    [[TMP51:%.*]] = icmp ne i64 [[TMP50]], 0
+; CHECK-NEXT:    br i1 [[TMP51]], label %[[ASAN_REPORT:.*]], label %[[BB54:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       [[ASAN_REPORT]]:
+; CHECK-NEXT:    br i1 [[TMP49]], label %[[BB52:.*]], label %[[BB53:.*]]
+; CHECK:       [[BB52]]:
+; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP40]]) #[[ATTR7:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB53]]
+; CHECK:       [[BB53]]:
+; CHECK-NEXT:    br label %[[BB54]]
+; CHECK:       [[BB54]]:
+; CHECK-NEXT:    store i8 7, ptr addrspace(3) [[TMP34]], align 4
+; CHECK-NEXT:    [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP36]] to i64
+; CHECK-NEXT:    [[TMP56:%.*]] = add i64 [[TMP55]], 3
+; CHECK-NEXT:    [[TMP57:%.*]] = inttoptr i64 [[TMP56]] to ptr addrspace(3)
+; CHECK-NEXT:    [[TMP58:%.*]] = ptrtoint ptr addrspace(3) [[TMP36]] to i32
+; CHECK-NEXT:    [[TMP59:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP60:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP59]], i32 [[TMP58]]
+; CHECK-NEXT:    [[TMP61:%.*]] = ptrtoint ptr addrspace(1) [[TMP60]] to i64
+; CHECK-NEXT:    [[TMP62:%.*]] = lshr i64 [[TMP61]], 3
+; CHECK-NEXT:    [[TMP63:%.*]] = add i64 [[TMP62]], 2147450880
+; CHECK-NEXT:    [[TMP64:%.*]] = inttoptr i64 [[TMP63]] to ptr
+; CHECK-NEXT:    [[TMP65:%.*]] = load i8, ptr [[TMP64]], align 1
+; CHECK-NEXT:    [[TMP66:%.*]] = icmp ne i8 [[TMP65]], 0
+; CHECK-NEXT:    [[TMP67:%.*]] = and i64 [[TMP61]], 7
+; CHECK-NEXT:    [[TMP68:%.*]] = trunc i64 [[TMP67]] to i8
+; CHECK-NEXT:    [[TMP69:%.*]] = icmp sge i8 [[TMP68]], [[TMP65]]
+; CHECK-NEXT:    [[TMP70:%.*]] = and i1 [[TMP66]], [[TMP69]]
+; CHECK-NEXT:    [[TMP71:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP70]])
+; CHECK-NEXT:    [[TMP72:%.*]] = icmp ne i64 [[TMP71]], 0
+; CHECK-NEXT:    br i1 [[TMP72]], label %[[ASAN_REPORT1:.*]], label %[[BB75:.*]], !prof [[PROF1]]
+; CHECK:       [[ASAN_REPORT1]]:
+; CHECK-NEXT:    br i1 [[TMP70]], label %[[BB73:.*]], label %[[BB74:.*]]
+; CHECK:       [[BB73]]:
+; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP61]], i64 4) #[[ATTR7]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB74]]
+; CHECK:       [[BB74]]:
+; CHECK-NEXT:    br label %[[BB75]]
+; CHECK:       [[BB75]]:
+; CHECK-NEXT:    [[TMP76:%.*]] = ptrtoint ptr addrspace(3) [[TMP57]] to i32
+; CHECK-NEXT:    [[TMP77:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP77]], i32 [[TMP76]]
+; CHECK-NEXT:    [[TMP79:%.*]] = ptrtoint ptr addrspace(1) [[TMP78]] to i64
+; CHECK-NEXT:    [[TMP80:%.*]] = lshr i64 [[TMP79]], 3
+; CHECK-NEXT:    [[TMP81:%.*]] = add i64 [[TMP80]], 2147450880
+; CHECK-NEXT:    [[TMP82:%.*]] = inttoptr i64 [[TMP81]] to ptr
+; CHECK-NEXT:    [[TMP83:%.*]] = load i8, ptr [[TMP82]], align 1
+; CHECK-NEXT:    [[TMP84:%.*]] = icmp ne i8 [[TMP83]], 0
+; CHECK-NEXT:    [[TMP85:%.*]] = and i64 [[TMP79]], 7
+; CHECK-NEXT:    [[TMP86:%.*]] = trunc i64 [[TMP85]] to i8
+; CHECK-NEXT:    [[TMP87:%.*]] = icmp sge i8 [[TMP86]], [[TMP83]]
+; CHECK-NEXT:    [[TMP88:%.*]] = and i1 [[TMP84]], [[TMP87]]
+; CHECK-NEXT:    [[TMP89:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP88]])
+; CHECK-NEXT:    [[TMP90:%.*]] = icmp ne i64 [[TMP89]], 0
+; CHECK-NEXT:    br i1 [[TMP90]], label %[[ASAN_REPORT2:.*]], label %[[BB93:.*]], !prof [[PROF1]]
+; CHECK:       [[ASAN_REPORT2]]:
+; CHECK-NEXT:    br i1 [[TMP88]], label %[[BB91:.*]], label %[[BB92:.*]]
+; CHECK:       [[BB91]]:
+; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP79]], i64 4) #[[ATTR7]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label %[[BB92]]
+; CHECK:       [[BB92]]:
+; CHECK-NEXT:    br label %[[BB93]]
+; CHECK:       [[BB93]]:
+; CHECK-NEXT:    store i32 8, ptr addrspace(3) [[TMP36]], align 2
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP94:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP95:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP96:%.*]] = ptrtoint ptr [[TMP95]] to i64
+; CHECK-NEXT:    [[TMP97:%.*]] = ptrtoint ptr addrspace(1) [[TMP94]] to i64
+; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP97]], i64 [[TMP96]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+WId:
+  %0 = call i32 @llvm.amdgcn.workitem.id.x()
+  %1 = call i32 @llvm.amdgcn.workitem.id.y()
+  %2 = call i32 @llvm.amdgcn.workitem.id.z()
+  %3 = or i32 %0, %1
+  %4 = or i32 %3, %2
+  %5 = icmp eq i32 %4, 0
+  br i1 %5, label %Malloc, label %14
+
+Malloc:                                           ; preds = %WId
+  %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+  %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+  %8 = add i32 %6, %7
+  %9 = zext i32 %8 to i64
+  %10 = call ptr @llvm.returnaddress(i32 0)
+  %11 = ptrtoint ptr %10 to i64
+  %12 = call i64 @__asan_malloc_impl(i64 %9, i64 %11)
+  %13 = inttoptr i64 %12 to ptr addrspace(1)
+  store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+  br label %14
+
+14:                                               ; preds = %Malloc, %WId
+  %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
+  call void @llvm.amdgcn.s.barrier()
+  %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+  %16 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %15
+  %17 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+  %18 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %17
+  store i8 7, ptr addrspace(3) %16, align 4
+  store i32 8, ptr addrspace(3) %18, align 2
+  br label %CondFree
+
+CondFree:                                         ; preds = %14
+  call void @llvm.amdgcn.s.barrier()
+  br i1 %xyzCond, label %Free, label %End
+
+Free:                                             ; preds = %CondFree
+  %19 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+  %20 = call ptr @llvm.returnaddress(i32 0)
+  %21 = ptrtoint ptr %20 to i64
+  %22 = ptrtoint ptr addrspace(1) %19 to i64
+  call void @__asan_free_impl(i64 %22, i64 %21)
+  br label %End
+
+End:                                              ; preds = %Free, %CondFree
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.y() #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.z() #1
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.returnaddress(i32 immarg) #2
+
+declare i64 @__asan_malloc_impl(i64, i64)
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.s.barrier() #3
+
+declare void @__asan_free_impl(i64, i64)
+
+attributes #0 = { sanitize_address "amdgpu-lds-size"="24" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #3 = { convergent nocallback nofree nounwind willreturn }
+
+!0 = !{i32 0, i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="96" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR7]] = { nomerge }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll
deleted file mode 100644
index 44149b28fd9f9..0000000000000
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -passes=asan -S | FileCheck %s
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
-target triple = "amdgcn-amd-amdhsa"
-
-; Memory access to lds are not instrumented
-
- at count = addrspace(3) global [100 x i32] undef, align 16
-
-define protected amdgpu_kernel void @lds_store(i32 %i) sanitize_address {
-entry:
-  ; CHECK-LABEL: @lds_store(
-  ; CHECK-NOT: call {{[a-zA-Z]}}
-  %arrayidx1 = getelementptr inbounds [100 x i32], ptr addrspace(3) @count, i32 0, i32 %i
-  store i32 0, ptr addrspace(3) %arrayidx1, align 4
-  ret void
-}
-
-define protected amdgpu_kernel void @lds_load(i32 %i) sanitize_address {
-entry:
-  ; CHECK-LABEL: @lds_load(
-  ; CHECK-NOT: call {{[a-zA-Z]}}
-  %arrayidx1 = getelementptr inbounds [100 x i32], ptr addrspace(3) @count, i32 0, i32 %i
-  %0 = load i32, ptr addrspace(3) %arrayidx1, align 4
-  ret void
-}
-
-; CHECK-LABEL: define internal void @asan.module_ctor()
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
index cb37ba24f1c74..e5ce018ba0f40 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
@@ -9,12 +9,10 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], true
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP21:%.*]]
-; CHECK:       4:
+; CHECK:       2:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
 ; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -31,15 +29,15 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[ASAN_REPORT:%.*]], label [[TMP20:%.*]], !prof [[PROF0:![0-9]+]]
 ; CHECK:       asan.report:
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP18:%.*]], label [[TMP19:%.*]]
-; CHECK:       18:
+; CHECK:       16:
 ; CHECK-NEXT:    call void @__asan_report_store4(i64 [[TMP5]]) #[[ATTR5:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
 ; CHECK-NEXT:    br label [[TMP19]]
-; CHECK:       19:
+; CHECK:       17:
 ; CHECK-NEXT:    br label [[TMP20]]
-; CHECK:       20:
+; CHECK:       18:
 ; CHECK-NEXT:    br label [[TMP21]]
-; CHECK:       21:
+; CHECK:       19:
 ; CHECK-NEXT:    store i32 0, ptr [[Q]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -47,12 +45,10 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; RECOV-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
 ; RECOV-NEXT:  entry:
 ; RECOV-NEXT:    [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; RECOV-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], true
 ; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP17:%.*]]
-; RECOV:       4:
+; RECOV:       2:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
 ; RECOV-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -68,9 +64,9 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
 ; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_store4_noabort(i64 [[TMP5]]) #[[ATTR3:[0-9]+]]
 ; RECOV-NEXT:    br label [[TMP16]]
-; RECOV:       16:
+; RECOV:       14:
 ; RECOV-NEXT:    br label [[TMP17]]
-; RECOV:       17:
+; RECOV:       15:
 ; RECOV-NEXT:    store i32 0, ptr [[Q]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -86,12 +82,10 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], true
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP21:%.*]]
-; CHECK:       4:
+; CHECK:       2:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
 ; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -108,15 +102,15 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[ASAN_REPORT:%.*]], label [[TMP20:%.*]], !prof [[PROF0]]
 ; CHECK:       asan.report:
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[TMP18:%.*]], label [[TMP19:%.*]]
-; CHECK:       18:
+; CHECK:       16:
 ; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP5]]) #[[ATTR5]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
 ; CHECK-NEXT:    br label [[TMP19]]
-; CHECK:       19:
+; CHECK:       17:
 ; CHECK-NEXT:    br label [[TMP20]]
-; CHECK:       20:
+; CHECK:       18:
 ; CHECK-NEXT:    br label [[TMP21]]
-; CHECK:       21:
+; CHECK:       19:
 ; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[Q]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -124,12 +118,10 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; RECOV-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
 ; RECOV-NEXT:  entry:
 ; RECOV-NEXT:    [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; RECOV-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], true
 ; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP17:%.*]]
-; RECOV:       4:
+; RECOV:       2:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
 ; RECOV-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -145,9 +137,9 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
 ; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load4_noabort(i64 [[TMP5]]) #[[ATTR3]]
 ; RECOV-NEXT:    br label [[TMP16]]
-; RECOV:       16:
+; RECOV:       14:
 ; RECOV-NEXT:    br label [[TMP17]]
-; RECOV:       17:
+; RECOV:       15:
 ; RECOV-NEXT:    [[R:%.*]] = load i32, ptr [[Q]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -163,12 +155,10 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], true
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP16:%.*]]
-; CHECK:       4:
+; CHECK:       2:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
 ; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -180,15 +170,15 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
 ; CHECK:       asan.report:
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
-; CHECK:       13:
+; CHECK:       11:
 ; CHECK-NEXT:    call void @__asan_report_store8(i64 [[TMP5]]) #[[ATTR5]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
 ; CHECK-NEXT:    br label [[TMP14]]
-; CHECK:       14:
+; CHECK:       12:
 ; CHECK-NEXT:    br label [[TMP15]]
-; CHECK:       15:
+; CHECK:       13:
 ; CHECK-NEXT:    br label [[TMP16]]
-; CHECK:       16:
+; CHECK:       14:
 ; CHECK-NEXT:    store i64 0, ptr [[Q]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -196,12 +186,10 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; RECOV-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] {
 ; RECOV-NEXT:  entry:
 ; RECOV-NEXT:    [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; RECOV-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], true
 ; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
-; RECOV:       4:
+; RECOV:       2:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
 ; RECOV-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -212,9 +200,9 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
 ; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_store8_noabort(i64 [[TMP5]]) #[[ATTR3]]
 ; RECOV-NEXT:    br label [[TMP11]]
-; RECOV:       11:
+; RECOV:       9:
 ; RECOV-NEXT:    br label [[TMP12]]
-; RECOV:       12:
+; RECOV:       10:
 ; RECOV-NEXT:    store i64 0, ptr [[Q]], align 8
 ; RECOV-NEXT:    ret void
 ;
@@ -229,12 +217,10 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], true
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP16:%.*]]
-; CHECK:       4:
+; CHECK:       2:
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
 ; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -246,15 +232,15 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
 ; CHECK:       asan.report:
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
-; CHECK:       13:
+; CHECK:       11:
 ; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP5]]) #[[ATTR5]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
 ; CHECK-NEXT:    br label [[TMP14]]
-; CHECK:       14:
+; CHECK:       12:
 ; CHECK-NEXT:    br label [[TMP15]]
-; CHECK:       15:
+; CHECK:       13:
 ; CHECK-NEXT:    br label [[TMP16]]
-; CHECK:       16:
+; CHECK:       14:
 ; CHECK-NEXT:    [[R:%.*]] = load i64, ptr [[Q]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -262,12 +248,10 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; RECOV-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] {
 ; RECOV-NEXT:  entry:
 ; RECOV-NEXT:    [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; RECOV-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
 ; RECOV-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; RECOV-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; RECOV-NEXT:    [[TMP3:%.*]] = xor i1 [[TMP1]], true
 ; RECOV-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
-; RECOV:       4:
+; RECOV:       2:
 ; RECOV-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
 ; RECOV-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
 ; RECOV-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -278,9 +262,9 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
 ; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load8_noabort(i64 [[TMP5]]) #[[ATTR3]]
 ; RECOV-NEXT:    br label [[TMP11]]
-; RECOV:       11:
+; RECOV:       9:
 ; RECOV-NEXT:    br label [[TMP12]]
-; RECOV:       12:
+; RECOV:       10:
 ; RECOV-NEXT:    [[R:%.*]] = load i64, ptr [[Q]], align 8
 ; RECOV-NEXT:    ret void
 ;

>From b761645f68227f6c8eaec57941fd61c62ad54603 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Thu, 23 May 2024 13:01:22 +0530
Subject: [PATCH 2/2] [AMDGPU] Enable flag to choose instrumentation of LDS.
 [AMDGPU] Update the MD initializer and donot replace uses of MD global.

---
 .../Instrumentation/AddressSanitizer.cpp      | 140 ++++++------------
 .../asan-dynamic-lds-indirect-access.ll       | 127 ++++++----------
 .../AMDGPU/asan-dynamic-lds-test.ll           |  66 +++------
 .../AMDGPU/asan-static-indirect-access.ll     | 126 ++++++----------
 ...atic-lds-indirect-access-function-param.ll |  53 +++----
 .../AMDGPU/asan-static-lds-test.ll            | 105 +++++--------
 6 files changed, 224 insertions(+), 393 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index a312f1d50fe9a..2cead6cdc273a 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -232,6 +232,11 @@ static cl::opt<bool>
                       cl::desc("instrument byval call arguments"), cl::Hidden,
                       cl::init(true));
 
+static cl::opt<bool>
+    ClInstrumentAMDGPULDS("asan-instrument-amdgpu-lds",
+                          cl::desc("instrument amdgpu LDS accesses"),
+                          cl::Hidden, cl::init(true));
+
 static cl::opt<bool> ClAlwaysSlowPath(
     "asan-always-slow-path",
     cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden,
@@ -1293,8 +1298,8 @@ static GlobalVariable *getKernelSwLDSBaseGlobal(Module &M) {
 static void updateLDSSizeFnAttr(Function *Func, uint32_t Offset,
                                 bool UsesDynLDS) {
   if (Offset != 0) {
-    std::string Buffer;
-    raw_string_ostream SS{Buffer};
+    SmallString<256> Buffer;
+    raw_svector_ostream SS(Buffer);
     SS << format("%u", Offset);
     if (UsesDynLDS)
       SS << format(",%u", Offset);
@@ -1312,33 +1317,30 @@ static void recordLDSAbsoluteAddress(Module &M, GlobalVariable *GV,
                   MDNode::get(Ctx, {MinC, MaxC}));
 }
 
-static void UpdateSwLDSMetadataWithRedzoneInfo(Function &F, int Scale) {
+/// Update SwLDS Metadata global initializer with redzone info.
+static SmallVector<std::pair<uint32_t, uint32_t>, 64>
+UpdateSwLDSMetadataWithRedzoneInfo(Function &F, int Scale) {
   Module *M = F.getParent();
   GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F);
   GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F);
   if (!SwLDSMetadataGlobal || !SwLDSGlobal)
-    return;
+    return {};
 
   LLVMContext &Ctx = M->getContext();
   Type *Int32Ty = Type::getInt32Ty(Ctx);
-
+  SmallVector<std::pair<uint32_t, uint32_t>, 64> RedzoneOffsetAndSizeVector;
   Constant *MdInit = SwLDSMetadataGlobal->getInitializer();
-  Align MdAlign = Align(SwLDSMetadataGlobal->getAlign().valueOrOne());
   Align LDSAlign = Align(SwLDSGlobal->getAlign().valueOrOne());
 
   StructType *MDStructType =
       cast<StructType>(SwLDSMetadataGlobal->getValueType());
-  assert(MDStructType);
   unsigned NumStructs = MDStructType->getNumElements();
-
-  std::vector<Type *> Items;
   std::vector<Constant *> Initializers;
   uint32_t MallocSize = 0;
-  //{GV.start, Align(GV.size + Redzone.size), Redzone.start, Redzone.size}
-  StructType *LDSItemTy = StructType::create(
-      Ctx, {Int32Ty, Int32Ty, Int32Ty, Int32Ty, Int32Ty}, "");
+  StructType *LDSItemTy =
+      cast<StructType>(MDStructType->getStructElementType(0));
+
   for (unsigned i = 0; i < NumStructs; i++) {
-    Items.push_back(LDSItemTy);
     ConstantStruct *member =
         dyn_cast<ConstantStruct>(MdInit->getAggregateElement(i));
     Constant *NewInitItem;
@@ -1353,91 +1355,48 @@ static void UpdateSwLDSMetadataWithRedzoneInfo(Function &F, int Scale) {
         const uint64_t RightRedzoneSize =
             getRedzoneSizeForGlobal(Scale, GlobalSizeValue);
         MallocSize += GlobalSizeValue;
-        Constant *NewItemRedzoneStartOffset =
-            ConstantInt::get(Int32Ty, MallocSize);
+        RedzoneOffsetAndSizeVector.emplace_back(MallocSize, RightRedzoneSize);
         MallocSize += RightRedzoneSize;
-        Constant *NewItemRedzoneSize =
-            ConstantInt::get(Int32Ty, RightRedzoneSize);
-
         unsigned NewItemAlignGlobalPlusRedzoneSize =
             alignTo(GlobalSizeValue + RightRedzoneSize, LDSAlign);
         Constant *NewItemAlignGlobalPlusRedzoneSizeConst =
             ConstantInt::get(Int32Ty, NewItemAlignGlobalPlusRedzoneSize);
         NewInitItem = ConstantStruct::get(
             LDSItemTy, {NewItemStartOffset, NewItemGlobalSizeConst,
-                        NewItemAlignGlobalPlusRedzoneSizeConst,
-                        NewItemRedzoneStartOffset, NewItemRedzoneSize});
+                        NewItemAlignGlobalPlusRedzoneSizeConst});
         MallocSize = alignTo(MallocSize, LDSAlign);
       } else {
         Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize);
         Constant *zero = ConstantInt::get(Int32Ty, 0);
-        NewInitItem = ConstantStruct::get(
-            LDSItemTy, {CurrMallocSize, zero, zero, zero, zero});
+        NewInitItem =
+            ConstantStruct::get(LDSItemTy, {CurrMallocSize, zero, zero});
+        RedzoneOffsetAndSizeVector.emplace_back(0, 0);
       }
     } else {
       Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize);
       Constant *zero = ConstantInt::get(Int32Ty, 0);
-      NewInitItem = ConstantStruct::get(
-          LDSItemTy, {CurrMallocSize, zero, zero, zero, zero});
+      NewInitItem =
+          ConstantStruct::get(LDSItemTy, {CurrMallocSize, zero, zero});
+      RedzoneOffsetAndSizeVector.emplace_back(0, 0);
     }
     Initializers.push_back(NewInitItem);
   }
   GlobalVariable *SwDynLDS = getKernelSwDynLDSGlobal(*M, F);
-  bool usesDynLDS = SwDynLDS ? true : false;
+  bool usesDynLDS = SwDynLDS != nullptr;
   updateLDSSizeFnAttr(&F, MallocSize, usesDynLDS);
   if (usesDynLDS)
     recordLDSAbsoluteAddress(*M, SwDynLDS, MallocSize);
 
-  StructType *MetadataStructType = StructType::create(Ctx, Items, "");
-
-  GlobalVariable *NewSwLDSMetadataGlobal = new GlobalVariable(
-      *M, MetadataStructType, false, GlobalValue::InternalLinkage,
-      PoisonValue::get(MetadataStructType), "", nullptr,
-      GlobalValue::NotThreadLocal, 1, false);
-  Constant *Data = ConstantStruct::get(MetadataStructType, Initializers);
-  NewSwLDSMetadataGlobal->setInitializer(Data);
-  NewSwLDSMetadataGlobal->setAlignment(MdAlign);
-  GlobalValue::SanitizerMetadata MD;
-  MD.NoAddress = true;
-  NewSwLDSMetadataGlobal->setSanitizerMetadata(MD);
-
-  for (Use &U : make_early_inc_range(SwLDSMetadataGlobal->uses())) {
-    if (GEPOperator *GEP = dyn_cast<GEPOperator>(U.getUser())) {
-      SmallVector<Constant *> Indices;
-      for (Use &Idx : GEP->indices()) {
-        Indices.push_back(cast<Constant>(Idx));
-      }
-      Constant *NewGEP = ConstantExpr::getGetElementPtr(
-          MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
-      GEP->replaceAllUsesWith(NewGEP);
-    } else if (LoadInst *Load = dyn_cast<LoadInst>(U.getUser())) {
-      Constant *zero = ConstantInt::get(Int32Ty, 0);
-      SmallVector<Constant *> Indices{zero, zero, zero};
-      Constant *NewGEP = ConstantExpr::getGetElementPtr(
-          MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
-      IRBuilder<> IRB(Load);
-      LoadInst *NewLoad = IRB.CreateLoad(Load->getType(), NewGEP);
-      Load->replaceAllUsesWith(NewLoad);
-      Load->eraseFromParent();
-    } else if (StoreInst *Store = dyn_cast<StoreInst>(U.getUser())) {
-      Constant *zero = ConstantInt::get(Int32Ty, 0);
-      SmallVector<Constant *> Indices{zero, zero, zero};
-      Constant *NewGEP = ConstantExpr::getGetElementPtr(
-          MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
-      IRBuilder<> IRB(Store);
-      StoreInst *NewStore = IRB.CreateStore(Store->getValueOperand(), NewGEP);
-      Store->replaceAllUsesWith(NewStore);
-      Store->eraseFromParent();
-    } else
-      report_fatal_error("AMDGPU Sw LDS Metadata User instruction not handled");
-  }
-  SwLDSMetadataGlobal->replaceAllUsesWith(NewSwLDSMetadataGlobal);
-  NewSwLDSMetadataGlobal->takeName(SwLDSMetadataGlobal);
-  SwLDSMetadataGlobal->eraseFromParent();
-  return;
+  Constant *Data = ConstantStruct::get(MDStructType, Initializers);
+  SwLDSMetadataGlobal->setInitializer(Data);
+  return RedzoneOffsetAndSizeVector;
 }
 
-static void poisonRedzonesForSwLDS(Function &F) {
+/// Poison redzone regions using the redzone size and offset info.
+static void
+poisonRedzonesForSwLDS(Function &F,
+                       SmallVector<std::pair<uint32_t, uint32_t>, 64>
+                           &RedzoneOffsetAndSizeVector) {
   Module *M = F.getParent();
   GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F);
   GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F);
@@ -1470,6 +1429,7 @@ static void poisonRedzonesForSwLDS(Function &F) {
     unsigned NumStructs = MDStructType->getNumElements();
     Value *StoreMallocPointer = SI->getValueOperand();
 
+    assert(RedzoneOffsetAndSizeVector.size() == NumStructs);
     for (unsigned i = 0; i < NumStructs; i++) {
       ConstantStruct *member =
           dyn_cast<ConstantStruct>(MdInit->getAggregateElement(i));
@@ -1484,35 +1444,28 @@ static void poisonRedzonesForSwLDS(Function &F) {
         continue;
       IRBuilder<> IRB(SI);
       IRB.SetInsertPoint(SI->getNextNode());
+      auto &RedzonePair = RedzoneOffsetAndSizeVector[i];
+      uint64_t RedzoneOffset = RedzonePair.first;
+      uint64_t RedzoneSize = RedzonePair.second;
 
-      auto *GEPForOffset = IRB.CreateInBoundsGEP(
-          MDStructType, SwLDSMetadataGlobal,
-          {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(3)});
-
-      auto *GEPForSize = IRB.CreateInBoundsGEP(
-          MDStructType, SwLDSMetadataGlobal,
-          {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(4)});
-
-      Value *RedzoneOffset = IRB.CreateLoad(IRB.getInt32Ty(), GEPForOffset);
-      RedzoneOffset = IRB.CreateZExt(RedzoneOffset, IRB.getInt64Ty());
       Value *RedzoneAddrOffset = IRB.CreateInBoundsGEP(
-          IRB.getInt8Ty(), StoreMallocPointer, {RedzoneOffset});
+          IRB.getInt8Ty(), StoreMallocPointer, {IRB.getInt64(RedzoneOffset)});
       Value *RedzoneAddress =
           IRB.CreatePtrToInt(RedzoneAddrOffset, IRB.getInt64Ty());
-      Value *RedzoneSize = IRB.CreateLoad(IRB.getInt32Ty(), GEPForSize);
-      RedzoneSize = IRB.CreateZExt(RedzoneSize, IRB.getInt64Ty());
-      IRB.CreateCall(AsanPoisonRegion, {RedzoneAddress, RedzoneSize});
+      IRB.CreateCall(AsanPoisonRegion,
+                     {RedzoneAddress, IRB.getInt64(RedzoneSize)});
     }
   }
-  return;
 }
 
+/// Update SwLDS Metadata global initializer with redzone info.
+/// Poison redzone regions using the redzone size and offset info.
 static void preProcessAMDGPULDSAccesses(Module &M, int Scale) {
   for (Function &F : M) {
-    UpdateSwLDSMetadataWithRedzoneInfo(F, Scale);
-    poisonRedzonesForSwLDS(F);
+    auto RedzoneOffsetAndSizeVector =
+        UpdateSwLDSMetadataWithRedzoneInfo(F, Scale);
+    poisonRedzonesForSwLDS(F, RedzoneOffsetAndSizeVector);
   }
-  return;
 }
 
 AddressSanitizerPass::AddressSanitizerPass(
@@ -1527,7 +1480,7 @@ PreservedAnalyses AddressSanitizerPass::run(Module &M,
                                             ModuleAnalysisManager &MAM) {
   Triple TargetTriple = Triple(M.getTargetTriple());
 
-  if (TargetTriple.isAMDGPU()) {
+  if (TargetTriple.isAMDGPU() && ClInstrumentAMDGPULDS) {
     unsigned LongSize = M.getDataLayout().getPointerSizeInBits();
     ShadowMapping Mapping = getShadowMapping(TargetTriple, LongSize, false);
     preProcessAMDGPULDSAccesses(M, Mapping.Scale);
@@ -2147,7 +2100,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
   }
 
   Value *AddrLong;
-  if (TargetTriple.isAMDGCN()) {
+  if (TargetTriple.isAMDGPU() && ClInstrumentAMDGPULDS) {
     Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
     if (PtrTy->getPointerAddressSpace() == 3) {
       Module *M = IRB.GetInsertBlock()->getParent()->getParent();
@@ -2168,6 +2121,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
           SwLDS = IRB.CreateIntToPtr(IRB.getInt32(0), IRB.getPtrTy(3));
         }
       }
+      assert(SwLDS && "Invalid AMDGPU Sw LDS base ptr");
       Value *PtrToInt = IRB.CreatePtrToInt(Addr, IRB.getInt32Ty());
       Value *LoadMallocPtr = IRB.CreateLoad(IRB.getPtrTy(1), SwLDS);
       Value *GEP =
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
index f37fbf350ffde..8ef11f313f765 100755
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 %llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
 %llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
@@ -11,12 +11,11 @@
 @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
 
 ; Function Attrs: sanitize_address
-;.
 ; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
 ; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 0 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 0 } }, no_sanitize_address
 ; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 }, %1 { i32 96, i32 0, i32 0, i32 0, i32 0 }, %1 { i32 96, i32 0, i32 0, i32 0, i32 0 } }, no_sanitize_address, align 1
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
 ; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
 ; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
 ; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
@@ -229,14 +228,14 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB60:.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB48:.*]]
 ; CHECK:       [[MALLOC]]:
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
-; CHECK-NEXT:    store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4
+; CHECK-NEXT:    store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64
 ; CHECK-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP11]], 3
 ; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880
@@ -261,19 +260,19 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
 ; CHECK-NEXT:    br label %[[BB26]]
 ; CHECK:       [[BB26]]:
 ; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
-; CHECK-NEXT:    store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4
+; CHECK-NEXT:    store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4
 ; CHECK-NEXT:    [[TMP28:%.*]] = add i32 [[TMP27]], 7
 ; CHECK-NEXT:    [[TMP29:%.*]] = udiv i32 [[TMP28]], 8
 ; CHECK-NEXT:    [[TMP30:%.*]] = mul i32 [[TMP29]], 8
-; CHECK-NEXT:    store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4
+; CHECK-NEXT:    store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4
 ; CHECK-NEXT:    [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]]
-; CHECK-NEXT:    store i32 [[TMP31]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+; CHECK-NEXT:    store i32 [[TMP31]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
 ; CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
-; CHECK-NEXT:    store i32 [[TMP32]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4
+; CHECK-NEXT:    store i32 [[TMP32]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4
 ; CHECK-NEXT:    [[TMP33:%.*]] = add i32 [[TMP32]], 7
 ; CHECK-NEXT:    [[TMP34:%.*]] = udiv i32 [[TMP33]], 8
 ; CHECK-NEXT:    [[TMP35:%.*]] = mul i32 [[TMP34]], 8
-; CHECK-NEXT:    store i32 [[TMP35]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+; CHECK-NEXT:    store i32 [[TMP35]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
 ; CHECK-NEXT:    [[TMP36:%.*]] = add i32 [[TMP31]], [[TMP35]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = zext i32 [[TMP36]] to i64
 ; CHECK-NEXT:    [[TMP38:%.*]] = call ptr @llvm.returnaddress(i32 0)
@@ -281,34 +280,22 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
 ; CHECK-NEXT:    [[TMP40:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP37]], i64 [[TMP39]])
 ; CHECK-NEXT:    [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr addrspace(1)
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP41]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP42:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
-; CHECK-NEXT:    [[TMP43:%.*]] = zext i32 [[TMP42]] to i64
-; CHECK-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP43]]
+; CHECK-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 68
 ; CHECK-NEXT:    [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP44]] to i64
-; CHECK-NEXT:    [[TMP46:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
-; CHECK-NEXT:    [[TMP47:%.*]] = zext i32 [[TMP46]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP45]], i64 [[TMP47]])
-; CHECK-NEXT:    [[TMP48:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
-; CHECK-NEXT:    [[TMP49:%.*]] = zext i32 [[TMP48]] to i64
-; CHECK-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP49]]
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP45]], i64 28)
+; CHECK-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 33
 ; CHECK-NEXT:    [[TMP51:%.*]] = ptrtoint ptr addrspace(1) [[TMP50]] to i64
-; CHECK-NEXT:    [[TMP52:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
-; CHECK-NEXT:    [[TMP53:%.*]] = zext i32 [[TMP52]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP51]], i64 [[TMP53]])
-; CHECK-NEXT:    [[TMP54:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
-; CHECK-NEXT:    [[TMP55:%.*]] = zext i32 [[TMP54]] to i64
-; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP55]]
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP51]], i64 31)
+; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 8
 ; CHECK-NEXT:    [[TMP57:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64
-; CHECK-NEXT:    [[TMP58:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
-; CHECK-NEXT:    [[TMP59:%.*]] = zext i32 [[TMP58]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP57]], i64 [[TMP59]])
-; CHECK-NEXT:    br label %[[BB60]]
-; CHECK:       [[BB60]]:
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP57]], i64 24)
+; CHECK-NEXT:    br label %[[BB48]]
+; CHECK:       [[BB48]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    [[TMP61:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP61:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP62:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP61]]
-; CHECK-NEXT:    [[TMP63:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP63:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP63]]
 ; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
 ; CHECK-NEXT:    call void @use_variables()
@@ -327,16 +314,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
 ; CHECK-NEXT:    [[TMP77:%.*]] = and i1 [[TMP73]], [[TMP76]]
 ; CHECK-NEXT:    [[TMP78:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP77]])
 ; CHECK-NEXT:    [[TMP79:%.*]] = icmp ne i64 [[TMP78]], 0
-; CHECK-NEXT:    br i1 [[TMP79]], label %[[ASAN_REPORT1:.*]], label %[[BB82:.*]], !prof [[PROF2]]
+; CHECK-NEXT:    br i1 [[TMP79]], label %[[ASAN_REPORT1:.*]], label %[[BB70:.*]], !prof [[PROF2]]
 ; CHECK:       [[ASAN_REPORT1]]:
-; CHECK-NEXT:    br i1 [[TMP77]], label %[[BB80:.*]], label %[[BB81:.*]]
-; CHECK:       [[BB80]]:
+; CHECK-NEXT:    br i1 [[TMP77]], label %[[BB68:.*]], label %[[BB69:.*]]
+; CHECK:       [[BB68]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP68]]) #[[ATTR8]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label %[[BB81]]
-; CHECK:       [[BB81]]:
-; CHECK-NEXT:    br label %[[BB82]]
-; CHECK:       [[BB82]]:
+; CHECK-NEXT:    br label %[[BB69]]
+; CHECK:       [[BB69]]:
+; CHECK-NEXT:    br label %[[BB70]]
+; CHECK:       [[BB70]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(3) [[TMP62]], align 1
 ; CHECK-NEXT:    [[TMP83:%.*]] = ptrtoint ptr addrspace(3) [[TMP64]] to i64
 ; CHECK-NEXT:    [[TMP84:%.*]] = add i64 [[TMP83]], 3
@@ -356,16 +343,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
 ; CHECK-NEXT:    [[TMP98:%.*]] = and i1 [[TMP94]], [[TMP97]]
 ; CHECK-NEXT:    [[TMP99:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP98]])
 ; CHECK-NEXT:    [[TMP100:%.*]] = icmp ne i64 [[TMP99]], 0
-; CHECK-NEXT:    br i1 [[TMP100]], label %[[ASAN_REPORT2:.*]], label %[[BB103:.*]], !prof [[PROF2]]
+; CHECK-NEXT:    br i1 [[TMP100]], label %[[ASAN_REPORT2:.*]], label %[[BB91:.*]], !prof [[PROF2]]
 ; CHECK:       [[ASAN_REPORT2]]:
-; CHECK-NEXT:    br i1 [[TMP98]], label %[[BB101:.*]], label %[[BB102:.*]]
-; CHECK:       [[BB101]]:
+; CHECK-NEXT:    br i1 [[TMP98]], label %[[BB89:.*]], label %[[BB90:.*]]
+; CHECK:       [[BB89]]:
 ; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP89]], i64 4) #[[ATTR8]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label %[[BB102]]
-; CHECK:       [[BB102]]:
-; CHECK-NEXT:    br label %[[BB103]]
-; CHECK:       [[BB103]]:
+; CHECK-NEXT:    br label %[[BB90]]
+; CHECK:       [[BB90]]:
+; CHECK-NEXT:    br label %[[BB91]]
+; CHECK:       [[BB91]]:
 ; CHECK-NEXT:    [[TMP104:%.*]] = ptrtoint ptr addrspace(3) [[TMP85]] to i32
 ; CHECK-NEXT:    [[TMP105:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP105]], i32 [[TMP104]]
@@ -381,16 +368,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
 ; CHECK-NEXT:    [[TMP116:%.*]] = and i1 [[TMP112]], [[TMP115]]
 ; CHECK-NEXT:    [[TMP117:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP116]])
 ; CHECK-NEXT:    [[TMP118:%.*]] = icmp ne i64 [[TMP117]], 0
-; CHECK-NEXT:    br i1 [[TMP118]], label %[[ASAN_REPORT3:.*]], label %[[BB121:.*]], !prof [[PROF2]]
+; CHECK-NEXT:    br i1 [[TMP118]], label %[[ASAN_REPORT3:.*]], label %[[BB109:.*]], !prof [[PROF2]]
 ; CHECK:       [[ASAN_REPORT3]]:
-; CHECK-NEXT:    br i1 [[TMP116]], label %[[BB119:.*]], label %[[BB120:.*]]
-; CHECK:       [[BB119]]:
+; CHECK-NEXT:    br i1 [[TMP116]], label %[[BB107:.*]], label %[[BB108:.*]]
+; CHECK:       [[BB107]]:
 ; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP107]], i64 4) #[[ATTR8]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label %[[BB120]]
-; CHECK:       [[BB120]]:
-; CHECK-NEXT:    br label %[[BB121]]
-; CHECK:       [[BB121]]:
+; CHECK-NEXT:    br label %[[BB108]]
+; CHECK:       [[BB108]]:
+; CHECK-NEXT:    br label %[[BB109]]
+; CHECK:       [[BB109]]:
 ; CHECK-NEXT:    store i32 8, ptr addrspace(3) [[TMP64]], align 2
 ; CHECK-NEXT:    br label %[[CONDFREE:.*]]
 ; CHECK:       [[CONDFREE]]:
@@ -415,7 +402,7 @@ WId:
   %5 = icmp eq i32 %4, 0
   br i1 %5, label %Malloc, label %26
 
-Malloc:                                           ; preds = %WId
+Malloc:
   %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
   %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
   %8 = add i32 %6, %7
@@ -445,7 +432,7 @@ Malloc:                                           ; preds = %WId
   store ptr addrspace(1) %25, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
   br label %26
 
-26:                                               ; preds = %Malloc, %WId
+26:
   %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
   call void @llvm.amdgcn.s.barrier()
   %27 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -458,11 +445,11 @@ Malloc:                                           ; preds = %WId
   store i32 8, ptr addrspace(3) %30, align 2
   br label %CondFree
 
-CondFree:                                         ; preds = %26
+CondFree:
   call void @llvm.amdgcn.s.barrier()
   br i1 %xyzCond, label %Free, label %End
 
-Free:                                             ; preds = %CondFree
+Free:
   %31 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
   %32 = call ptr @llvm.returnaddress(i32 0)
   %33 = ptrtoint ptr %32 to i64
@@ -470,36 +457,20 @@ Free:                                             ; preds = %CondFree
   call void @__asan_free_impl(i64 %34, i64 %33)
   br label %End
 
-End:                                              ; preds = %Free, %CondFree
+End:
   ret void
 }
 
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
-declare void @llvm.donothing() #2
 
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.donothing() #2
 declare i32 @llvm.amdgcn.workitem.id.x() #3
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.y() #3
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.z() #3
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3
-
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
 declare ptr @llvm.returnaddress(i32 immarg) #2
-
 declare i64 @__asan_malloc_impl(i64, i64)
-
-; Function Attrs: convergent nocallback nofree nounwind willreturn
 declare void @llvm.amdgcn.s.barrier() #4
-
 declare void @__asan_free_impl(i64, i64)
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.lds.kernel.id() #3
 
 attributes #0 = { sanitize_address }
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
index 1be50f48aa6f6..e64a6fb8aca5f 100755
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 %llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
 %llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
@@ -12,7 +12,7 @@
 ;.
 ; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol [[META0:![0-9]+]]
 ; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol [[META1:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 0, i32 0, i32 0, i32 0 } }, no_sanitize_address, align 1
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 0, i32 0 } }, no_sanitize_address
 ; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
 ; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
 ; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
@@ -29,14 +29,14 @@ define amdgpu_kernel void @k0() #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB43:.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB39:.*]]
 ; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
-; CHECK-NEXT:    store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64
 ; CHECK-NEXT:    [[TMP12:%.*]] = lshr i64 [[TMP11]], 3
 ; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880
@@ -61,11 +61,11 @@ define amdgpu_kernel void @k0() #0 {
 ; CHECK-NEXT:    br label %[[BB26]]
 ; CHECK:       [[BB26]]:
 ; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
-; CHECK-NEXT:    store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4
+; CHECK-NEXT:    store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4
 ; CHECK-NEXT:    [[TMP28:%.*]] = add i32 [[TMP27]], 0
 ; CHECK-NEXT:    [[TMP29:%.*]] = udiv i32 [[TMP28]], 1
 ; CHECK-NEXT:    [[TMP30:%.*]] = mul i32 [[TMP29]], 1
-; CHECK-NEXT:    store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT:    store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = zext i32 [[TMP31]] to i64
 ; CHECK-NEXT:    [[TMP33:%.*]] = call ptr @llvm.returnaddress(i32 0)
@@ -73,18 +73,14 @@ define amdgpu_kernel void @k0() #0 {
 ; CHECK-NEXT:    [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP32]], i64 [[TMP34]])
 ; CHECK-NEXT:    [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1)
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP36]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP37:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
-; CHECK-NEXT:    [[TMP38:%.*]] = zext i32 [[TMP37]] to i64
-; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP36]], i64 [[TMP38]]
+; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP36]], i64 8
 ; CHECK-NEXT:    [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64
-; CHECK-NEXT:    [[TMP41:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
-; CHECK-NEXT:    [[TMP42:%.*]] = zext i32 [[TMP41]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP40]], i64 [[TMP42]])
-; CHECK-NEXT:    br label %[[BB43]]
-; CHECK:       [[BB43]]:
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP40]], i64 24)
+; CHECK-NEXT:    br label %[[BB39]]
+; CHECK:       [[BB39]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    [[TMP44:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP44:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP44]]
 ; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
 ; CHECK-NEXT:    [[TMP46:%.*]] = ptrtoint ptr addrspace(3) [[TMP45]] to i32
@@ -102,16 +98,16 @@ define amdgpu_kernel void @k0() #0 {
 ; CHECK-NEXT:    [[TMP58:%.*]] = and i1 [[TMP54]], [[TMP57]]
 ; CHECK-NEXT:    [[TMP59:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP58]])
 ; CHECK-NEXT:    [[TMP60:%.*]] = icmp ne i64 [[TMP59]], 0
-; CHECK-NEXT:    br i1 [[TMP60]], label %[[ASAN_REPORT1:.*]], label %[[BB63:.*]], !prof [[PROF2]]
+; CHECK-NEXT:    br i1 [[TMP60]], label %[[ASAN_REPORT1:.*]], label %[[BB59:.*]], !prof [[PROF2]]
 ; CHECK:       [[ASAN_REPORT1]]:
-; CHECK-NEXT:    br i1 [[TMP58]], label %[[BB61:.*]], label %[[BB62:.*]]
-; CHECK:       [[BB61]]:
+; CHECK-NEXT:    br i1 [[TMP58]], label %[[BB57:.*]], label %[[BB58:.*]]
+; CHECK:       [[BB57]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP49]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label %[[BB62]]
-; CHECK:       [[BB62]]:
-; CHECK-NEXT:    br label %[[BB63]]
-; CHECK:       [[BB63]]:
+; CHECK-NEXT:    br label %[[BB58]]
+; CHECK:       [[BB58]]:
+; CHECK-NEXT:    br label %[[BB59]]
+; CHECK:       [[BB59]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(3) [[TMP45]], align 4
 ; CHECK-NEXT:    br label %[[CONDFREE:.*]]
 ; CHECK:       [[CONDFREE]]:
@@ -136,7 +132,7 @@ WId:
   %5 = icmp eq i32 %4, 0
   br i1 %5, label %Malloc, label %21
 
-Malloc:                                           ; preds = %WId
+Malloc:
   %6 = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
   %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
   %8 = add i32 %6, %7
@@ -158,7 +154,7 @@ Malloc:                                           ; preds = %WId
   store ptr addrspace(1) %20, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
   br label %21
 
-21:                                               ; preds = %Malloc, %WId
+21:
   %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
   call void @llvm.amdgcn.s.barrier()
   %22 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -167,11 +163,11 @@ Malloc:                                           ; preds = %WId
   store i8 7, ptr addrspace(3) %23, align 4
   br label %CondFree
 
-CondFree:                                         ; preds = %21
+CondFree:
   call void @llvm.amdgcn.s.barrier()
   br i1 %xyzCond, label %Free, label %End
 
-Free:                                             ; preds = %CondFree
+Free:
   %24 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
   %25 = call ptr @llvm.returnaddress(i32 0)
   %26 = ptrtoint ptr %25 to i64
@@ -179,31 +175,17 @@ Free:                                             ; preds = %CondFree
   call void @__asan_free_impl(i64 %27, i64 %26)
   br label %End
 
-End:                                              ; preds = %Free, %CondFree
+End:
   ret void
 }
 
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
 declare void @llvm.donothing() #1
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.y() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.z() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
-
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
 declare ptr @llvm.returnaddress(i32 immarg) #1
-
 declare i64 @__asan_malloc_impl(i64, i64)
-
-; Function Attrs: convergent nocallback nofree nounwind willreturn
 declare void @llvm.amdgcn.s.barrier() #3
 
 declare void @__asan_free_impl(i64, i64)
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
index 23f27aa797e73..57d91cafcab56 100755
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 %llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
 %llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
@@ -10,11 +10,10 @@
 @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
 
 ; Function Attrs: sanitize_address
-;.
 ; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 3, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 4, i32 32 } }, no_sanitize_address
 ; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 }, %1 { i32 96, i32 3, i32 32, i32 99, i32 29 }, %1 { i32 128, i32 4, i32 32, i32 132, i32 28 } }, no_sanitize_address, align 1
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
 ; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
 ; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
 ; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
@@ -231,10 +230,10 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB44:.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB24:.*]]
 ; CHECK:       [[MALLOC]]:
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 ; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
@@ -242,48 +241,28 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
 ; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 3), align 4
-; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 132
 ; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
-; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 4), align 4
-; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
-; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 3), align 4
-; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 28)
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 99
 ; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 4), align 4
-; CHECK-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
-; CHECK-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP26]] to i64
-; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP27]]
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 29)
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
 ; CHECK-NEXT:    [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64
-; CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
-; CHECK-NEXT:    [[TMP31:%.*]] = zext i32 [[TMP30]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP29]], i64 [[TMP31]])
-; CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
-; CHECK-NEXT:    [[TMP33:%.*]] = zext i32 [[TMP32]] to i64
-; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP33]]
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP29]], i64 28)
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
 ; CHECK-NEXT:    [[TMP35:%.*]] = ptrtoint ptr addrspace(1) [[TMP34]] to i64
-; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
-; CHECK-NEXT:    [[TMP37:%.*]] = zext i32 [[TMP36]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP35]], i64 [[TMP37]])
-; CHECK-NEXT:    [[TMP38:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
-; CHECK-NEXT:    [[TMP39:%.*]] = zext i32 [[TMP38]] to i64
-; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP39]]
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP35]], i64 31)
+; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
 ; CHECK-NEXT:    [[TMP41:%.*]] = ptrtoint ptr addrspace(1) [[TMP40]] to i64
-; CHECK-NEXT:    [[TMP42:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
-; CHECK-NEXT:    [[TMP43:%.*]] = zext i32 [[TMP42]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP41]], i64 [[TMP43]])
-; CHECK-NEXT:    br label %[[BB44]]
-; CHECK:       [[BB44]]:
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP41]], i64 24)
+; CHECK-NEXT:    br label %[[BB24]]
+; CHECK:       [[BB24]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    [[TMP45:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP45:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP45]]
-; CHECK-NEXT:    [[TMP47:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP47:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP47]]
 ; CHECK-NEXT:    call void @use_variables()
 ; CHECK-NEXT:    [[TMP49:%.*]] = ptrtoint ptr addrspace(3) [[TMP46]] to i32
@@ -301,16 +280,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
 ; CHECK-NEXT:    [[TMP61:%.*]] = and i1 [[TMP57]], [[TMP60]]
 ; CHECK-NEXT:    [[TMP62:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP61]])
 ; CHECK-NEXT:    [[TMP63:%.*]] = icmp ne i64 [[TMP62]], 0
-; CHECK-NEXT:    br i1 [[TMP63]], label %[[ASAN_REPORT:.*]], label %[[BB66:.*]], !prof [[PROF1]]
+; CHECK-NEXT:    br i1 [[TMP63]], label %[[ASAN_REPORT:.*]], label %[[BB46:.*]], !prof [[PROF1]]
 ; CHECK:       [[ASAN_REPORT]]:
-; CHECK-NEXT:    br i1 [[TMP61]], label %[[BB64:.*]], label %[[BB65:.*]]
-; CHECK:       [[BB64]]:
+; CHECK-NEXT:    br i1 [[TMP61]], label %[[BB44:.*]], label %[[BB45:.*]]
+; CHECK:       [[BB44]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP52]]) #[[ATTR8]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label %[[BB65]]
-; CHECK:       [[BB65]]:
-; CHECK-NEXT:    br label %[[BB66]]
-; CHECK:       [[BB66]]:
+; CHECK-NEXT:    br label %[[BB45]]
+; CHECK:       [[BB45]]:
+; CHECK-NEXT:    br label %[[BB46]]
+; CHECK:       [[BB46]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(3) [[TMP46]], align 1
 ; CHECK-NEXT:    [[TMP67:%.*]] = ptrtoint ptr addrspace(3) [[TMP48]] to i64
 ; CHECK-NEXT:    [[TMP68:%.*]] = add i64 [[TMP67]], 3
@@ -330,16 +309,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
 ; CHECK-NEXT:    [[TMP82:%.*]] = and i1 [[TMP78]], [[TMP81]]
 ; CHECK-NEXT:    [[TMP83:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP82]])
 ; CHECK-NEXT:    [[TMP84:%.*]] = icmp ne i64 [[TMP83]], 0
-; CHECK-NEXT:    br i1 [[TMP84]], label %[[ASAN_REPORT1:.*]], label %[[BB87:.*]], !prof [[PROF1]]
+; CHECK-NEXT:    br i1 [[TMP84]], label %[[ASAN_REPORT1:.*]], label %[[BB67:.*]], !prof [[PROF1]]
 ; CHECK:       [[ASAN_REPORT1]]:
-; CHECK-NEXT:    br i1 [[TMP82]], label %[[BB85:.*]], label %[[BB86:.*]]
-; CHECK:       [[BB85]]:
+; CHECK-NEXT:    br i1 [[TMP82]], label %[[BB65:.*]], label %[[BB66:.*]]
+; CHECK:       [[BB65]]:
 ; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP73]], i64 4) #[[ATTR8]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label %[[BB86]]
-; CHECK:       [[BB86]]:
-; CHECK-NEXT:    br label %[[BB87]]
-; CHECK:       [[BB87]]:
+; CHECK-NEXT:    br label %[[BB66]]
+; CHECK:       [[BB66]]:
+; CHECK-NEXT:    br label %[[BB67]]
+; CHECK:       [[BB67]]:
 ; CHECK-NEXT:    [[TMP88:%.*]] = ptrtoint ptr addrspace(3) [[TMP69]] to i32
 ; CHECK-NEXT:    [[TMP89:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP90:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP89]], i32 [[TMP88]]
@@ -355,16 +334,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
 ; CHECK-NEXT:    [[TMP100:%.*]] = and i1 [[TMP96]], [[TMP99]]
 ; CHECK-NEXT:    [[TMP101:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP100]])
 ; CHECK-NEXT:    [[TMP102:%.*]] = icmp ne i64 [[TMP101]], 0
-; CHECK-NEXT:    br i1 [[TMP102]], label %[[ASAN_REPORT2:.*]], label %[[BB105:.*]], !prof [[PROF1]]
+; CHECK-NEXT:    br i1 [[TMP102]], label %[[ASAN_REPORT2:.*]], label %[[BB85:.*]], !prof [[PROF1]]
 ; CHECK:       [[ASAN_REPORT2]]:
-; CHECK-NEXT:    br i1 [[TMP100]], label %[[BB103:.*]], label %[[BB104:.*]]
-; CHECK:       [[BB103]]:
+; CHECK-NEXT:    br i1 [[TMP100]], label %[[BB83:.*]], label %[[BB84:.*]]
+; CHECK:       [[BB83]]:
 ; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP91]], i64 4) #[[ATTR8]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label %[[BB104]]
-; CHECK:       [[BB104]]:
-; CHECK-NEXT:    br label %[[BB105]]
-; CHECK:       [[BB105]]:
+; CHECK-NEXT:    br label %[[BB84]]
+; CHECK:       [[BB84]]:
+; CHECK-NEXT:    br label %[[BB85]]
+; CHECK:       [[BB85]]:
 ; CHECK-NEXT:    store i32 8, ptr addrspace(3) [[TMP48]], align 2
 ; CHECK-NEXT:    br label %[[CONDFREE:.*]]
 ; CHECK:       [[CONDFREE]]:
@@ -389,7 +368,7 @@ WId:
   %5 = icmp eq i32 %4, 0
   br i1 %5, label %Malloc, label %14
 
-Malloc:                                           ; preds = %WId
+Malloc:
   %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
   %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
   %8 = add i32 %6, %7
@@ -401,7 +380,7 @@ Malloc:                                           ; preds = %WId
   store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
   br label %14
 
-14:                                               ; preds = %Malloc, %WId
+14:
   %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
   call void @llvm.amdgcn.s.barrier()
   %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -413,11 +392,11 @@ Malloc:                                           ; preds = %WId
   store i32 8, ptr addrspace(3) %18, align 2
   br label %CondFree
 
-CondFree:                                         ; preds = %14
+CondFree:
   call void @llvm.amdgcn.s.barrier()
   br i1 %xyzCond, label %Free, label %End
 
-Free:                                             ; preds = %CondFree
+Free:
   %19 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
   %20 = call ptr @llvm.returnaddress(i32 0)
   %21 = ptrtoint ptr %20 to i64
@@ -425,30 +404,17 @@ Free:                                             ; preds = %CondFree
   call void @__asan_free_impl(i64 %22, i64 %21)
   br label %End
 
-End:                                              ; preds = %Free, %CondFree
+End:
   ret void
 }
 
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.y() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.z() #2
-
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
 declare ptr @llvm.returnaddress(i32 immarg) #3
-
 declare i64 @__asan_malloc_impl(i64, i64)
-
-; Function Attrs: convergent nocallback nofree nounwind willreturn
 declare void @llvm.amdgcn.s.barrier() #4
-
 declare void @__asan_free_impl(i64, i64)
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.lds.kernel.id() #2
 
 attributes #0 = { sanitize_address }
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
index 072174db11f25..c14f772cb1b02 100755
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 %llvm.amdgcn.sw.lds.my_kernel.md.type = type { %llvm.amdgcn.sw.lds.my_kernel.md.item, %llvm.amdgcn.sw.lds.my_kernel.md.item }
 %llvm.amdgcn.sw.lds.my_kernel.md.item = type { i32, i32, i32 }
@@ -10,7 +10,7 @@
 ; Function Attrs: sanitize_address
 ;.
 ; CHECK: @llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 4096, i32 5120, i32 4128, i32 1024 } }, no_sanitize_address, align 1
+; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.my_kernel.md.type { %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 32, i32 4096, i32 5120 } }, no_sanitize_address
 ; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
 ; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
 ; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
@@ -68,10 +68,10 @@ define amdgpu_kernel void @my_kernel() #1 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB26:.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
 ; CHECK:       [[MALLOC]]:
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 ; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
@@ -79,25 +79,17 @@ define amdgpu_kernel void @my_kernel() #1 {
 ; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 3), align 4
-; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 4128
 ; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
-; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 4), align 4
-; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
-; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 0, i32 3), align 4
-; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 1024)
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
 ; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 0, i32 4), align 4
-; CHECK-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
-; CHECK-NEXT:    br label %[[BB26]]
-; CHECK:       [[BB26]]:
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 24)
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 [[TMP27]]
 ; CHECK-NEXT:    [[LDS_PTR:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) [[TMP28]], i32 0, i32 0
 ; CHECK-NEXT:    call void @my_function(ptr addrspace(3) [[LDS_PTR]])
@@ -124,7 +116,7 @@ WId:
   %5 = icmp eq i32 %4, 0
   br i1 %5, label %Malloc, label %14
 
-Malloc:                                           ; preds = %WId
+Malloc:
   %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
   %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
   %8 = add i32 %6, %7
@@ -136,7 +128,7 @@ Malloc:                                           ; preds = %WId
   store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
   br label %14
 
-14:                                               ; preds = %Malloc, %WId
+14:
   %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
   call void @llvm.amdgcn.s.barrier()
   %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
@@ -145,11 +137,11 @@ Malloc:                                           ; preds = %WId
   call void @my_function(ptr addrspace(3) %lds_ptr)
   br label %CondFree
 
-CondFree:                                         ; preds = %14
+CondFree:
   call void @llvm.amdgcn.s.barrier()
   br i1 %xyzCond, label %Free, label %End
 
-Free:                                             ; preds = %CondFree
+Free:
   %17 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
   %18 = call ptr @llvm.returnaddress(i32 0)
   %19 = ptrtoint ptr %18 to i64
@@ -157,27 +149,16 @@ Free:                                             ; preds = %CondFree
   call void @__asan_free_impl(i64 %20, i64 %19)
   br label %End
 
-End:                                              ; preds = %Free, %CondFree
+End:
   ret void
 }
 
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.y() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.z() #2
-
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
 declare ptr @llvm.returnaddress(i32 immarg) #3
-
 declare i64 @__asan_malloc_impl(i64, i64)
-
-; Function Attrs: convergent nocallback nofree nounwind willreturn
 declare void @llvm.amdgcn.s.barrier() #4
-
 declare void @__asan_free_impl(i64, i64)
 
 attributes #0 = { sanitize_address }
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
index 99dbf6c607ab5..f3a905882153d 100755
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 %llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
 %llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
@@ -10,7 +10,7 @@
 ; Function Attrs: sanitize_address
 ;.
 ; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 } }, no_sanitize_address, align 1
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address
 ; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
 ; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
 ; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
@@ -27,10 +27,10 @@ define amdgpu_kernel void @k0() #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB32:.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
 ; CHECK:       [[MALLOC]]:
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 ; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
@@ -38,34 +38,22 @@ define amdgpu_kernel void @k0() #0 {
 ; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
 ; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
 ; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
-; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
 ; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
-; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
-; CHECK-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
-; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
-; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 28)
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
 ; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
-; CHECK-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
-; CHECK-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP26]] to i64
-; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP27]]
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 31)
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
 ; CHECK-NEXT:    [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64
-; CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
-; CHECK-NEXT:    [[TMP31:%.*]] = zext i32 [[TMP30]] to i64
-; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP29]], i64 [[TMP31]])
-; CHECK-NEXT:    br label %[[BB32]]
-; CHECK:       [[BB32]]:
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP29]], i64 24)
+; CHECK-NEXT:    br label %[[BB20]]
+; CHECK:       [[BB20]]:
 ; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP33]]
-; CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP35]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = ptrtoint ptr addrspace(3) [[TMP34]] to i32
 ; CHECK-NEXT:    [[TMP38:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
@@ -82,16 +70,16 @@ define amdgpu_kernel void @k0() #0 {
 ; CHECK-NEXT:    [[TMP49:%.*]] = and i1 [[TMP45]], [[TMP48]]
 ; CHECK-NEXT:    [[TMP50:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP49]])
 ; CHECK-NEXT:    [[TMP51:%.*]] = icmp ne i64 [[TMP50]], 0
-; CHECK-NEXT:    br i1 [[TMP51]], label %[[ASAN_REPORT:.*]], label %[[BB54:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP51]], label %[[ASAN_REPORT:.*]], label %[[BB42:.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       [[ASAN_REPORT]]:
-; CHECK-NEXT:    br i1 [[TMP49]], label %[[BB52:.*]], label %[[BB53:.*]]
-; CHECK:       [[BB52]]:
+; CHECK-NEXT:    br i1 [[TMP49]], label %[[BB40:.*]], label %[[BB41:.*]]
+; CHECK:       [[BB40]]:
 ; CHECK-NEXT:    call void @__asan_report_store1(i64 [[TMP40]]) #[[ATTR7:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label %[[BB53]]
-; CHECK:       [[BB53]]:
-; CHECK-NEXT:    br label %[[BB54]]
-; CHECK:       [[BB54]]:
+; CHECK-NEXT:    br label %[[BB41]]
+; CHECK:       [[BB41]]:
+; CHECK-NEXT:    br label %[[BB42]]
+; CHECK:       [[BB42]]:
 ; CHECK-NEXT:    store i8 7, ptr addrspace(3) [[TMP34]], align 4
 ; CHECK-NEXT:    [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP36]] to i64
 ; CHECK-NEXT:    [[TMP56:%.*]] = add i64 [[TMP55]], 3
@@ -111,16 +99,16 @@ define amdgpu_kernel void @k0() #0 {
 ; CHECK-NEXT:    [[TMP70:%.*]] = and i1 [[TMP66]], [[TMP69]]
 ; CHECK-NEXT:    [[TMP71:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP70]])
 ; CHECK-NEXT:    [[TMP72:%.*]] = icmp ne i64 [[TMP71]], 0
-; CHECK-NEXT:    br i1 [[TMP72]], label %[[ASAN_REPORT1:.*]], label %[[BB75:.*]], !prof [[PROF1]]
+; CHECK-NEXT:    br i1 [[TMP72]], label %[[ASAN_REPORT1:.*]], label %[[BB63:.*]], !prof [[PROF1]]
 ; CHECK:       [[ASAN_REPORT1]]:
-; CHECK-NEXT:    br i1 [[TMP70]], label %[[BB73:.*]], label %[[BB74:.*]]
-; CHECK:       [[BB73]]:
+; CHECK-NEXT:    br i1 [[TMP70]], label %[[BB61:.*]], label %[[BB62:.*]]
+; CHECK:       [[BB61]]:
 ; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP61]], i64 4) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label %[[BB74]]
-; CHECK:       [[BB74]]:
-; CHECK-NEXT:    br label %[[BB75]]
-; CHECK:       [[BB75]]:
+; CHECK-NEXT:    br label %[[BB62]]
+; CHECK:       [[BB62]]:
+; CHECK-NEXT:    br label %[[BB63]]
+; CHECK:       [[BB63]]:
 ; CHECK-NEXT:    [[TMP76:%.*]] = ptrtoint ptr addrspace(3) [[TMP57]] to i32
 ; CHECK-NEXT:    [[TMP77:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP77]], i32 [[TMP76]]
@@ -136,16 +124,16 @@ define amdgpu_kernel void @k0() #0 {
 ; CHECK-NEXT:    [[TMP88:%.*]] = and i1 [[TMP84]], [[TMP87]]
 ; CHECK-NEXT:    [[TMP89:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP88]])
 ; CHECK-NEXT:    [[TMP90:%.*]] = icmp ne i64 [[TMP89]], 0
-; CHECK-NEXT:    br i1 [[TMP90]], label %[[ASAN_REPORT2:.*]], label %[[BB93:.*]], !prof [[PROF1]]
+; CHECK-NEXT:    br i1 [[TMP90]], label %[[ASAN_REPORT2:.*]], label %[[BB81:.*]], !prof [[PROF1]]
 ; CHECK:       [[ASAN_REPORT2]]:
-; CHECK-NEXT:    br i1 [[TMP88]], label %[[BB91:.*]], label %[[BB92:.*]]
-; CHECK:       [[BB91]]:
+; CHECK-NEXT:    br i1 [[TMP88]], label %[[BB79:.*]], label %[[BB80:.*]]
+; CHECK:       [[BB79]]:
 ; CHECK-NEXT:    call void @__asan_report_store_n(i64 [[TMP79]], i64 4) #[[ATTR7]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT:    br label %[[BB92]]
-; CHECK:       [[BB92]]:
-; CHECK-NEXT:    br label %[[BB93]]
-; CHECK:       [[BB93]]:
+; CHECK-NEXT:    br label %[[BB80]]
+; CHECK:       [[BB80]]:
+; CHECK-NEXT:    br label %[[BB81]]
+; CHECK:       [[BB81]]:
 ; CHECK-NEXT:    store i32 8, ptr addrspace(3) [[TMP36]], align 2
 ; CHECK-NEXT:    br label %[[CONDFREE:.*]]
 ; CHECK:       [[CONDFREE]]:
@@ -170,7 +158,7 @@ WId:
   %5 = icmp eq i32 %4, 0
   br i1 %5, label %Malloc, label %14
 
-Malloc:                                           ; preds = %WId
+Malloc:
   %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
   %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
   %8 = add i32 %6, %7
@@ -182,7 +170,7 @@ Malloc:                                           ; preds = %WId
   store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
   br label %14
 
-14:                                               ; preds = %Malloc, %WId
+14:
   %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
   call void @llvm.amdgcn.s.barrier()
   %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -193,11 +181,11 @@ Malloc:                                           ; preds = %WId
   store i32 8, ptr addrspace(3) %18, align 2
   br label %CondFree
 
-CondFree:                                         ; preds = %14
+CondFree:
   call void @llvm.amdgcn.s.barrier()
   br i1 %xyzCond, label %Free, label %End
 
-Free:                                             ; preds = %CondFree
+Free:
   %19 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
   %20 = call ptr @llvm.returnaddress(i32 0)
   %21 = ptrtoint ptr %20 to i64
@@ -205,27 +193,16 @@ Free:                                             ; preds = %CondFree
   call void @__asan_free_impl(i64 %22, i64 %21)
   br label %End
 
-End:                                              ; preds = %Free, %CondFree
+End:
   ret void
 }
 
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.y() #1
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
 declare i32 @llvm.amdgcn.workitem.id.z() #1
-
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
 declare ptr @llvm.returnaddress(i32 immarg) #2
-
 declare i64 @__asan_malloc_impl(i64, i64)
-
-; Function Attrs: convergent nocallback nofree nounwind willreturn
 declare void @llvm.amdgcn.s.barrier() #3
-
 declare void @__asan_free_impl(i64, i64)
 
 attributes #0 = { sanitize_address "amdgpu-lds-size"="24" }



More information about the llvm-commits mailing list