[llvm] [AMDGPU] Introduce address sanitizer instrumentation for LDS lowered by amdgpu-sw-lower-lds pass (PR #89208)
via llvm-commits
llvm-commits at lists.llvm.org
Sat May 25 07:00:54 PDT 2024
https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/89208
>From 65dd67a36141133510c9e5c2c71e30069524e0d7 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 8 Mar 2024 16:43:57 +0530
Subject: [PATCH 1/2] [AMDGPU] Enable asan LDS instrumentation
---
.../Instrumentation/AddressSanitizer.cpp | 387 +++++++++++--
.../asan-dynamic-lds-indirect-access.ll | 529 ++++++++++++++++++
.../AMDGPU/asan-dynamic-lds-test.ll | 231 ++++++++
.../AMDGPU/asan-static-indirect-access.ll | 476 ++++++++++++++++
...atic-lds-indirect-access-function-param.ll | 203 +++++++
.../AMDGPU/asan-static-lds-test.ll | 249 +++++++++
.../AMDGPU/asan_do_not_instrument_lds.ll | 27 -
.../asan_instrument_generic_address_space.ll | 96 ++--
8 files changed, 2071 insertions(+), 127 deletions(-)
create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
delete mode 100644 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 9cc978dc6c16e..a312f1d50fe9a 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -176,6 +176,7 @@ const char kAsanAllocasUnpoison[] = "__asan_allocas_unpoison";
const char kAMDGPUAddressSharedName[] = "llvm.amdgcn.is.shared";
const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private";
const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64";
+const char kAMDGPULDSKernelId[] = "llvm.amdgcn.lds.kernel.id";
const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable";
// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
@@ -628,12 +629,6 @@ ASanAccessInfo::ASanAccessInfo(bool IsWrite, bool CompileKernel,
} // namespace llvm
-static uint64_t getRedzoneSizeForScale(int MappingScale) {
- // Redzone used for stack and globals is at least 32 bytes.
- // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
- return std::max(32U, 1U << MappingScale);
-}
-
static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) {
if (TargetTriple.isOSEmscripten()) {
return kAsanEmscriptenCtorAndDtorPriority;
@@ -939,10 +934,7 @@ class ModuleAddressSanitizer {
StringRef getGlobalMetadataSection() const;
void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName);
void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
- uint64_t getMinRedzoneSizeForGlobal() const {
- return getRedzoneSizeForScale(Mapping.Scale);
- }
- uint64_t getRedzoneSizeForGlobal(uint64_t SizeInBytes) const;
+
int GetAsanVersion(const Module &M) const;
bool CompileKernel;
@@ -1239,6 +1231,290 @@ void AddressSanitizerPass::printPipeline(
OS << '>';
}
+static uint64_t getRedzoneSizeForScale(int MappingScale) {
+ // Redzone used for stack and globals is at least 32 bytes.
+ // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+ return std::max(32U, 1U << MappingScale);
+}
+
+static uint64_t getMinRedzoneSizeForGlobal(int Scale) {
+ return getRedzoneSizeForScale(Scale);
+}
+
+static uint64_t getRedzoneSizeForGlobal(int Scale, uint64_t SizeInBytes) {
+ constexpr uint64_t kMaxRZ = 1 << 18;
+ const uint64_t MinRZ = getMinRedzoneSizeForGlobal(Scale);
+
+ uint64_t RZ = 0;
+ if (SizeInBytes <= MinRZ / 2) {
+ // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
+ // at least 32 bytes, optimize when SizeInBytes is less than or equal to
+ // half of MinRZ.
+ RZ = MinRZ - SizeInBytes;
+ } else {
+ // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
+ RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);
+
+ // Round up to multiple of MinRZ.
+ if (SizeInBytes % MinRZ)
+ RZ += MinRZ - (SizeInBytes % MinRZ);
+ }
+
+ assert((RZ + SizeInBytes) % MinRZ == 0);
+
+ return RZ;
+}
+
+static GlobalVariable *getKernelSwLDSGlobal(Module &M, Function &F) {
+ SmallString<64> KernelLDSName("llvm.amdgcn.sw.lds.");
+ KernelLDSName += F.getName();
+ return M.getNamedGlobal(KernelLDSName);
+}
+
+static GlobalVariable *getKernelSwLDSMetadataGlobal(Module &M, Function &F) {
+ SmallString<64> KernelLDSName("llvm.amdgcn.sw.lds.");
+ KernelLDSName += F.getName();
+ KernelLDSName += ".md";
+ return M.getNamedGlobal(KernelLDSName);
+}
+
+static GlobalVariable *getKernelSwDynLDSGlobal(Module &M, Function &F) {
+ SmallString<64> KernelLDSName("llvm.amdgcn.");
+ KernelLDSName += F.getName();
+ KernelLDSName += ".dynlds";
+ return M.getNamedGlobal(KernelLDSName);
+}
+
+static GlobalVariable *getKernelSwLDSBaseGlobal(Module &M) {
+ SmallString<64> KernelLDSName("llvm.amdgcn.sw.lds.base.table");
+ return M.getNamedGlobal(KernelLDSName);
+}
+
+static void updateLDSSizeFnAttr(Function *Func, uint32_t Offset,
+ bool UsesDynLDS) {
+ if (Offset != 0) {
+ std::string Buffer;
+ raw_string_ostream SS{Buffer};
+ SS << format("%u", Offset);
+ if (UsesDynLDS)
+ SS << format(",%u", Offset);
+ Func->addFnAttr("amdgpu-lds-size", Buffer);
+ }
+}
+
+static void recordLDSAbsoluteAddress(Module &M, GlobalVariable *GV,
+ uint32_t Address) {
+ LLVMContext &Ctx = M.getContext();
+ auto *IntTy = M.getDataLayout().getIntPtrType(Ctx, 3);
+ auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address));
+ auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address + 1));
+ GV->setMetadata(LLVMContext::MD_absolute_symbol,
+ MDNode::get(Ctx, {MinC, MaxC}));
+}
+
+static void UpdateSwLDSMetadataWithRedzoneInfo(Function &F, int Scale) {
+ Module *M = F.getParent();
+ GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F);
+ GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F);
+ if (!SwLDSMetadataGlobal || !SwLDSGlobal)
+ return;
+
+ LLVMContext &Ctx = M->getContext();
+ Type *Int32Ty = Type::getInt32Ty(Ctx);
+
+ Constant *MdInit = SwLDSMetadataGlobal->getInitializer();
+ Align MdAlign = Align(SwLDSMetadataGlobal->getAlign().valueOrOne());
+ Align LDSAlign = Align(SwLDSGlobal->getAlign().valueOrOne());
+
+ StructType *MDStructType =
+ cast<StructType>(SwLDSMetadataGlobal->getValueType());
+ assert(MDStructType);
+ unsigned NumStructs = MDStructType->getNumElements();
+
+ std::vector<Type *> Items;
+ std::vector<Constant *> Initializers;
+ uint32_t MallocSize = 0;
+  //{GV.start, GV.size, Align(GV.size + Redzone.size), Redzone.start, Redzone.size}
+ StructType *LDSItemTy = StructType::create(
+ Ctx, {Int32Ty, Int32Ty, Int32Ty, Int32Ty, Int32Ty}, "");
+ for (unsigned i = 0; i < NumStructs; i++) {
+ Items.push_back(LDSItemTy);
+ ConstantStruct *member =
+ dyn_cast<ConstantStruct>(MdInit->getAggregateElement(i));
+ Constant *NewInitItem;
+ if (member) {
+ ConstantInt *GlobalSize =
+ cast<ConstantInt>(member->getAggregateElement(1U));
+ unsigned GlobalSizeValue = GlobalSize->getZExtValue();
+ Constant *NewItemStartOffset = ConstantInt::get(Int32Ty, MallocSize);
+ if (GlobalSizeValue) {
+ Constant *NewItemGlobalSizeConst =
+ ConstantInt::get(Int32Ty, GlobalSizeValue);
+ const uint64_t RightRedzoneSize =
+ getRedzoneSizeForGlobal(Scale, GlobalSizeValue);
+ MallocSize += GlobalSizeValue;
+ Constant *NewItemRedzoneStartOffset =
+ ConstantInt::get(Int32Ty, MallocSize);
+ MallocSize += RightRedzoneSize;
+ Constant *NewItemRedzoneSize =
+ ConstantInt::get(Int32Ty, RightRedzoneSize);
+
+ unsigned NewItemAlignGlobalPlusRedzoneSize =
+ alignTo(GlobalSizeValue + RightRedzoneSize, LDSAlign);
+ Constant *NewItemAlignGlobalPlusRedzoneSizeConst =
+ ConstantInt::get(Int32Ty, NewItemAlignGlobalPlusRedzoneSize);
+ NewInitItem = ConstantStruct::get(
+ LDSItemTy, {NewItemStartOffset, NewItemGlobalSizeConst,
+ NewItemAlignGlobalPlusRedzoneSizeConst,
+ NewItemRedzoneStartOffset, NewItemRedzoneSize});
+ MallocSize = alignTo(MallocSize, LDSAlign);
+ } else {
+ Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize);
+ Constant *zero = ConstantInt::get(Int32Ty, 0);
+ NewInitItem = ConstantStruct::get(
+ LDSItemTy, {CurrMallocSize, zero, zero, zero, zero});
+ }
+ } else {
+ Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize);
+ Constant *zero = ConstantInt::get(Int32Ty, 0);
+ NewInitItem = ConstantStruct::get(
+ LDSItemTy, {CurrMallocSize, zero, zero, zero, zero});
+ }
+ Initializers.push_back(NewInitItem);
+ }
+ GlobalVariable *SwDynLDS = getKernelSwDynLDSGlobal(*M, F);
+ bool usesDynLDS = SwDynLDS ? true : false;
+ updateLDSSizeFnAttr(&F, MallocSize, usesDynLDS);
+ if (usesDynLDS)
+ recordLDSAbsoluteAddress(*M, SwDynLDS, MallocSize);
+
+ StructType *MetadataStructType = StructType::create(Ctx, Items, "");
+
+ GlobalVariable *NewSwLDSMetadataGlobal = new GlobalVariable(
+ *M, MetadataStructType, false, GlobalValue::InternalLinkage,
+ PoisonValue::get(MetadataStructType), "", nullptr,
+ GlobalValue::NotThreadLocal, 1, false);
+ Constant *Data = ConstantStruct::get(MetadataStructType, Initializers);
+ NewSwLDSMetadataGlobal->setInitializer(Data);
+ NewSwLDSMetadataGlobal->setAlignment(MdAlign);
+ GlobalValue::SanitizerMetadata MD;
+ MD.NoAddress = true;
+ NewSwLDSMetadataGlobal->setSanitizerMetadata(MD);
+
+ for (Use &U : make_early_inc_range(SwLDSMetadataGlobal->uses())) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(U.getUser())) {
+ SmallVector<Constant *> Indices;
+ for (Use &Idx : GEP->indices()) {
+ Indices.push_back(cast<Constant>(Idx));
+ }
+ Constant *NewGEP = ConstantExpr::getGetElementPtr(
+ MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
+ GEP->replaceAllUsesWith(NewGEP);
+ } else if (LoadInst *Load = dyn_cast<LoadInst>(U.getUser())) {
+ Constant *zero = ConstantInt::get(Int32Ty, 0);
+ SmallVector<Constant *> Indices{zero, zero, zero};
+ Constant *NewGEP = ConstantExpr::getGetElementPtr(
+ MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
+ IRBuilder<> IRB(Load);
+ LoadInst *NewLoad = IRB.CreateLoad(Load->getType(), NewGEP);
+ Load->replaceAllUsesWith(NewLoad);
+ Load->eraseFromParent();
+ } else if (StoreInst *Store = dyn_cast<StoreInst>(U.getUser())) {
+ Constant *zero = ConstantInt::get(Int32Ty, 0);
+ SmallVector<Constant *> Indices{zero, zero, zero};
+ Constant *NewGEP = ConstantExpr::getGetElementPtr(
+ MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
+ IRBuilder<> IRB(Store);
+ StoreInst *NewStore = IRB.CreateStore(Store->getValueOperand(), NewGEP);
+ Store->replaceAllUsesWith(NewStore);
+ Store->eraseFromParent();
+ } else
+ report_fatal_error("AMDGPU Sw LDS Metadata User instruction not handled");
+ }
+ SwLDSMetadataGlobal->replaceAllUsesWith(NewSwLDSMetadataGlobal);
+ NewSwLDSMetadataGlobal->takeName(SwLDSMetadataGlobal);
+ SwLDSMetadataGlobal->eraseFromParent();
+ return;
+}
+
+static void poisonRedzonesForSwLDS(Function &F) {
+ Module *M = F.getParent();
+ GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F);
+ GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F);
+
+ if (!SwLDSGlobal || !SwLDSMetadataGlobal)
+ return;
+
+ LLVMContext &Ctx = M->getContext();
+ Type *Int64Ty = Type::getInt64Ty(Ctx);
+ Type *VoidTy = Type::getVoidTy(Ctx);
+ FunctionCallee AsanPoisonRegion = M->getOrInsertFunction(
+ StringRef("__asan_poison_region"),
+ FunctionType::get(VoidTy, {Int64Ty, Int64Ty}, false));
+ Constant *MdInit = SwLDSMetadataGlobal->getInitializer();
+
+ for (User *U : SwLDSGlobal->users()) {
+ StoreInst *SI = dyn_cast<StoreInst>(U);
+ if (!SI)
+ continue;
+
+ Type *PtrTy =
+ cast<PointerType>(SI->getValueOperand()->getType()->getScalarType());
+ unsigned int AddrSpace = PtrTy->getPointerAddressSpace();
+ if (AddrSpace != 1)
+ report_fatal_error("AMDGPU illegal store to SW LDS");
+
+ StructType *MDStructType =
+ cast<StructType>(SwLDSMetadataGlobal->getValueType());
+ assert(MDStructType);
+ unsigned NumStructs = MDStructType->getNumElements();
+ Value *StoreMallocPointer = SI->getValueOperand();
+
+ for (unsigned i = 0; i < NumStructs; i++) {
+ ConstantStruct *member =
+ dyn_cast<ConstantStruct>(MdInit->getAggregateElement(i));
+ if (!member)
+ continue;
+
+ ConstantInt *GlobalSize =
+ cast<ConstantInt>(member->getAggregateElement(1U));
+ unsigned GlobalSizeValue = GlobalSize->getZExtValue();
+
+ if (!GlobalSizeValue)
+ continue;
+ IRBuilder<> IRB(SI);
+ IRB.SetInsertPoint(SI->getNextNode());
+
+ auto *GEPForOffset = IRB.CreateInBoundsGEP(
+ MDStructType, SwLDSMetadataGlobal,
+ {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(3)});
+
+ auto *GEPForSize = IRB.CreateInBoundsGEP(
+ MDStructType, SwLDSMetadataGlobal,
+ {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(4)});
+
+ Value *RedzoneOffset = IRB.CreateLoad(IRB.getInt32Ty(), GEPForOffset);
+ RedzoneOffset = IRB.CreateZExt(RedzoneOffset, IRB.getInt64Ty());
+ Value *RedzoneAddrOffset = IRB.CreateInBoundsGEP(
+ IRB.getInt8Ty(), StoreMallocPointer, {RedzoneOffset});
+ Value *RedzoneAddress =
+ IRB.CreatePtrToInt(RedzoneAddrOffset, IRB.getInt64Ty());
+ Value *RedzoneSize = IRB.CreateLoad(IRB.getInt32Ty(), GEPForSize);
+ RedzoneSize = IRB.CreateZExt(RedzoneSize, IRB.getInt64Ty());
+ IRB.CreateCall(AsanPoisonRegion, {RedzoneAddress, RedzoneSize});
+ }
+ }
+ return;
+}
+
+static void preProcessAMDGPULDSAccesses(Module &M, int Scale) {
+ for (Function &F : M) {
+ UpdateSwLDSMetadataWithRedzoneInfo(F, Scale);
+ poisonRedzonesForSwLDS(F);
+ }
+ return;
+}
+
AddressSanitizerPass::AddressSanitizerPass(
const AddressSanitizerOptions &Options, bool UseGlobalGC,
bool UseOdrIndicator, AsanDtorKind DestructorKind,
@@ -1249,6 +1525,13 @@ AddressSanitizerPass::AddressSanitizerPass(
PreservedAnalyses AddressSanitizerPass::run(Module &M,
ModuleAnalysisManager &MAM) {
+ Triple TargetTriple = Triple(M.getTargetTriple());
+
+ if (TargetTriple.isAMDGPU()) {
+ unsigned LongSize = M.getDataLayout().getPointerSizeInBits();
+ ShadowMapping Mapping = getShadowMapping(TargetTriple, LongSize, false);
+ preProcessAMDGPULDSAccesses(M, Mapping.Scale);
+ }
ModuleAddressSanitizer ModuleSanitizer(
M, Options.InsertVersionCheck, Options.CompileKernel, Options.Recover,
UseGlobalGC, UseOdrIndicator, DestructorKind, ConstructorKind);
@@ -1304,7 +1587,15 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
static bool isUnsupportedAMDGPUAddrspace(Value *Addr) {
Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
unsigned int AddrSpace = PtrTy->getPointerAddressSpace();
- if (AddrSpace == 3 || AddrSpace == 5)
+ if (AddrSpace == 5)
+ return true;
+ return false;
+}
+
+static bool isGlobalInAMDGPULdsAddrspace(Value *Addr) {
+ Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
+ unsigned int AddrSpace = PtrTy->getPointerAddressSpace();
+ if (AddrSpace == 3)
return true;
return false;
}
@@ -1795,10 +2086,8 @@ Instruction *AddressSanitizer::instrumentAMDGPUAddress(
return InsertBefore;
// Instrument generic addresses in supported addressspaces.
IRBuilder<> IRB(InsertBefore);
- Value *IsShared = IRB.CreateCall(AMDGPUAddressShared, {Addr});
Value *IsPrivate = IRB.CreateCall(AMDGPUAddressPrivate, {Addr});
- Value *IsSharedOrPrivate = IRB.CreateOr(IsShared, IsPrivate);
- Value *Cmp = IRB.CreateNot(IsSharedOrPrivate);
+ Value *Cmp = IRB.CreateNot(IsPrivate);
Value *AddrSpaceZeroLanding =
SplitBlockAndInsertIfThen(Cmp, InsertBefore, false);
InsertBefore = cast<Instruction>(AddrSpaceZeroLanding);
@@ -1857,7 +2146,38 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
return;
}
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ Value *AddrLong;
+ if (TargetTriple.isAMDGCN()) {
+ Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
+ if (PtrTy->getPointerAddressSpace() == 3) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Function *Func = IRB.GetInsertBlock()->getParent();
+ Value *SwLDS;
+ if (Func->getCallingConv() == CallingConv::AMDGPU_KERNEL) {
+ SwLDS = getKernelSwLDSGlobal(*M, *Func);
+ } else {
+ GlobalVariable *LDSBaseTable = getKernelSwLDSBaseGlobal(*M);
+ if (LDSBaseTable) {
+ auto *KernelId = IRB.CreateCall(
+ M->getOrInsertFunction(kAMDGPULDSKernelId, IRB.getInt32Ty()), {});
+ Value *BaseGEP =
+ IRB.CreateInBoundsGEP(LDSBaseTable->getValueType(), LDSBaseTable,
+ {IRB.getInt32(0), KernelId});
+ SwLDS = IRB.CreateLoad(IRB.getPtrTy(3), BaseGEP);
+ } else {
+ SwLDS = IRB.CreateIntToPtr(IRB.getInt32(0), IRB.getPtrTy(3));
+ }
+ }
+ Value *PtrToInt = IRB.CreatePtrToInt(Addr, IRB.getInt32Ty());
+ Value *LoadMallocPtr = IRB.CreateLoad(IRB.getPtrTy(1), SwLDS);
+ Value *GEP =
+ IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {PtrToInt});
+ AddrLong = IRB.CreatePointerCast(GEP, IntptrTy);
+ } else
+ AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ } else
+ AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+
if (UseCalls) {
if (Exp == 0)
RTCI.createRuntimeCall(
@@ -2021,7 +2341,8 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
if (!G->hasInitializer()) return false;
// Globals in address space 1 and 4 are supported for AMDGPU.
if (G->getAddressSpace() &&
- !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G)))
+ (!(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G)) ||
+ !(TargetTriple.isAMDGPU() && !isGlobalInAMDGPULdsAddrspace(G))))
return false;
if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals.
// Two problems with thread-locals:
@@ -2029,7 +2350,9 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
// - Need to poison all copies, not just the main thread's one.
if (G->isThreadLocal()) return false;
// For now, just ignore this Global if the alignment is large.
- if (G->getAlign() && *G->getAlign() > getMinRedzoneSizeForGlobal()) return false;
+ if (G->getAlign() &&
+ *G->getAlign() > getMinRedzoneSizeForGlobal(Mapping.Scale))
+ return false;
// For non-COFF targets, only instrument globals known to be defined by this
// TU.
@@ -2552,7 +2875,8 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
Type *Ty = G->getValueType();
const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
- const uint64_t RightRedzoneSize = getRedzoneSizeForGlobal(SizeInBytes);
+ const uint64_t RightRedzoneSize =
+ getRedzoneSizeForGlobal(Mapping.Scale, SizeInBytes);
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy);
@@ -2568,7 +2892,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
G->getThreadLocalMode(), G->getAddressSpace());
NewGlobal->copyAttributesFrom(G);
NewGlobal->setComdat(G->getComdat());
- NewGlobal->setAlignment(Align(getMinRedzoneSizeForGlobal()));
+ NewGlobal->setAlignment(Align(getMinRedzoneSizeForGlobal(Mapping.Scale)));
// Don't fold globals with redzones. ODR violation detector and redzone
// poisoning implicitly creates a dependence on the global's address, so it
// is no longer valid for it to be marked unnamed_addr.
@@ -2688,31 +3012,6 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
LLVM_DEBUG(dbgs() << M);
}
-uint64_t
-ModuleAddressSanitizer::getRedzoneSizeForGlobal(uint64_t SizeInBytes) const {
- constexpr uint64_t kMaxRZ = 1 << 18;
- const uint64_t MinRZ = getMinRedzoneSizeForGlobal();
-
- uint64_t RZ = 0;
- if (SizeInBytes <= MinRZ / 2) {
- // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
- // at least 32 bytes, optimize when SizeInBytes is less than or equal to
- // half of MinRZ.
- RZ = MinRZ - SizeInBytes;
- } else {
- // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
- RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);
-
- // Round up to multiple of MinRZ.
- if (SizeInBytes % MinRZ)
- RZ += MinRZ - (SizeInBytes % MinRZ);
- }
-
- assert((RZ + SizeInBytes) % MinRZ == 0);
-
- return RZ;
-}
-
int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const {
int LongSize = M.getDataLayout().getPointerSizeInBits();
bool isAndroid = Triple(M.getTargetTriple()).isAndroid();
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
new file mode 100755
index 0000000000000..f37fbf350ffde
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
@@ -0,0 +1,529 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+
+%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
+%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
+
+ at llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol !0
+ at llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol !1
+ at llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 } }, no_sanitize_address
+ at llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+ at llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+
+; Function Attrs: sanitize_address
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]]
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 }, %1 { i32 96, i32 0, i32 0, i32 0, i32 0 }, %1 { i32 96, i32 0, i32 0, i32 0, i32 0 } }, no_sanitize_address, align 1
+; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
+; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
+; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
+;.
+define void @use_variables() #0 {
+; CHECK-LABEL: define void @use_variables(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr addrspace(1) [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP3]], 3
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 2147450880
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP3]], 7
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 3
+; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i8
+; CHECK-NEXT: [[TMP12:%.*]] = icmp sge i8 [[TMP11]], [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP8]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP13]])
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: br i1 [[TMP15]], label %[[ASAN_REPORT:.*]], label %[[BB18:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK: [[ASAN_REPORT]]:
+; CHECK-NEXT: br i1 [[TMP13]], label %[[BB16:.*]], label %[[BB17:.*]]
+; CHECK: [[BB16]]:
+; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP3]]) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB17]]
+; CHECK: [[BB17]]:
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
+; CHECK-NEXT: [[TMP22:%.*]] = lshr i64 [[TMP21]], 3
+; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], 2147450880
+; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr
+; CHECK-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i8 [[TMP25]], 0
+; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP26]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP27]], 0
+; CHECK-NEXT: br i1 [[TMP28]], label %[[ASAN_REPORT1:.*]], label %[[BB31:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT1]]:
+; CHECK-NEXT: br i1 [[TMP26]], label %[[BB29:.*]], label %[[BB30:.*]]
+; CHECK: [[BB29]]:
+; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP21]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB30]]
+; CHECK: [[BB30]]:
+; CHECK-NEXT: br label %[[BB31]]
+; CHECK: [[BB31]]:
+; CHECK-NEXT: [[TMP32:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP20]], align 8
+; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr addrspace(1) [[TMP32]] to i64
+; CHECK-NEXT: [[TMP34:%.*]] = lshr i64 [[TMP33]], 3
+; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[TMP34]], 2147450880
+; CHECK-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr
+; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
+; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i8 [[TMP37]], 0
+; CHECK-NEXT: [[TMP39:%.*]] = and i64 [[TMP33]], 7
+; CHECK-NEXT: [[TMP40:%.*]] = add i64 [[TMP39]], 3
+; CHECK-NEXT: [[TMP41:%.*]] = trunc i64 [[TMP40]] to i8
+; CHECK-NEXT: [[TMP42:%.*]] = icmp sge i8 [[TMP41]], [[TMP37]]
+; CHECK-NEXT: [[TMP43:%.*]] = and i1 [[TMP38]], [[TMP42]]
+; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP43]])
+; CHECK-NEXT: [[TMP45:%.*]] = icmp ne i64 [[TMP44]], 0
+; CHECK-NEXT: br i1 [[TMP45]], label %[[ASAN_REPORT2:.*]], label %[[BB48:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT2]]:
+; CHECK-NEXT: br i1 [[TMP43]], label %[[BB46:.*]], label %[[BB47:.*]]
+; CHECK: [[BB46]]:
+; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP33]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB47]]
+; CHECK: [[BB47]]:
+; CHECK-NEXT: br label %[[BB48]]
+; CHECK: [[BB48]]:
+; CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(1) [[TMP32]], align 4
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP49]]
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64
+; CHECK-NEXT: [[TMP53:%.*]] = lshr i64 [[TMP52]], 3
+; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[TMP53]], 2147450880
+; CHECK-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr
+; CHECK-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1
+; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i8 [[TMP56]], 0
+; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP57]])
+; CHECK-NEXT: [[TMP59:%.*]] = icmp ne i64 [[TMP58]], 0
+; CHECK-NEXT: br i1 [[TMP59]], label %[[ASAN_REPORT3:.*]], label %[[BB62:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT3]]:
+; CHECK-NEXT: br i1 [[TMP57]], label %[[BB60:.*]], label %[[BB61:.*]]
+; CHECK: [[BB60]]:
+; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP52]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB61]]
+; CHECK: [[BB61]]:
+; CHECK-NEXT: br label %[[BB62]]
+; CHECK: [[BB62]]:
+; CHECK-NEXT: [[TMP63:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP51]], align 8
+; CHECK-NEXT: [[TMP64:%.*]] = ptrtoint ptr addrspace(1) [[TMP63]] to i64
+; CHECK-NEXT: [[TMP65:%.*]] = lshr i64 [[TMP64]], 3
+; CHECK-NEXT: [[TMP66:%.*]] = add i64 [[TMP65]], 2147450880
+; CHECK-NEXT: [[TMP67:%.*]] = inttoptr i64 [[TMP66]] to ptr
+; CHECK-NEXT: [[TMP68:%.*]] = load i8, ptr [[TMP67]], align 1
+; CHECK-NEXT: [[TMP69:%.*]] = icmp ne i8 [[TMP68]], 0
+; CHECK-NEXT: [[TMP70:%.*]] = and i64 [[TMP64]], 7
+; CHECK-NEXT: [[TMP71:%.*]] = add i64 [[TMP70]], 3
+; CHECK-NEXT: [[TMP72:%.*]] = trunc i64 [[TMP71]] to i8
+; CHECK-NEXT: [[TMP73:%.*]] = icmp sge i8 [[TMP72]], [[TMP68]]
+; CHECK-NEXT: [[TMP74:%.*]] = and i1 [[TMP69]], [[TMP73]]
+; CHECK-NEXT: [[TMP75:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP74]])
+; CHECK-NEXT: [[TMP76:%.*]] = icmp ne i64 [[TMP75]], 0
+; CHECK-NEXT: br i1 [[TMP76]], label %[[ASAN_REPORT4:.*]], label %[[BB79:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT4]]:
+; CHECK-NEXT: br i1 [[TMP74]], label %[[BB77:.*]], label %[[BB78:.*]]
+; CHECK: [[BB77]]:
+; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP64]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB78]]
+; CHECK: [[BB78]]:
+; CHECK-NEXT: br label %[[BB79]]
+; CHECK: [[BB79]]:
+; CHECK-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(1) [[TMP63]], align 4
+; CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP80]]
+; CHECK-NEXT: [[TMP82:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP82]]
+; CHECK-NEXT: [[TMP84:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP83]], align 4
+; CHECK-NEXT: [[TMP85:%.*]] = ptrtoint ptr addrspace(3) [[TMP50]] to i32
+; CHECK-NEXT: [[TMP86:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP84]], align 8
+; CHECK-NEXT: [[TMP87:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP86]], i32 [[TMP85]]
+; CHECK-NEXT: [[TMP88:%.*]] = ptrtoint ptr addrspace(1) [[TMP87]] to i64
+; CHECK-NEXT: [[TMP89:%.*]] = lshr i64 [[TMP88]], 3
+; CHECK-NEXT: [[TMP90:%.*]] = add i64 [[TMP89]], 2147450880
+; CHECK-NEXT: [[TMP91:%.*]] = inttoptr i64 [[TMP90]] to ptr
+; CHECK-NEXT: [[TMP92:%.*]] = load i8, ptr [[TMP91]], align 1
+; CHECK-NEXT: [[TMP93:%.*]] = icmp ne i8 [[TMP92]], 0
+; CHECK-NEXT: [[TMP94:%.*]] = and i64 [[TMP88]], 7
+; CHECK-NEXT: [[TMP95:%.*]] = trunc i64 [[TMP94]] to i8
+; CHECK-NEXT: [[TMP96:%.*]] = icmp sge i8 [[TMP95]], [[TMP92]]
+; CHECK-NEXT: [[TMP97:%.*]] = and i1 [[TMP93]], [[TMP96]]
+; CHECK-NEXT: [[TMP98:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP97]])
+; CHECK-NEXT: [[TMP99:%.*]] = icmp ne i64 [[TMP98]], 0
+; CHECK-NEXT: br i1 [[TMP99]], label %[[ASAN_REPORT5:.*]], label %[[BB102:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT5]]:
+; CHECK-NEXT: br i1 [[TMP97]], label %[[BB100:.*]], label %[[BB101:.*]]
+; CHECK: [[BB100]]:
+; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP88]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB101]]
+; CHECK: [[BB101]]:
+; CHECK-NEXT: br label %[[BB102]]
+; CHECK: [[BB102]]:
+; CHECK-NEXT: store i8 3, ptr addrspace(3) [[TMP50]], align 4
+; CHECK-NEXT: [[TMP103:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP103]]
+; CHECK-NEXT: [[TMP105:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP104]], align 4
+; CHECK-NEXT: [[TMP106:%.*]] = ptrtoint ptr addrspace(3) [[TMP81]] to i32
+; CHECK-NEXT: [[TMP107:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP105]], align 8
+; CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP107]], i32 [[TMP106]]
+; CHECK-NEXT: [[TMP109:%.*]] = ptrtoint ptr addrspace(1) [[TMP108]] to i64
+; CHECK-NEXT: [[TMP110:%.*]] = lshr i64 [[TMP109]], 3
+; CHECK-NEXT: [[TMP111:%.*]] = add i64 [[TMP110]], 2147450880
+; CHECK-NEXT: [[TMP112:%.*]] = inttoptr i64 [[TMP111]] to ptr
+; CHECK-NEXT: [[TMP113:%.*]] = load i8, ptr [[TMP112]], align 1
+; CHECK-NEXT: [[TMP114:%.*]] = icmp ne i8 [[TMP113]], 0
+; CHECK-NEXT: [[TMP115:%.*]] = and i64 [[TMP109]], 7
+; CHECK-NEXT: [[TMP116:%.*]] = trunc i64 [[TMP115]] to i8
+; CHECK-NEXT: [[TMP117:%.*]] = icmp sge i8 [[TMP116]], [[TMP113]]
+; CHECK-NEXT: [[TMP118:%.*]] = and i1 [[TMP114]], [[TMP117]]
+; CHECK-NEXT: [[TMP119:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP118]])
+; CHECK-NEXT: [[TMP120:%.*]] = icmp ne i64 [[TMP119]], 0
+; CHECK-NEXT: br i1 [[TMP120]], label %[[ASAN_REPORT6:.*]], label %[[BB123:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT6]]:
+; CHECK-NEXT: br i1 [[TMP118]], label %[[BB121:.*]], label %[[BB122:.*]]
+; CHECK: [[BB121]]:
+; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP109]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB122]]
+; CHECK: [[BB122]]:
+; CHECK-NEXT: br label %[[BB123]]
+; CHECK: [[BB123]]:
+; CHECK-NEXT: store i8 3, ptr addrspace(3) [[TMP81]], align 8
+; CHECK-NEXT: ret void
+;
+ %1 = call i32 @llvm.amdgcn.lds.kernel.id()
+ %2 = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 %1
+ %3 = load ptr addrspace(3), ptr addrspace(1) %2, align 4
+ %4 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 0
+ %5 = load ptr addrspace(1), ptr addrspace(1) %4, align 8
+ %6 = load i32, ptr addrspace(1) %5, align 4
+ %7 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %6
+ %8 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 1
+ %9 = load ptr addrspace(1), ptr addrspace(1) %8, align 8
+ %10 = load i32, ptr addrspace(1) %9, align 4
+ %11 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %10
+ store i8 3, ptr addrspace(3) %7, align 4
+ store i8 3, ptr addrspace(3) %11, align 8
+ ret void
+}
+
+; Function Attrs: sanitize_address
+define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB60:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
+; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 3
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP15]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP11]], 7
+; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 3
+; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i8
+; CHECK-NEXT: [[TMP20:%.*]] = icmp sge i8 [[TMP19]], [[TMP15]]
+; CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP16]], [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP21]])
+; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP22]], 0
+; CHECK-NEXT: br i1 [[TMP23]], label %[[ASAN_REPORT:.*]], label %[[BB26:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT]]:
+; CHECK-NEXT: br i1 [[TMP21]], label %[[BB24:.*]], label %[[BB25:.*]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP11]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB25]]
+; CHECK: [[BB25]]:
+; CHECK-NEXT: br label %[[BB26]]
+; CHECK: [[BB26]]:
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
+; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4
+; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 7
+; CHECK-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], 8
+; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 8
+; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4
+; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]]
+; CHECK-NEXT: store i32 [[TMP31]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
+; CHECK-NEXT: store i32 [[TMP32]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4
+; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[TMP32]], 7
+; CHECK-NEXT: [[TMP34:%.*]] = udiv i32 [[TMP33]], 8
+; CHECK-NEXT: [[TMP35:%.*]] = mul i32 [[TMP34]], 8
+; CHECK-NEXT: store i32 [[TMP35]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP31]], [[TMP35]]
+; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i64
+; CHECK-NEXT: [[TMP38:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64
+; CHECK-NEXT: [[TMP40:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP37]], i64 [[TMP39]])
+; CHECK-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP41]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
+; CHECK-NEXT: [[TMP43:%.*]] = zext i32 [[TMP42]] to i64
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP43]]
+; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP44]] to i64
+; CHECK-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
+; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP46]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP45]], i64 [[TMP47]])
+; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
+; CHECK-NEXT: [[TMP49:%.*]] = zext i32 [[TMP48]] to i64
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP49]]
+; CHECK-NEXT: [[TMP51:%.*]] = ptrtoint ptr addrspace(1) [[TMP50]] to i64
+; CHECK-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
+; CHECK-NEXT: [[TMP53:%.*]] = zext i32 [[TMP52]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP51]], i64 [[TMP53]])
+; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
+; CHECK-NEXT: [[TMP55:%.*]] = zext i32 [[TMP54]] to i64
+; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP55]]
+; CHECK-NEXT: [[TMP57:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64
+; CHECK-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
+; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP57]], i64 [[TMP59]])
+; CHECK-NEXT: br label %[[BB60]]
+; CHECK: [[BB60]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP61]]
+; CHECK-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP63]]
+; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
+; CHECK-NEXT: call void @use_variables()
+; CHECK-NEXT: [[TMP65:%.*]] = ptrtoint ptr addrspace(3) [[TMP62]] to i32
+; CHECK-NEXT: [[TMP66:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP66]], i32 [[TMP65]]
+; CHECK-NEXT: [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64
+; CHECK-NEXT: [[TMP69:%.*]] = lshr i64 [[TMP68]], 3
+; CHECK-NEXT: [[TMP70:%.*]] = add i64 [[TMP69]], 2147450880
+; CHECK-NEXT: [[TMP71:%.*]] = inttoptr i64 [[TMP70]] to ptr
+; CHECK-NEXT: [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1
+; CHECK-NEXT: [[TMP73:%.*]] = icmp ne i8 [[TMP72]], 0
+; CHECK-NEXT: [[TMP74:%.*]] = and i64 [[TMP68]], 7
+; CHECK-NEXT: [[TMP75:%.*]] = trunc i64 [[TMP74]] to i8
+; CHECK-NEXT: [[TMP76:%.*]] = icmp sge i8 [[TMP75]], [[TMP72]]
+; CHECK-NEXT: [[TMP77:%.*]] = and i1 [[TMP73]], [[TMP76]]
+; CHECK-NEXT: [[TMP78:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP77]])
+; CHECK-NEXT: [[TMP79:%.*]] = icmp ne i64 [[TMP78]], 0
+; CHECK-NEXT: br i1 [[TMP79]], label %[[ASAN_REPORT1:.*]], label %[[BB82:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT1]]:
+; CHECK-NEXT: br i1 [[TMP77]], label %[[BB80:.*]], label %[[BB81:.*]]
+; CHECK: [[BB80]]:
+; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP68]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB81]]
+; CHECK: [[BB81]]:
+; CHECK-NEXT: br label %[[BB82]]
+; CHECK: [[BB82]]:
+; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP62]], align 1
+; CHECK-NEXT: [[TMP83:%.*]] = ptrtoint ptr addrspace(3) [[TMP64]] to i64
+; CHECK-NEXT: [[TMP84:%.*]] = add i64 [[TMP83]], 3
+; CHECK-NEXT: [[TMP85:%.*]] = inttoptr i64 [[TMP84]] to ptr addrspace(3)
+; CHECK-NEXT: [[TMP86:%.*]] = ptrtoint ptr addrspace(3) [[TMP64]] to i32
+; CHECK-NEXT: [[TMP87:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP87]], i32 [[TMP86]]
+; CHECK-NEXT: [[TMP89:%.*]] = ptrtoint ptr addrspace(1) [[TMP88]] to i64
+; CHECK-NEXT: [[TMP90:%.*]] = lshr i64 [[TMP89]], 3
+; CHECK-NEXT: [[TMP91:%.*]] = add i64 [[TMP90]], 2147450880
+; CHECK-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP91]] to ptr
+; CHECK-NEXT: [[TMP93:%.*]] = load i8, ptr [[TMP92]], align 1
+; CHECK-NEXT: [[TMP94:%.*]] = icmp ne i8 [[TMP93]], 0
+; CHECK-NEXT: [[TMP95:%.*]] = and i64 [[TMP89]], 7
+; CHECK-NEXT: [[TMP96:%.*]] = trunc i64 [[TMP95]] to i8
+; CHECK-NEXT: [[TMP97:%.*]] = icmp sge i8 [[TMP96]], [[TMP93]]
+; CHECK-NEXT: [[TMP98:%.*]] = and i1 [[TMP94]], [[TMP97]]
+; CHECK-NEXT: [[TMP99:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP98]])
+; CHECK-NEXT: [[TMP100:%.*]] = icmp ne i64 [[TMP99]], 0
+; CHECK-NEXT: br i1 [[TMP100]], label %[[ASAN_REPORT2:.*]], label %[[BB103:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT2]]:
+; CHECK-NEXT: br i1 [[TMP98]], label %[[BB101:.*]], label %[[BB102:.*]]
+; CHECK: [[BB101]]:
+; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP89]], i64 4) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB102]]
+; CHECK: [[BB102]]:
+; CHECK-NEXT: br label %[[BB103]]
+; CHECK: [[BB103]]:
+; CHECK-NEXT: [[TMP104:%.*]] = ptrtoint ptr addrspace(3) [[TMP85]] to i32
+; CHECK-NEXT: [[TMP105:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP105]], i32 [[TMP104]]
+; CHECK-NEXT: [[TMP107:%.*]] = ptrtoint ptr addrspace(1) [[TMP106]] to i64
+; CHECK-NEXT: [[TMP108:%.*]] = lshr i64 [[TMP107]], 3
+; CHECK-NEXT: [[TMP109:%.*]] = add i64 [[TMP108]], 2147450880
+; CHECK-NEXT: [[TMP110:%.*]] = inttoptr i64 [[TMP109]] to ptr
+; CHECK-NEXT: [[TMP111:%.*]] = load i8, ptr [[TMP110]], align 1
+; CHECK-NEXT: [[TMP112:%.*]] = icmp ne i8 [[TMP111]], 0
+; CHECK-NEXT: [[TMP113:%.*]] = and i64 [[TMP107]], 7
+; CHECK-NEXT: [[TMP114:%.*]] = trunc i64 [[TMP113]] to i8
+; CHECK-NEXT: [[TMP115:%.*]] = icmp sge i8 [[TMP114]], [[TMP111]]
+; CHECK-NEXT: [[TMP116:%.*]] = and i1 [[TMP112]], [[TMP115]]
+; CHECK-NEXT: [[TMP117:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP116]])
+; CHECK-NEXT: [[TMP118:%.*]] = icmp ne i64 [[TMP117]], 0
+; CHECK-NEXT: br i1 [[TMP118]], label %[[ASAN_REPORT3:.*]], label %[[BB121:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT3]]:
+; CHECK-NEXT: br i1 [[TMP116]], label %[[BB119:.*]], label %[[BB120:.*]]
+; CHECK: [[BB119]]:
+; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP107]], i64 4) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB120]]
+; CHECK: [[BB120]]:
+; CHECK-NEXT: br label %[[BB121]]
+; CHECK: [[BB121]]:
+; CHECK-NEXT: store i32 8, ptr addrspace(3) [[TMP64]], align 2
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP122:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP123:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP124:%.*]] = ptrtoint ptr [[TMP123]] to i64
+; CHECK-NEXT: [[TMP125:%.*]] = ptrtoint ptr addrspace(1) [[TMP122]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP125]], i64 [[TMP124]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+WId:
+ %0 = call i32 @llvm.amdgcn.workitem.id.x()
+ %1 = call i32 @llvm.amdgcn.workitem.id.y()
+ %2 = call i32 @llvm.amdgcn.workitem.id.z()
+ %3 = or i32 %0, %1
+ %4 = or i32 %3, %2
+ %5 = icmp eq i32 %4, 0
+ br i1 %5, label %Malloc, label %26
+
+Malloc: ; preds = %WId
+ %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+ %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+ %8 = add i32 %6, %7
+ %9 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %10 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %9, i64 15
+ store i32 %8, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4
+ %11 = load i32, ptr addrspace(4) %10, align 4
+ store i32 %11, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4
+ %12 = add i32 %11, 7
+ %13 = udiv i32 %12, 8
+ %14 = mul i32 %13, 8
+ store i32 %14, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4
+ %15 = add i32 %8, %14
+ store i32 %15, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+ %16 = load i32, ptr addrspace(4) %10, align 4
+ store i32 %16, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4
+ %17 = add i32 %16, 7
+ %18 = udiv i32 %17, 8
+ %19 = mul i32 %18, 8
+ store i32 %19, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+ %20 = add i32 %15, %19
+ %21 = zext i32 %20 to i64
+ %22 = call ptr @llvm.returnaddress(i32 0)
+ %23 = ptrtoint ptr %22 to i64
+ %24 = call i64 @__asan_malloc_impl(i64 %21, i64 %23)
+ %25 = inttoptr i64 %24 to ptr addrspace(1)
+ store ptr addrspace(1) %25, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+ br label %26
+
+26: ; preds = %Malloc, %WId
+ %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
+ call void @llvm.amdgcn.s.barrier()
+ %27 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+ %28 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %27
+ %29 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+ %30 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %29
+ call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
+ call void @use_variables()
+ store i8 7, ptr addrspace(3) %28, align 1
+ store i32 8, ptr addrspace(3) %30, align 2
+ br label %CondFree
+
+CondFree: ; preds = %26
+ call void @llvm.amdgcn.s.barrier()
+ br i1 %xyzCond, label %Free, label %End
+
+Free: ; preds = %CondFree
+ %31 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+ %32 = call ptr @llvm.returnaddress(i32 0)
+ %33 = ptrtoint ptr %32 to i64
+ %34 = ptrtoint ptr addrspace(1) %31 to i64
+ call void @__asan_free_impl(i64 %34, i64 %33)
+ br label %End
+
+End: ; preds = %Free, %CondFree
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare void @llvm.donothing() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() #3
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.y() #3
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.z() #3
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.returnaddress(i32 immarg) #2
+
+declare i64 @__asan_malloc_impl(i64, i64)
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.s.barrier() #4
+
+declare void @__asan_free_impl(i64, i64)
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.lds.kernel.id() #3
+
+attributes #0 = { sanitize_address }
+attributes #1 = { sanitize_address "amdgpu-lds-size"="24,24" }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #4 = { convergent nocallback nofree nounwind willreturn }
+
+!0 = !{i32 0, i32 1}
+!1 = !{i32 24, i32 25}
+!2 = !{i32 0}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address }
+; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="96,96" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR8]] = { nomerge }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1]] = !{i32 96, i32 97}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[META3]] = !{i32 0}
+;.
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
new file mode 100755
index 0000000000000..1be50f48aa6f6
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+
+%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
+%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
+
+ at llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol !0
+ at llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol !1
+ at llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 0, i32 0 } }, no_sanitize_address
+
+; Function Attrs: sanitize_address
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol [[META1:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 0, i32 0, i32 0, i32 0 } }, no_sanitize_address, align 1
+; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
+; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
+; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
+;.
+define amdgpu_kernel void @k0() #0 {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB43:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
+; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 3
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP15]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP11]], 7
+; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 3
+; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i8
+; CHECK-NEXT: [[TMP20:%.*]] = icmp sge i8 [[TMP19]], [[TMP15]]
+; CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP16]], [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP21]])
+; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP22]], 0
+; CHECK-NEXT: br i1 [[TMP23]], label %[[ASAN_REPORT:.*]], label %[[BB26:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK: [[ASAN_REPORT]]:
+; CHECK-NEXT: br i1 [[TMP21]], label %[[BB24:.*]], label %[[BB25:.*]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP11]]) #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB25]]
+; CHECK: [[BB25]]:
+; CHECK-NEXT: br label %[[BB26]]
+; CHECK: [[BB26]]:
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
+; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4
+; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 0
+; CHECK-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], 1
+; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 1
+; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]]
+; CHECK-NEXT: [[TMP32:%.*]] = zext i32 [[TMP31]] to i64
+; CHECK-NEXT: [[TMP33:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP33]] to i64
+; CHECK-NEXT: [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP32]], i64 [[TMP34]])
+; CHECK-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP36]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
+; CHECK-NEXT: [[TMP38:%.*]] = zext i32 [[TMP37]] to i64
+; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP36]], i64 [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64
+; CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
+; CHECK-NEXT: [[TMP42:%.*]] = zext i32 [[TMP41]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP40]], i64 [[TMP42]])
+; CHECK-NEXT: br label %[[BB43]]
+; CHECK: [[BB43]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP44]]
+; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
+; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr addrspace(3) [[TMP45]] to i32
+; CHECK-NEXT: [[TMP47:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP47]], i32 [[TMP46]]
+; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(1) [[TMP48]] to i64
+; CHECK-NEXT: [[TMP50:%.*]] = lshr i64 [[TMP49]], 3
+; CHECK-NEXT: [[TMP51:%.*]] = add i64 [[TMP50]], 2147450880
+; CHECK-NEXT: [[TMP52:%.*]] = inttoptr i64 [[TMP51]] to ptr
+; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne i8 [[TMP53]], 0
+; CHECK-NEXT: [[TMP55:%.*]] = and i64 [[TMP49]], 7
+; CHECK-NEXT: [[TMP56:%.*]] = trunc i64 [[TMP55]] to i8
+; CHECK-NEXT: [[TMP57:%.*]] = icmp sge i8 [[TMP56]], [[TMP53]]
+; CHECK-NEXT: [[TMP58:%.*]] = and i1 [[TMP54]], [[TMP57]]
+; CHECK-NEXT: [[TMP59:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP58]])
+; CHECK-NEXT: [[TMP60:%.*]] = icmp ne i64 [[TMP59]], 0
+; CHECK-NEXT: br i1 [[TMP60]], label %[[ASAN_REPORT1:.*]], label %[[BB63:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT1]]:
+; CHECK-NEXT: br i1 [[TMP58]], label %[[BB61:.*]], label %[[BB62:.*]]
+; CHECK: [[BB61]]:
+; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP49]]) #[[ATTR7]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB62]]
+; CHECK: [[BB62]]:
+; CHECK-NEXT: br label %[[BB63]]
+; CHECK: [[BB63]]:
+; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP45]], align 4
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP64:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP65:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP66:%.*]] = ptrtoint ptr [[TMP65]] to i64
+; CHECK-NEXT: [[TMP67:%.*]] = ptrtoint ptr addrspace(1) [[TMP64]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP67]], i64 [[TMP66]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+WId:
+ %0 = call i32 @llvm.amdgcn.workitem.id.x()
+ %1 = call i32 @llvm.amdgcn.workitem.id.y()
+ %2 = call i32 @llvm.amdgcn.workitem.id.z()
+ %3 = or i32 %0, %1
+ %4 = or i32 %3, %2
+ %5 = icmp eq i32 %4, 0
+ br i1 %5, label %Malloc, label %21
+
+Malloc: ; preds = %WId
+ %6 = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
+ %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
+ %8 = add i32 %6, %7
+ %9 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %10 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %9, i64 15
+ store i32 %8, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+ %11 = load i32, ptr addrspace(4) %10, align 4
+ store i32 %11, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4
+ %12 = add i32 %11, 0
+ %13 = udiv i32 %12, 1
+ %14 = mul i32 %13, 1
+ store i32 %14, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
+ %15 = add i32 %8, %14
+ %16 = zext i32 %15 to i64
+ %17 = call ptr @llvm.returnaddress(i32 0)
+ %18 = ptrtoint ptr %17 to i64
+ %19 = call i64 @__asan_malloc_impl(i64 %16, i64 %18)
+ %20 = inttoptr i64 %19 to ptr addrspace(1)
+ store ptr addrspace(1) %20, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+ br label %21
+
+21: ; preds = %Malloc, %WId
+ %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
+ call void @llvm.amdgcn.s.barrier()
+ %22 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+ %23 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %22
+ call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
+ store i8 7, ptr addrspace(3) %23, align 4
+ br label %CondFree
+
+CondFree: ; preds = %21
+ call void @llvm.amdgcn.s.barrier()
+ br i1 %xyzCond, label %Free, label %End
+
+Free: ; preds = %CondFree
+ %24 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+ %25 = call ptr @llvm.returnaddress(i32 0)
+ %26 = ptrtoint ptr %25 to i64
+ %27 = ptrtoint ptr addrspace(1) %24 to i64
+ call void @__asan_free_impl(i64 %27, i64 %26)
+ br label %End
+
+End: ; preds = %Free, %CondFree
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare void @llvm.donothing() #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.y() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.z() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.returnaddress(i32 immarg) #1
+
+declare i64 @__asan_malloc_impl(i64, i64)
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.s.barrier() #3
+
+declare void @__asan_free_impl(i64, i64)
+
+attributes #0 = { sanitize_address "amdgpu-lds-size"="8,8" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { convergent nocallback nofree nounwind willreturn }
+
+!0 = !{i32 0, i32 1}
+!1 = !{i32 8, i32 9}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="32,32" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR7]] = { nomerge }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1]] = !{i32 32, i32 33}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
new file mode 100755
index 0000000000000..23f27aa797e73
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
@@ -0,0 +1,476 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+
+%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
+%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
+
+ at llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol !0
+ at llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 3, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 4, i32 8 } }, no_sanitize_address
+ at llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+ at llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+
+; Function Attrs: sanitize_address
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 }, %1 { i32 96, i32 3, i32 32, i32 99, i32 29 }, %1 { i32 128, i32 4, i32 32, i32 132, i32 28 } }, no_sanitize_address, align 1
+; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
+; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
+; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
+;.
+define void @use_variables() #0 {
+; CHECK-LABEL: define void @use_variables(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr addrspace(1) [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP3]], 3
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 2147450880
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP3]], 7
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 3
+; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i8
+; CHECK-NEXT: [[TMP12:%.*]] = icmp sge i8 [[TMP11]], [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP8]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP13]])
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: br i1 [[TMP15]], label %[[ASAN_REPORT:.*]], label %[[BB18:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK: [[ASAN_REPORT]]:
+; CHECK-NEXT: br i1 [[TMP13]], label %[[BB16:.*]], label %[[BB17:.*]]
+; CHECK: [[BB16]]:
+; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP3]]) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB17]]
+; CHECK: [[BB17]]:
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
+; CHECK-NEXT: [[TMP22:%.*]] = lshr i64 [[TMP21]], 3
+; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], 2147450880
+; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr
+; CHECK-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i8 [[TMP25]], 0
+; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP26]])
+; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP27]], 0
+; CHECK-NEXT: br i1 [[TMP28]], label %[[ASAN_REPORT1:.*]], label %[[BB31:.*]], !prof [[PROF1]]
+; CHECK: [[ASAN_REPORT1]]:
+; CHECK-NEXT: br i1 [[TMP26]], label %[[BB29:.*]], label %[[BB30:.*]]
+; CHECK: [[BB29]]:
+; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP21]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB30]]
+; CHECK: [[BB30]]:
+; CHECK-NEXT: br label %[[BB31]]
+; CHECK: [[BB31]]:
+; CHECK-NEXT: [[TMP32:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP20]], align 8
+; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr addrspace(1) [[TMP32]] to i64
+; CHECK-NEXT: [[TMP34:%.*]] = lshr i64 [[TMP33]], 3
+; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[TMP34]], 2147450880
+; CHECK-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr
+; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
+; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i8 [[TMP37]], 0
+; CHECK-NEXT: [[TMP39:%.*]] = and i64 [[TMP33]], 7
+; CHECK-NEXT: [[TMP40:%.*]] = add i64 [[TMP39]], 3
+; CHECK-NEXT: [[TMP41:%.*]] = trunc i64 [[TMP40]] to i8
+; CHECK-NEXT: [[TMP42:%.*]] = icmp sge i8 [[TMP41]], [[TMP37]]
+; CHECK-NEXT: [[TMP43:%.*]] = and i1 [[TMP38]], [[TMP42]]
+; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP43]])
+; CHECK-NEXT: [[TMP45:%.*]] = icmp ne i64 [[TMP44]], 0
+; CHECK-NEXT: br i1 [[TMP45]], label %[[ASAN_REPORT2:.*]], label %[[BB48:.*]], !prof [[PROF1]]
+; CHECK: [[ASAN_REPORT2]]:
+; CHECK-NEXT: br i1 [[TMP43]], label %[[BB46:.*]], label %[[BB47:.*]]
+; CHECK: [[BB46]]:
+; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP33]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB47]]
+; CHECK: [[BB47]]:
+; CHECK-NEXT: br label %[[BB48]]
+; CHECK: [[BB48]]:
+; CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(1) [[TMP32]], align 4
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP49]]
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64
+; CHECK-NEXT: [[TMP53:%.*]] = lshr i64 [[TMP52]], 3
+; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[TMP53]], 2147450880
+; CHECK-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr
+; CHECK-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1
+; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i8 [[TMP56]], 0
+; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP57]])
+; CHECK-NEXT: [[TMP59:%.*]] = icmp ne i64 [[TMP58]], 0
+; CHECK-NEXT: br i1 [[TMP59]], label %[[ASAN_REPORT3:.*]], label %[[BB62:.*]], !prof [[PROF1]]
+; CHECK: [[ASAN_REPORT3]]:
+; CHECK-NEXT: br i1 [[TMP57]], label %[[BB60:.*]], label %[[BB61:.*]]
+; CHECK: [[BB60]]:
+; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP52]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB61]]
+; CHECK: [[BB61]]:
+; CHECK-NEXT: br label %[[BB62]]
+; CHECK: [[BB62]]:
+; CHECK-NEXT: [[TMP63:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP51]], align 8
+; CHECK-NEXT: [[TMP64:%.*]] = ptrtoint ptr addrspace(1) [[TMP63]] to i64
+; CHECK-NEXT: [[TMP65:%.*]] = lshr i64 [[TMP64]], 3
+; CHECK-NEXT: [[TMP66:%.*]] = add i64 [[TMP65]], 2147450880
+; CHECK-NEXT: [[TMP67:%.*]] = inttoptr i64 [[TMP66]] to ptr
+; CHECK-NEXT: [[TMP68:%.*]] = load i8, ptr [[TMP67]], align 1
+; CHECK-NEXT: [[TMP69:%.*]] = icmp ne i8 [[TMP68]], 0
+; CHECK-NEXT: [[TMP70:%.*]] = and i64 [[TMP64]], 7
+; CHECK-NEXT: [[TMP71:%.*]] = add i64 [[TMP70]], 3
+; CHECK-NEXT: [[TMP72:%.*]] = trunc i64 [[TMP71]] to i8
+; CHECK-NEXT: [[TMP73:%.*]] = icmp sge i8 [[TMP72]], [[TMP68]]
+; CHECK-NEXT: [[TMP74:%.*]] = and i1 [[TMP69]], [[TMP73]]
+; CHECK-NEXT: [[TMP75:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP74]])
+; CHECK-NEXT: [[TMP76:%.*]] = icmp ne i64 [[TMP75]], 0
+; CHECK-NEXT: br i1 [[TMP76]], label %[[ASAN_REPORT4:.*]], label %[[BB79:.*]], !prof [[PROF1]]
+; CHECK: [[ASAN_REPORT4]]:
+; CHECK-NEXT: br i1 [[TMP74]], label %[[BB77:.*]], label %[[BB78:.*]]
+; CHECK: [[BB77]]:
+; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP64]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB78]]
+; CHECK: [[BB78]]:
+; CHECK-NEXT: br label %[[BB79]]
+; CHECK: [[BB79]]:
+; CHECK-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(1) [[TMP63]], align 4
+; CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP80]]
+; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP50]] to ptr
+; CHECK-NEXT: [[TMP82:%.*]] = addrspacecast ptr addrspace(3) [[TMP50]] to ptr
+; CHECK-NEXT: [[TMP83:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[TMP82]])
+; CHECK-NEXT: [[TMP84:%.*]] = xor i1 [[TMP83]], true
+; CHECK-NEXT: br i1 [[TMP84]], label %[[BB85:.*]], label %[[BB101:.*]]
+; CHECK: [[BB85]]:
+; CHECK-NEXT: [[TMP86:%.*]] = ptrtoint ptr [[TMP82]] to i64
+; CHECK-NEXT: [[TMP87:%.*]] = lshr i64 [[TMP86]], 3
+; CHECK-NEXT: [[TMP88:%.*]] = add i64 [[TMP87]], 2147450880
+; CHECK-NEXT: [[TMP89:%.*]] = inttoptr i64 [[TMP88]] to ptr
+; CHECK-NEXT: [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1
+; CHECK-NEXT: [[TMP91:%.*]] = icmp ne i8 [[TMP90]], 0
+; CHECK-NEXT: [[TMP92:%.*]] = and i64 [[TMP86]], 7
+; CHECK-NEXT: [[TMP93:%.*]] = trunc i64 [[TMP92]] to i8
+; CHECK-NEXT: [[TMP94:%.*]] = icmp sge i8 [[TMP93]], [[TMP90]]
+; CHECK-NEXT: [[TMP95:%.*]] = and i1 [[TMP91]], [[TMP94]]
+; CHECK-NEXT: [[TMP96:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP95]])
+; CHECK-NEXT: [[TMP97:%.*]] = icmp ne i64 [[TMP96]], 0
+; CHECK-NEXT: br i1 [[TMP97]], label %[[ASAN_REPORT5:.*]], label %[[BB100:.*]], !prof [[PROF1]]
+; CHECK: [[ASAN_REPORT5]]:
+; CHECK-NEXT: br i1 [[TMP95]], label %[[BB98:.*]], label %[[BB99:.*]]
+; CHECK: [[BB98]]:
+; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP86]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB99]]
+; CHECK: [[BB99]]:
+; CHECK-NEXT: br label %[[BB100]]
+; CHECK: [[BB100]]:
+; CHECK-NEXT: br label %[[BB101]]
+; CHECK: [[BB101]]:
+; CHECK-NEXT: store i8 3, ptr [[TMP82]], align 4
+; CHECK-NEXT: [[TMP102:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP102]]
+; CHECK-NEXT: [[TMP104:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP103]], align 4
+; CHECK-NEXT: [[TMP105:%.*]] = ptrtoint ptr addrspace(3) [[TMP81]] to i32
+; CHECK-NEXT: [[TMP106:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP104]], align 8
+; CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP106]], i32 [[TMP105]]
+; CHECK-NEXT: [[TMP108:%.*]] = ptrtoint ptr addrspace(1) [[TMP107]] to i64
+; CHECK-NEXT: [[TMP109:%.*]] = lshr i64 [[TMP108]], 3
+; CHECK-NEXT: [[TMP110:%.*]] = add i64 [[TMP109]], 2147450880
+; CHECK-NEXT: [[TMP111:%.*]] = inttoptr i64 [[TMP110]] to ptr
+; CHECK-NEXT: [[TMP112:%.*]] = load i8, ptr [[TMP111]], align 1
+; CHECK-NEXT: [[TMP113:%.*]] = icmp ne i8 [[TMP112]], 0
+; CHECK-NEXT: [[TMP114:%.*]] = and i64 [[TMP108]], 7
+; CHECK-NEXT: [[TMP115:%.*]] = trunc i64 [[TMP114]] to i8
+; CHECK-NEXT: [[TMP116:%.*]] = icmp sge i8 [[TMP115]], [[TMP112]]
+; CHECK-NEXT: [[TMP117:%.*]] = and i1 [[TMP113]], [[TMP116]]
+; CHECK-NEXT: [[TMP118:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP117]])
+; CHECK-NEXT: [[TMP119:%.*]] = icmp ne i64 [[TMP118]], 0
+; CHECK-NEXT: br i1 [[TMP119]], label %[[ASAN_REPORT6:.*]], label %[[BB122:.*]], !prof [[PROF1]]
+; CHECK: [[ASAN_REPORT6]]:
+; CHECK-NEXT: br i1 [[TMP117]], label %[[BB120:.*]], label %[[BB121:.*]]
+; CHECK: [[BB120]]:
+; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP108]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB121]]
+; CHECK: [[BB121]]:
+; CHECK-NEXT: br label %[[BB122]]
+; CHECK: [[BB122]]:
+; CHECK-NEXT: store i8 3, ptr addrspace(3) [[TMP81]], align 8
+; CHECK-NEXT: ret void
+;
+ %1 = call i32 @llvm.amdgcn.lds.kernel.id()
+ %2 = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 %1
+ %3 = load ptr addrspace(3), ptr addrspace(1) %2, align 4
+ %4 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 0
+ %5 = load ptr addrspace(1), ptr addrspace(1) %4, align 8
+ %6 = load i32, ptr addrspace(1) %5, align 4
+ %7 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %6
+ %8 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 1
+ %9 = load ptr addrspace(1), ptr addrspace(1) %8, align 8
+ %10 = load i32, ptr addrspace(1) %9, align 4
+ %11 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %10
+ %X = addrspacecast ptr addrspace(3) %7 to ptr
+ %12 = addrspacecast ptr addrspace(3) %7 to ptr
+ store i8 3, ptr %12, align 4
+ store i8 3, ptr addrspace(3) %11, align 8
+ ret void
+}
+
+; Function Attrs: sanitize_address
+define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB44:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 3), align 4
+; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 4), align 4
+; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 3), align 4
+; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 4), align 4
+; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
+; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP27]]
+; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64
+; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
+; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP30]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP29]], i64 [[TMP31]])
+; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
+; CHECK-NEXT: [[TMP33:%.*]] = zext i32 [[TMP32]] to i64
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP33]]
+; CHECK-NEXT: [[TMP35:%.*]] = ptrtoint ptr addrspace(1) [[TMP34]] to i64
+; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
+; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP35]], i64 [[TMP37]])
+; CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
+; CHECK-NEXT: [[TMP39:%.*]] = zext i32 [[TMP38]] to i64
+; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP39]]
+; CHECK-NEXT: [[TMP41:%.*]] = ptrtoint ptr addrspace(1) [[TMP40]] to i64
+; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
+; CHECK-NEXT: [[TMP43:%.*]] = zext i32 [[TMP42]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP41]], i64 [[TMP43]])
+; CHECK-NEXT: br label %[[BB44]]
+; CHECK: [[BB44]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP45]]
+; CHECK-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP47]]
+; CHECK-NEXT: call void @use_variables()
+; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(3) [[TMP46]] to i32
+; CHECK-NEXT: [[TMP50:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP50]], i32 [[TMP49]]
+; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64
+; CHECK-NEXT: [[TMP53:%.*]] = lshr i64 [[TMP52]], 3
+; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[TMP53]], 2147450880
+; CHECK-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr
+; CHECK-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1
+; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i8 [[TMP56]], 0
+; CHECK-NEXT: [[TMP58:%.*]] = and i64 [[TMP52]], 7
+; CHECK-NEXT: [[TMP59:%.*]] = trunc i64 [[TMP58]] to i8
+; CHECK-NEXT: [[TMP60:%.*]] = icmp sge i8 [[TMP59]], [[TMP56]]
+; CHECK-NEXT: [[TMP61:%.*]] = and i1 [[TMP57]], [[TMP60]]
+; CHECK-NEXT: [[TMP62:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP61]])
+; CHECK-NEXT: [[TMP63:%.*]] = icmp ne i64 [[TMP62]], 0
+; CHECK-NEXT: br i1 [[TMP63]], label %[[ASAN_REPORT:.*]], label %[[BB66:.*]], !prof [[PROF1]]
+; CHECK: [[ASAN_REPORT]]:
+; CHECK-NEXT: br i1 [[TMP61]], label %[[BB64:.*]], label %[[BB65:.*]]
+; CHECK: [[BB64]]:
+; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP52]]) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB65]]
+; CHECK: [[BB65]]:
+; CHECK-NEXT: br label %[[BB66]]
+; CHECK: [[BB66]]:
+; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP46]], align 1
+; CHECK-NEXT: [[TMP67:%.*]] = ptrtoint ptr addrspace(3) [[TMP48]] to i64
+; CHECK-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], 3
+; CHECK-NEXT: [[TMP69:%.*]] = inttoptr i64 [[TMP68]] to ptr addrspace(3)
+; CHECK-NEXT: [[TMP70:%.*]] = ptrtoint ptr addrspace(3) [[TMP48]] to i32
+; CHECK-NEXT: [[TMP71:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP71]], i32 [[TMP70]]
+; CHECK-NEXT: [[TMP73:%.*]] = ptrtoint ptr addrspace(1) [[TMP72]] to i64
+; CHECK-NEXT: [[TMP74:%.*]] = lshr i64 [[TMP73]], 3
+; CHECK-NEXT: [[TMP75:%.*]] = add i64 [[TMP74]], 2147450880
+; CHECK-NEXT: [[TMP76:%.*]] = inttoptr i64 [[TMP75]] to ptr
+; CHECK-NEXT: [[TMP77:%.*]] = load i8, ptr [[TMP76]], align 1
+; CHECK-NEXT: [[TMP78:%.*]] = icmp ne i8 [[TMP77]], 0
+; CHECK-NEXT: [[TMP79:%.*]] = and i64 [[TMP73]], 7
+; CHECK-NEXT: [[TMP80:%.*]] = trunc i64 [[TMP79]] to i8
+; CHECK-NEXT: [[TMP81:%.*]] = icmp sge i8 [[TMP80]], [[TMP77]]
+; CHECK-NEXT: [[TMP82:%.*]] = and i1 [[TMP78]], [[TMP81]]
+; CHECK-NEXT: [[TMP83:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP82]])
+; CHECK-NEXT: [[TMP84:%.*]] = icmp ne i64 [[TMP83]], 0
+; CHECK-NEXT: br i1 [[TMP84]], label %[[ASAN_REPORT1:.*]], label %[[BB87:.*]], !prof [[PROF1]]
+; CHECK: [[ASAN_REPORT1]]:
+; CHECK-NEXT: br i1 [[TMP82]], label %[[BB85:.*]], label %[[BB86:.*]]
+; CHECK: [[BB85]]:
+; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP73]], i64 4) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB86]]
+; CHECK: [[BB86]]:
+; CHECK-NEXT: br label %[[BB87]]
+; CHECK: [[BB87]]:
+; CHECK-NEXT: [[TMP88:%.*]] = ptrtoint ptr addrspace(3) [[TMP69]] to i32
+; CHECK-NEXT: [[TMP89:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP89]], i32 [[TMP88]]
+; CHECK-NEXT: [[TMP91:%.*]] = ptrtoint ptr addrspace(1) [[TMP90]] to i64
+; CHECK-NEXT: [[TMP92:%.*]] = lshr i64 [[TMP91]], 3
+; CHECK-NEXT: [[TMP93:%.*]] = add i64 [[TMP92]], 2147450880
+; CHECK-NEXT: [[TMP94:%.*]] = inttoptr i64 [[TMP93]] to ptr
+; CHECK-NEXT: [[TMP95:%.*]] = load i8, ptr [[TMP94]], align 1
+; CHECK-NEXT: [[TMP96:%.*]] = icmp ne i8 [[TMP95]], 0
+; CHECK-NEXT: [[TMP97:%.*]] = and i64 [[TMP91]], 7
+; CHECK-NEXT: [[TMP98:%.*]] = trunc i64 [[TMP97]] to i8
+; CHECK-NEXT: [[TMP99:%.*]] = icmp sge i8 [[TMP98]], [[TMP95]]
+; CHECK-NEXT: [[TMP100:%.*]] = and i1 [[TMP96]], [[TMP99]]
+; CHECK-NEXT: [[TMP101:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP100]])
+; CHECK-NEXT: [[TMP102:%.*]] = icmp ne i64 [[TMP101]], 0
+; CHECK-NEXT: br i1 [[TMP102]], label %[[ASAN_REPORT2:.*]], label %[[BB105:.*]], !prof [[PROF1]]
+; CHECK: [[ASAN_REPORT2]]:
+; CHECK-NEXT: br i1 [[TMP100]], label %[[BB103:.*]], label %[[BB104:.*]]
+; CHECK: [[BB103]]:
+; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP91]], i64 4) #[[ATTR8]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB104]]
+; CHECK: [[BB104]]:
+; CHECK-NEXT: br label %[[BB105]]
+; CHECK: [[BB105]]:
+; CHECK-NEXT: store i32 8, ptr addrspace(3) [[TMP48]], align 2
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP106:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP107:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP108:%.*]] = ptrtoint ptr [[TMP107]] to i64
+; CHECK-NEXT: [[TMP109:%.*]] = ptrtoint ptr addrspace(1) [[TMP106]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP109]], i64 [[TMP108]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+WId:
+ %0 = call i32 @llvm.amdgcn.workitem.id.x()
+ %1 = call i32 @llvm.amdgcn.workitem.id.y()
+ %2 = call i32 @llvm.amdgcn.workitem.id.z()
+ %3 = or i32 %0, %1
+ %4 = or i32 %3, %2
+ %5 = icmp eq i32 %4, 0
+ br i1 %5, label %Malloc, label %14
+
+Malloc: ; preds = %WId
+ %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+ %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+ %8 = add i32 %6, %7
+ %9 = zext i32 %8 to i64
+ %10 = call ptr @llvm.returnaddress(i32 0)
+ %11 = ptrtoint ptr %10 to i64
+ %12 = call i64 @__asan_malloc_impl(i64 %9, i64 %11)
+ %13 = inttoptr i64 %12 to ptr addrspace(1)
+ store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+ br label %14
+
+14: ; preds = %Malloc, %WId
+ %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
+ call void @llvm.amdgcn.s.barrier()
+ %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+ %16 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %15
+ %17 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+ %18 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %17
+ call void @use_variables()
+ store i8 7, ptr addrspace(3) %16, align 1
+ store i32 8, ptr addrspace(3) %18, align 2
+ br label %CondFree
+
+CondFree: ; preds = %14
+ call void @llvm.amdgcn.s.barrier()
+ br i1 %xyzCond, label %Free, label %End
+
+Free: ; preds = %CondFree
+ %19 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+ %20 = call ptr @llvm.returnaddress(i32 0)
+ %21 = ptrtoint ptr %20 to i64
+ %22 = ptrtoint ptr addrspace(1) %19 to i64
+ call void @__asan_free_impl(i64 %22, i64 %21)
+ br label %End
+
+End: ; preds = %Free, %CondFree
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.y() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.z() #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.returnaddress(i32 immarg) #3
+
+declare i64 @__asan_malloc_impl(i64, i64)
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.s.barrier() #4
+
+declare void @__asan_free_impl(i64, i64)
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.lds.kernel.id() #2
+
+attributes #0 = { sanitize_address }
+attributes #1 = { sanitize_address "amdgpu-lds-size"="40" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #4 = { convergent nocallback nofree nounwind willreturn }
+
+!0 = !{i32 0, i32 1}
+!1 = !{i32 0}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address }
+; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="160" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR8]] = { nomerge }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[META2]] = !{i32 0}
+;.
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
new file mode 100755
index 0000000000000..072174db11f25
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
@@ -0,0 +1,203 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+
+%llvm.amdgcn.sw.lds.my_kernel.md.type = type { %llvm.amdgcn.sw.lds.my_kernel.md.item, %llvm.amdgcn.sw.lds.my_kernel.md.item }
+%llvm.amdgcn.sw.lds.my_kernel.md.item = type { i32, i32, i32 }
+
+@llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol !0
+@llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.my_kernel.md.type { %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 8, i32 4096, i32 4096 } }, no_sanitize_address
+
+; Function Attrs: sanitize_address
+;.
+; CHECK: @llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 4096, i32 5120, i32 4128, i32 1024 } }, no_sanitize_address, align 1
+; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
+; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
+; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
+;.
+define void @my_function(ptr addrspace(3) %lds_arg) #0 {
+; CHECK-LABEL: define void @my_function(
+; CHECK-SAME: ptr addrspace(3) [[LDS_ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[LDS_ARG]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(3) null, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr addrspace(1) [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 2147450880
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 1
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP8]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP4]], 7
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 3
+; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i8
+; CHECK-NEXT: [[TMP13:%.*]] = icmp sge i8 [[TMP12]], [[TMP8]]
+; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP9]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP14]])
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
+; CHECK-NEXT: br i1 [[TMP16]], label %[[ASAN_REPORT:.*]], label %[[BB19:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK: [[ASAN_REPORT]]:
+; CHECK-NEXT: br i1 [[TMP14]], label %[[BB17:.*]], label %[[BB18:.*]]
+; CHECK: [[BB17]]:
+; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP4]]) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: br label %[[BB19]]
+; CHECK: [[BB19]]:
+; CHECK-NEXT: [[LDS_VAL:%.*]] = load i32, ptr addrspace(3) [[LDS_ARG]], align 4
+; CHECK-NEXT: [[NEW_LDS_VAL:%.*]] = add i32 [[LDS_VAL]], 1
+; CHECK-NEXT: store i32 [[NEW_LDS_VAL]], ptr addrspace(3) [[LDS_ARG]], align 4
+; CHECK-NEXT: ret void
+;
+ %lds_val = load i32, ptr addrspace(3) %lds_arg, align 4
+ %new_lds_val = add i32 %lds_val, 1
+ store i32 %new_lds_val, ptr addrspace(3) %lds_arg, align 4
+ ret void
+}
+
+; Function Attrs: sanitize_address
+define amdgpu_kernel void @my_kernel() #1 {
+; CHECK-LABEL: define amdgpu_kernel void @my_kernel(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB26:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 3), align 4
+; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 4), align 4
+; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 0, i32 3), align 4
+; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 0, i32 4), align 4
+; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
+; CHECK-NEXT: br label %[[BB26]]
+; CHECK: [[BB26]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 [[TMP27]]
+; CHECK-NEXT: [[LDS_PTR:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) [[TMP28]], i32 0, i32 0
+; CHECK-NEXT: call void @my_function(ptr addrspace(3) [[LDS_PTR]])
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
+; CHECK-NEXT: [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
+; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+WId:
+ %0 = call i32 @llvm.amdgcn.workitem.id.x()
+ %1 = call i32 @llvm.amdgcn.workitem.id.y()
+ %2 = call i32 @llvm.amdgcn.workitem.id.z()
+ %3 = or i32 %0, %1
+ %4 = or i32 %3, %2
+ %5 = icmp eq i32 %4, 0
+ br i1 %5, label %Malloc, label %14
+
+Malloc: ; preds = %WId
+ %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+ %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
+ %8 = add i32 %6, %7
+ %9 = zext i32 %8 to i64
+ %10 = call ptr @llvm.returnaddress(i32 0)
+ %11 = ptrtoint ptr %10 to i64
+ %12 = call i64 @__asan_malloc_impl(i64 %9, i64 %11)
+ %13 = inttoptr i64 %12 to ptr addrspace(1)
+ store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
+ br label %14
+
+14: ; preds = %Malloc, %WId
+ %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
+ call void @llvm.amdgcn.s.barrier()
+ %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+ %16 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 %15
+ %lds_ptr = getelementptr [1024 x i32], ptr addrspace(3) %16, i32 0, i32 0
+ call void @my_function(ptr addrspace(3) %lds_ptr)
+ br label %CondFree
+
+CondFree: ; preds = %14
+ call void @llvm.amdgcn.s.barrier()
+ br i1 %xyzCond, label %Free, label %End
+
+Free: ; preds = %CondFree
+ %17 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
+ %18 = call ptr @llvm.returnaddress(i32 0)
+ %19 = ptrtoint ptr %18 to i64
+ %20 = ptrtoint ptr addrspace(1) %17 to i64
+ call void @__asan_free_impl(i64 %20, i64 %19)
+ br label %End
+
+End: ; preds = %Free, %CondFree
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.y() #2
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.z() #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.returnaddress(i32 immarg) #3
+
+declare i64 @__asan_malloc_impl(i64, i64)
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.s.barrier() #4
+
+declare void @__asan_free_impl(i64, i64)
+
+attributes #0 = { sanitize_address }
+attributes #1 = { sanitize_address "amdgpu-lds-size"="4104" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #4 = { convergent nocallback nofree nounwind willreturn }
+
+!0 = !{i32 0, i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address }
+; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="5152" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR8]] = { nomerge }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
new file mode 100755
index 0000000000000..99dbf6c607ab5
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
@@ -0,0 +1,249 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+
+%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
+%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
+
+@llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol !0
+@llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 } }, no_sanitize_address
+
+; Function Attrs: sanitize_address
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 } }, no_sanitize_address, align 1
+; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
+; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
+; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64
+; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
+;.
+define amdgpu_kernel void @k0() #0 {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB32:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
+; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
+; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
+; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
+; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
+; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP27]]
+; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64
+; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
+; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP30]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP29]], i64 [[TMP31]])
+; CHECK-NEXT: br label %[[BB32]]
+; CHECK: [[BB32]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP33]]
+; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP35]]
+; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr addrspace(3) [[TMP34]] to i32
+; CHECK-NEXT: [[TMP38:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP38]], i32 [[TMP37]]
+; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64
+; CHECK-NEXT: [[TMP41:%.*]] = lshr i64 [[TMP40]], 3
+; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], 2147450880
+; CHECK-NEXT: [[TMP43:%.*]] = inttoptr i64 [[TMP42]] to ptr
+; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP43]], align 1
+; CHECK-NEXT: [[TMP45:%.*]] = icmp ne i8 [[TMP44]], 0
+; CHECK-NEXT: [[TMP46:%.*]] = and i64 [[TMP40]], 7
+; CHECK-NEXT: [[TMP47:%.*]] = trunc i64 [[TMP46]] to i8
+; CHECK-NEXT: [[TMP48:%.*]] = icmp sge i8 [[TMP47]], [[TMP44]]
+; CHECK-NEXT: [[TMP49:%.*]] = and i1 [[TMP45]], [[TMP48]]
+; CHECK-NEXT: [[TMP50:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP49]])
+; CHECK-NEXT: [[TMP51:%.*]] = icmp ne i64 [[TMP50]], 0
+; CHECK-NEXT: br i1 [[TMP51]], label %[[ASAN_REPORT:.*]], label %[[BB54:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK: [[ASAN_REPORT]]:
+; CHECK-NEXT: br i1 [[TMP49]], label %[[BB52:.*]], label %[[BB53:.*]]
+; CHECK: [[BB52]]:
+; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP40]]) #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB53]]
+; CHECK: [[BB53]]:
+; CHECK-NEXT: br label %[[BB54]]
+; CHECK: [[BB54]]:
+; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP34]], align 4
+; CHECK-NEXT: [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP36]] to i64
+; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[TMP55]], 3
+; CHECK-NEXT: [[TMP57:%.*]] = inttoptr i64 [[TMP56]] to ptr addrspace(3)
+; CHECK-NEXT: [[TMP58:%.*]] = ptrtoint ptr addrspace(3) [[TMP36]] to i32
+; CHECK-NEXT: [[TMP59:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP59]], i32 [[TMP58]]
+; CHECK-NEXT: [[TMP61:%.*]] = ptrtoint ptr addrspace(1) [[TMP60]] to i64
+; CHECK-NEXT: [[TMP62:%.*]] = lshr i64 [[TMP61]], 3
+; CHECK-NEXT: [[TMP63:%.*]] = add i64 [[TMP62]], 2147450880
+; CHECK-NEXT: [[TMP64:%.*]] = inttoptr i64 [[TMP63]] to ptr
+; CHECK-NEXT: [[TMP65:%.*]] = load i8, ptr [[TMP64]], align 1
+; CHECK-NEXT: [[TMP66:%.*]] = icmp ne i8 [[TMP65]], 0
+; CHECK-NEXT: [[TMP67:%.*]] = and i64 [[TMP61]], 7
+; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i8
+; CHECK-NEXT: [[TMP69:%.*]] = icmp sge i8 [[TMP68]], [[TMP65]]
+; CHECK-NEXT: [[TMP70:%.*]] = and i1 [[TMP66]], [[TMP69]]
+; CHECK-NEXT: [[TMP71:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP70]])
+; CHECK-NEXT: [[TMP72:%.*]] = icmp ne i64 [[TMP71]], 0
+; CHECK-NEXT: br i1 [[TMP72]], label %[[ASAN_REPORT1:.*]], label %[[BB75:.*]], !prof [[PROF1]]
+; CHECK: [[ASAN_REPORT1]]:
+; CHECK-NEXT: br i1 [[TMP70]], label %[[BB73:.*]], label %[[BB74:.*]]
+; CHECK: [[BB73]]:
+; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP61]], i64 4) #[[ATTR7]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB74]]
+; CHECK: [[BB74]]:
+; CHECK-NEXT: br label %[[BB75]]
+; CHECK: [[BB75]]:
+; CHECK-NEXT: [[TMP76:%.*]] = ptrtoint ptr addrspace(3) [[TMP57]] to i32
+; CHECK-NEXT: [[TMP77:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP77]], i32 [[TMP76]]
+; CHECK-NEXT: [[TMP79:%.*]] = ptrtoint ptr addrspace(1) [[TMP78]] to i64
+; CHECK-NEXT: [[TMP80:%.*]] = lshr i64 [[TMP79]], 3
+; CHECK-NEXT: [[TMP81:%.*]] = add i64 [[TMP80]], 2147450880
+; CHECK-NEXT: [[TMP82:%.*]] = inttoptr i64 [[TMP81]] to ptr
+; CHECK-NEXT: [[TMP83:%.*]] = load i8, ptr [[TMP82]], align 1
+; CHECK-NEXT: [[TMP84:%.*]] = icmp ne i8 [[TMP83]], 0
+; CHECK-NEXT: [[TMP85:%.*]] = and i64 [[TMP79]], 7
+; CHECK-NEXT: [[TMP86:%.*]] = trunc i64 [[TMP85]] to i8
+; CHECK-NEXT: [[TMP87:%.*]] = icmp sge i8 [[TMP86]], [[TMP83]]
+; CHECK-NEXT: [[TMP88:%.*]] = and i1 [[TMP84]], [[TMP87]]
+; CHECK-NEXT: [[TMP89:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP88]])
+; CHECK-NEXT: [[TMP90:%.*]] = icmp ne i64 [[TMP89]], 0
+; CHECK-NEXT: br i1 [[TMP90]], label %[[ASAN_REPORT2:.*]], label %[[BB93:.*]], !prof [[PROF1]]
+; CHECK: [[ASAN_REPORT2]]:
+; CHECK-NEXT: br i1 [[TMP88]], label %[[BB91:.*]], label %[[BB92:.*]]
+; CHECK: [[BB91]]:
+; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP79]], i64 4) #[[ATTR7]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label %[[BB92]]
+; CHECK: [[BB92]]:
+; CHECK-NEXT: br label %[[BB93]]
+; CHECK: [[BB93]]:
+; CHECK-NEXT: store i32 8, ptr addrspace(3) [[TMP36]], align 2
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP94:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP95:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP96:%.*]] = ptrtoint ptr [[TMP95]] to i64
+; CHECK-NEXT: [[TMP97:%.*]] = ptrtoint ptr addrspace(1) [[TMP94]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP97]], i64 [[TMP96]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+WId:
+ %0 = call i32 @llvm.amdgcn.workitem.id.x()
+ %1 = call i32 @llvm.amdgcn.workitem.id.y()
+ %2 = call i32 @llvm.amdgcn.workitem.id.z()
+ %3 = or i32 %0, %1
+ %4 = or i32 %3, %2
+ %5 = icmp eq i32 %4, 0
+ br i1 %5, label %Malloc, label %14
+
+Malloc: ; preds = %WId
+ %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+ %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+ %8 = add i32 %6, %7
+ %9 = zext i32 %8 to i64
+ %10 = call ptr @llvm.returnaddress(i32 0)
+ %11 = ptrtoint ptr %10 to i64
+ %12 = call i64 @__asan_malloc_impl(i64 %9, i64 %11)
+ %13 = inttoptr i64 %12 to ptr addrspace(1)
+ store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+ br label %14
+
+14: ; preds = %Malloc, %WId
+ %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
+ call void @llvm.amdgcn.s.barrier()
+ %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+ %16 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %15
+ %17 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+ %18 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %17
+ store i8 7, ptr addrspace(3) %16, align 4
+ store i32 8, ptr addrspace(3) %18, align 2
+ br label %CondFree
+
+CondFree: ; preds = %14
+ call void @llvm.amdgcn.s.barrier()
+ br i1 %xyzCond, label %Free, label %End
+
+Free: ; preds = %CondFree
+ %19 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+ %20 = call ptr @llvm.returnaddress(i32 0)
+ %21 = ptrtoint ptr %20 to i64
+ %22 = ptrtoint ptr addrspace(1) %19 to i64
+ call void @__asan_free_impl(i64 %22, i64 %21)
+ br label %End
+
+End: ; preds = %Free, %CondFree
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.y() #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.z() #1
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr @llvm.returnaddress(i32 immarg) #2
+
+declare i64 @__asan_malloc_impl(i64, i64)
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.s.barrier() #3
+
+declare void @__asan_free_impl(i64, i64)
+
+attributes #0 = { sanitize_address "amdgpu-lds-size"="24" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #3 = { convergent nocallback nofree nounwind willreturn }
+
+!0 = !{i32 0, i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="96" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR7]] = { nomerge }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll
deleted file mode 100644
index 44149b28fd9f9..0000000000000
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -passes=asan -S | FileCheck %s
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
-target triple = "amdgcn-amd-amdhsa"
-
-; Memory access to lds are not instrumented
-
-@count = addrspace(3) global [100 x i32] undef, align 16
-
-define protected amdgpu_kernel void @lds_store(i32 %i) sanitize_address {
-entry:
- ; CHECK-LABEL: @lds_store(
- ; CHECK-NOT: call {{[a-zA-Z]}}
- %arrayidx1 = getelementptr inbounds [100 x i32], ptr addrspace(3) @count, i32 0, i32 %i
- store i32 0, ptr addrspace(3) %arrayidx1, align 4
- ret void
-}
-
-define protected amdgpu_kernel void @lds_load(i32 %i) sanitize_address {
-entry:
- ; CHECK-LABEL: @lds_load(
- ; CHECK-NOT: call {{[a-zA-Z]}}
- %arrayidx1 = getelementptr inbounds [100 x i32], ptr addrspace(3) @count, i32 0, i32 %i
- %0 = load i32, ptr addrspace(3) %arrayidx1, align 4
- ret void
-}
-
-; CHECK-LABEL: define internal void @asan.module_ctor()
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
index cb37ba24f1c74..e5ce018ba0f40 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll
@@ -9,12 +9,10 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP21:%.*]]
-; CHECK: 4:
+; CHECK: 2:
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -31,15 +29,15 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
; CHECK-NEXT: br i1 [[TMP17]], label [[ASAN_REPORT:%.*]], label [[TMP20:%.*]], !prof [[PROF0:![0-9]+]]
; CHECK: asan.report:
; CHECK-NEXT: br i1 [[TMP15]], label [[TMP18:%.*]], label [[TMP19:%.*]]
-; CHECK: 18:
+; CHECK: 16:
; CHECK-NEXT: call void @__asan_report_store4(i64 [[TMP5]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label [[TMP19]]
-; CHECK: 19:
+; CHECK: 17:
; CHECK-NEXT: br label [[TMP20]]
-; CHECK: 20:
+; CHECK: 18:
; CHECK-NEXT: br label [[TMP21]]
-; CHECK: 21:
+; CHECK: 19:
; CHECK-NEXT: store i32 0, ptr [[Q]], align 4
; CHECK-NEXT: ret void
;
@@ -47,12 +45,10 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
; RECOV-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
; RECOV-NEXT: entry:
; RECOV-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; RECOV-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
; RECOV-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; RECOV-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true
; RECOV-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP17:%.*]]
-; RECOV: 4:
+; RECOV: 2:
; RECOV-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
; RECOV-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
; RECOV-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -68,9 +64,9 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i)
; RECOV: asan.report:
; RECOV-NEXT: call void @__asan_report_store4_noabort(i64 [[TMP5]]) #[[ATTR3:[0-9]+]]
; RECOV-NEXT: br label [[TMP16]]
-; RECOV: 16:
+; RECOV: 14:
; RECOV-NEXT: br label [[TMP17]]
-; RECOV: 17:
+; RECOV: 15:
; RECOV-NEXT: store i32 0, ptr [[Q]], align 4
; RECOV-NEXT: ret void
;
@@ -86,12 +82,10 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP21:%.*]]
-; CHECK: 4:
+; CHECK: 2:
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -108,15 +102,15 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
; CHECK-NEXT: br i1 [[TMP17]], label [[ASAN_REPORT:%.*]], label [[TMP20:%.*]], !prof [[PROF0]]
; CHECK: asan.report:
; CHECK-NEXT: br i1 [[TMP15]], label [[TMP18:%.*]], label [[TMP19:%.*]]
-; CHECK: 18:
+; CHECK: 16:
; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP5]]) #[[ATTR5]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label [[TMP19]]
-; CHECK: 19:
+; CHECK: 17:
; CHECK-NEXT: br label [[TMP20]]
-; CHECK: 20:
+; CHECK: 18:
; CHECK-NEXT: br label [[TMP21]]
-; CHECK: 21:
+; CHECK: 19:
; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[Q]], align 4
; CHECK-NEXT: ret void
;
@@ -124,12 +118,10 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
; RECOV-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
; RECOV-NEXT: entry:
; RECOV-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; RECOV-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
; RECOV-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; RECOV-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true
; RECOV-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP17:%.*]]
-; RECOV: 4:
+; RECOV: 2:
; RECOV-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
; RECOV-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
; RECOV-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -145,9 +137,9 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s
; RECOV: asan.report:
; RECOV-NEXT: call void @__asan_report_load4_noabort(i64 [[TMP5]]) #[[ATTR3]]
; RECOV-NEXT: br label [[TMP16]]
-; RECOV: 16:
+; RECOV: 14:
; RECOV-NEXT: br label [[TMP17]]
-; RECOV: 17:
+; RECOV: 15:
; RECOV-NEXT: [[R:%.*]] = load i32, ptr [[Q]], align 4
; RECOV-NEXT: ret void
;
@@ -163,12 +155,10 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP16:%.*]]
-; CHECK: 4:
+; CHECK: 2:
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -180,15 +170,15 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
; CHECK-NEXT: br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
; CHECK: asan.report:
; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
-; CHECK: 13:
+; CHECK: 11:
; CHECK-NEXT: call void @__asan_report_store8(i64 [[TMP5]]) #[[ATTR5]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label [[TMP14]]
-; CHECK: 14:
+; CHECK: 12:
; CHECK-NEXT: br label [[TMP15]]
-; CHECK: 15:
+; CHECK: 13:
; CHECK-NEXT: br label [[TMP16]]
-; CHECK: 16:
+; CHECK: 14:
; CHECK-NEXT: store i64 0, ptr [[Q]], align 8
; CHECK-NEXT: ret void
;
@@ -196,12 +186,10 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
; RECOV-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] {
; RECOV-NEXT: entry:
; RECOV-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; RECOV-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
; RECOV-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; RECOV-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true
; RECOV-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
-; RECOV: 4:
+; RECOV: 2:
; RECOV-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
; RECOV-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
; RECOV-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -212,9 +200,9 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti
; RECOV: asan.report:
; RECOV-NEXT: call void @__asan_report_store8_noabort(i64 [[TMP5]]) #[[ATTR3]]
; RECOV-NEXT: br label [[TMP11]]
-; RECOV: 11:
+; RECOV: 9:
; RECOV-NEXT: br label [[TMP12]]
-; RECOV: 12:
+; RECOV: 10:
; RECOV-NEXT: store i64 0, ptr [[Q]], align 8
; RECOV-NEXT: ret void
;
@@ -229,12 +217,10 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP16:%.*]]
-; CHECK: 4:
+; CHECK: 2:
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -246,15 +232,15 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
; CHECK-NEXT: br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
; CHECK: asan.report:
; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
-; CHECK: 13:
+; CHECK: 11:
; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP5]]) #[[ATTR5]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label [[TMP14]]
-; CHECK: 14:
+; CHECK: 12:
; CHECK-NEXT: br label [[TMP15]]
-; CHECK: 15:
+; CHECK: 13:
; CHECK-NEXT: br label [[TMP16]]
-; CHECK: 16:
+; CHECK: 14:
; CHECK-NEXT: [[R:%.*]] = load i64, ptr [[Q]], align 8
; CHECK-NEXT: ret void
;
@@ -262,12 +248,10 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
; RECOV-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] {
; RECOV-NEXT: entry:
; RECOV-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
-; RECOV-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]])
; RECOV-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]])
-; RECOV-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true
+; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true
; RECOV-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
-; RECOV: 4:
+; RECOV: 2:
; RECOV-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64
; RECOV-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3
; RECOV-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880
@@ -278,9 +262,9 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz
; RECOV: asan.report:
; RECOV-NEXT: call void @__asan_report_load8_noabort(i64 [[TMP5]]) #[[ATTR3]]
; RECOV-NEXT: br label [[TMP11]]
-; RECOV: 11:
+; RECOV: 9:
; RECOV-NEXT: br label [[TMP12]]
-; RECOV: 12:
+; RECOV: 10:
; RECOV-NEXT: [[R:%.*]] = load i64, ptr [[Q]], align 8
; RECOV-NEXT: ret void
;
>From b761645f68227f6c8eaec57941fd61c62ad54603 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Thu, 23 May 2024 13:01:22 +0530
Subject: [PATCH 2/2] [AMDGPU] Enable flag to choose instrumentation of LDS.
[AMDGPU] Update the MD initializer and do not replace uses of MD global.
---
.../Instrumentation/AddressSanitizer.cpp | 140 ++++++------------
.../asan-dynamic-lds-indirect-access.ll | 127 ++++++----------
.../AMDGPU/asan-dynamic-lds-test.ll | 66 +++------
.../AMDGPU/asan-static-indirect-access.ll | 126 ++++++----------
...atic-lds-indirect-access-function-param.ll | 53 +++----
.../AMDGPU/asan-static-lds-test.ll | 105 +++++--------
6 files changed, 224 insertions(+), 393 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index a312f1d50fe9a..2cead6cdc273a 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -232,6 +232,11 @@ static cl::opt<bool>
cl::desc("instrument byval call arguments"), cl::Hidden,
cl::init(true));
+static cl::opt<bool>
+ ClInstrumentAMDGPULDS("asan-instrument-amdgpu-lds",
+ cl::desc("instrument amdgpu LDS accesses"),
+ cl::Hidden, cl::init(true));
+
static cl::opt<bool> ClAlwaysSlowPath(
"asan-always-slow-path",
cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden,
@@ -1293,8 +1298,8 @@ static GlobalVariable *getKernelSwLDSBaseGlobal(Module &M) {
static void updateLDSSizeFnAttr(Function *Func, uint32_t Offset,
bool UsesDynLDS) {
if (Offset != 0) {
- std::string Buffer;
- raw_string_ostream SS{Buffer};
+ SmallString<256> Buffer;
+ raw_svector_ostream SS(Buffer);
SS << format("%u", Offset);
if (UsesDynLDS)
SS << format(",%u", Offset);
@@ -1312,33 +1317,30 @@ static void recordLDSAbsoluteAddress(Module &M, GlobalVariable *GV,
MDNode::get(Ctx, {MinC, MaxC}));
}
-static void UpdateSwLDSMetadataWithRedzoneInfo(Function &F, int Scale) {
+/// Update SwLDS Metadata global initializer with redzone info.
+static SmallVector<std::pair<uint32_t, uint32_t>, 64>
+UpdateSwLDSMetadataWithRedzoneInfo(Function &F, int Scale) {
Module *M = F.getParent();
GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F);
GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F);
if (!SwLDSMetadataGlobal || !SwLDSGlobal)
- return;
+ return {};
LLVMContext &Ctx = M->getContext();
Type *Int32Ty = Type::getInt32Ty(Ctx);
-
+ SmallVector<std::pair<uint32_t, uint32_t>, 64> RedzoneOffsetAndSizeVector;
Constant *MdInit = SwLDSMetadataGlobal->getInitializer();
- Align MdAlign = Align(SwLDSMetadataGlobal->getAlign().valueOrOne());
Align LDSAlign = Align(SwLDSGlobal->getAlign().valueOrOne());
StructType *MDStructType =
cast<StructType>(SwLDSMetadataGlobal->getValueType());
- assert(MDStructType);
unsigned NumStructs = MDStructType->getNumElements();
-
- std::vector<Type *> Items;
std::vector<Constant *> Initializers;
uint32_t MallocSize = 0;
- //{GV.start, Align(GV.size + Redzone.size), Redzone.start, Redzone.size}
- StructType *LDSItemTy = StructType::create(
- Ctx, {Int32Ty, Int32Ty, Int32Ty, Int32Ty, Int32Ty}, "");
+ StructType *LDSItemTy =
+ cast<StructType>(MDStructType->getStructElementType(0));
+
for (unsigned i = 0; i < NumStructs; i++) {
- Items.push_back(LDSItemTy);
ConstantStruct *member =
dyn_cast<ConstantStruct>(MdInit->getAggregateElement(i));
Constant *NewInitItem;
@@ -1353,91 +1355,48 @@ static void UpdateSwLDSMetadataWithRedzoneInfo(Function &F, int Scale) {
const uint64_t RightRedzoneSize =
getRedzoneSizeForGlobal(Scale, GlobalSizeValue);
MallocSize += GlobalSizeValue;
- Constant *NewItemRedzoneStartOffset =
- ConstantInt::get(Int32Ty, MallocSize);
+ RedzoneOffsetAndSizeVector.emplace_back(MallocSize, RightRedzoneSize);
MallocSize += RightRedzoneSize;
- Constant *NewItemRedzoneSize =
- ConstantInt::get(Int32Ty, RightRedzoneSize);
-
unsigned NewItemAlignGlobalPlusRedzoneSize =
alignTo(GlobalSizeValue + RightRedzoneSize, LDSAlign);
Constant *NewItemAlignGlobalPlusRedzoneSizeConst =
ConstantInt::get(Int32Ty, NewItemAlignGlobalPlusRedzoneSize);
NewInitItem = ConstantStruct::get(
LDSItemTy, {NewItemStartOffset, NewItemGlobalSizeConst,
- NewItemAlignGlobalPlusRedzoneSizeConst,
- NewItemRedzoneStartOffset, NewItemRedzoneSize});
+ NewItemAlignGlobalPlusRedzoneSizeConst});
MallocSize = alignTo(MallocSize, LDSAlign);
} else {
Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize);
Constant *zero = ConstantInt::get(Int32Ty, 0);
- NewInitItem = ConstantStruct::get(
- LDSItemTy, {CurrMallocSize, zero, zero, zero, zero});
+ NewInitItem =
+ ConstantStruct::get(LDSItemTy, {CurrMallocSize, zero, zero});
+ RedzoneOffsetAndSizeVector.emplace_back(0, 0);
}
} else {
Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize);
Constant *zero = ConstantInt::get(Int32Ty, 0);
- NewInitItem = ConstantStruct::get(
- LDSItemTy, {CurrMallocSize, zero, zero, zero, zero});
+ NewInitItem =
+ ConstantStruct::get(LDSItemTy, {CurrMallocSize, zero, zero});
+ RedzoneOffsetAndSizeVector.emplace_back(0, 0);
}
Initializers.push_back(NewInitItem);
}
GlobalVariable *SwDynLDS = getKernelSwDynLDSGlobal(*M, F);
- bool usesDynLDS = SwDynLDS ? true : false;
+ bool usesDynLDS = SwDynLDS != nullptr;
updateLDSSizeFnAttr(&F, MallocSize, usesDynLDS);
if (usesDynLDS)
recordLDSAbsoluteAddress(*M, SwDynLDS, MallocSize);
- StructType *MetadataStructType = StructType::create(Ctx, Items, "");
-
- GlobalVariable *NewSwLDSMetadataGlobal = new GlobalVariable(
- *M, MetadataStructType, false, GlobalValue::InternalLinkage,
- PoisonValue::get(MetadataStructType), "", nullptr,
- GlobalValue::NotThreadLocal, 1, false);
- Constant *Data = ConstantStruct::get(MetadataStructType, Initializers);
- NewSwLDSMetadataGlobal->setInitializer(Data);
- NewSwLDSMetadataGlobal->setAlignment(MdAlign);
- GlobalValue::SanitizerMetadata MD;
- MD.NoAddress = true;
- NewSwLDSMetadataGlobal->setSanitizerMetadata(MD);
-
- for (Use &U : make_early_inc_range(SwLDSMetadataGlobal->uses())) {
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(U.getUser())) {
- SmallVector<Constant *> Indices;
- for (Use &Idx : GEP->indices()) {
- Indices.push_back(cast<Constant>(Idx));
- }
- Constant *NewGEP = ConstantExpr::getGetElementPtr(
- MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
- GEP->replaceAllUsesWith(NewGEP);
- } else if (LoadInst *Load = dyn_cast<LoadInst>(U.getUser())) {
- Constant *zero = ConstantInt::get(Int32Ty, 0);
- SmallVector<Constant *> Indices{zero, zero, zero};
- Constant *NewGEP = ConstantExpr::getGetElementPtr(
- MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
- IRBuilder<> IRB(Load);
- LoadInst *NewLoad = IRB.CreateLoad(Load->getType(), NewGEP);
- Load->replaceAllUsesWith(NewLoad);
- Load->eraseFromParent();
- } else if (StoreInst *Store = dyn_cast<StoreInst>(U.getUser())) {
- Constant *zero = ConstantInt::get(Int32Ty, 0);
- SmallVector<Constant *> Indices{zero, zero, zero};
- Constant *NewGEP = ConstantExpr::getGetElementPtr(
- MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
- IRBuilder<> IRB(Store);
- StoreInst *NewStore = IRB.CreateStore(Store->getValueOperand(), NewGEP);
- Store->replaceAllUsesWith(NewStore);
- Store->eraseFromParent();
- } else
- report_fatal_error("AMDGPU Sw LDS Metadata User instruction not handled");
- }
- SwLDSMetadataGlobal->replaceAllUsesWith(NewSwLDSMetadataGlobal);
- NewSwLDSMetadataGlobal->takeName(SwLDSMetadataGlobal);
- SwLDSMetadataGlobal->eraseFromParent();
- return;
+ Constant *Data = ConstantStruct::get(MDStructType, Initializers);
+ SwLDSMetadataGlobal->setInitializer(Data);
+ return RedzoneOffsetAndSizeVector;
}
-static void poisonRedzonesForSwLDS(Function &F) {
+/// Poison redzone regions using the redzone size and offset info.
+static void
+poisonRedzonesForSwLDS(Function &F,
+ SmallVector<std::pair<uint32_t, uint32_t>, 64>
+ &RedzoneOffsetAndSizeVector) {
Module *M = F.getParent();
GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F);
GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F);
@@ -1470,6 +1429,7 @@ static void poisonRedzonesForSwLDS(Function &F) {
unsigned NumStructs = MDStructType->getNumElements();
Value *StoreMallocPointer = SI->getValueOperand();
+ assert(RedzoneOffsetAndSizeVector.size() == NumStructs);
for (unsigned i = 0; i < NumStructs; i++) {
ConstantStruct *member =
dyn_cast<ConstantStruct>(MdInit->getAggregateElement(i));
@@ -1484,35 +1444,28 @@ static void poisonRedzonesForSwLDS(Function &F) {
continue;
IRBuilder<> IRB(SI);
IRB.SetInsertPoint(SI->getNextNode());
+ auto &RedzonePair = RedzoneOffsetAndSizeVector[i];
+ uint64_t RedzoneOffset = RedzonePair.first;
+ uint64_t RedzoneSize = RedzonePair.second;
- auto *GEPForOffset = IRB.CreateInBoundsGEP(
- MDStructType, SwLDSMetadataGlobal,
- {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(3)});
-
- auto *GEPForSize = IRB.CreateInBoundsGEP(
- MDStructType, SwLDSMetadataGlobal,
- {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(4)});
-
- Value *RedzoneOffset = IRB.CreateLoad(IRB.getInt32Ty(), GEPForOffset);
- RedzoneOffset = IRB.CreateZExt(RedzoneOffset, IRB.getInt64Ty());
Value *RedzoneAddrOffset = IRB.CreateInBoundsGEP(
- IRB.getInt8Ty(), StoreMallocPointer, {RedzoneOffset});
+ IRB.getInt8Ty(), StoreMallocPointer, {IRB.getInt64(RedzoneOffset)});
Value *RedzoneAddress =
IRB.CreatePtrToInt(RedzoneAddrOffset, IRB.getInt64Ty());
- Value *RedzoneSize = IRB.CreateLoad(IRB.getInt32Ty(), GEPForSize);
- RedzoneSize = IRB.CreateZExt(RedzoneSize, IRB.getInt64Ty());
- IRB.CreateCall(AsanPoisonRegion, {RedzoneAddress, RedzoneSize});
+ IRB.CreateCall(AsanPoisonRegion,
+ {RedzoneAddress, IRB.getInt64(RedzoneSize)});
}
}
- return;
}
+/// Update SwLDS Metadata global initializer with redzone info.
+/// Poison redzone regions using the redzone size and offset info.
static void preProcessAMDGPULDSAccesses(Module &M, int Scale) {
for (Function &F : M) {
- UpdateSwLDSMetadataWithRedzoneInfo(F, Scale);
- poisonRedzonesForSwLDS(F);
+ auto RedzoneOffsetAndSizeVector =
+ UpdateSwLDSMetadataWithRedzoneInfo(F, Scale);
+ poisonRedzonesForSwLDS(F, RedzoneOffsetAndSizeVector);
}
- return;
}
AddressSanitizerPass::AddressSanitizerPass(
@@ -1527,7 +1480,7 @@ PreservedAnalyses AddressSanitizerPass::run(Module &M,
ModuleAnalysisManager &MAM) {
Triple TargetTriple = Triple(M.getTargetTriple());
- if (TargetTriple.isAMDGPU()) {
+ if (TargetTriple.isAMDGPU() && ClInstrumentAMDGPULDS) {
unsigned LongSize = M.getDataLayout().getPointerSizeInBits();
ShadowMapping Mapping = getShadowMapping(TargetTriple, LongSize, false);
preProcessAMDGPULDSAccesses(M, Mapping.Scale);
@@ -2147,7 +2100,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
}
Value *AddrLong;
- if (TargetTriple.isAMDGCN()) {
+ if (TargetTriple.isAMDGPU() && ClInstrumentAMDGPULDS) {
Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
if (PtrTy->getPointerAddressSpace() == 3) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
@@ -2168,6 +2121,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
SwLDS = IRB.CreateIntToPtr(IRB.getInt32(0), IRB.getPtrTy(3));
}
}
+ assert(SwLDS && "Invalid AMDGPU Sw LDS base ptr");
Value *PtrToInt = IRB.CreatePtrToInt(Addr, IRB.getInt32Ty());
Value *LoadMallocPtr = IRB.CreateLoad(IRB.getPtrTy(1), SwLDS);
Value *GEP =
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
index f37fbf350ffde..8ef11f313f765 100755
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
@@ -11,12 +11,11 @@
@llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
; Function Attrs: sanitize_address
-;.
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 0 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 0 } }, no_sanitize_address
; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 }, %1 { i32 96, i32 0, i32 0, i32 0, i32 0 }, %1 { i32 96, i32 0, i32 0, i32 0, i32 0 } }, no_sanitize_address, align 1
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
@@ -229,14 +228,14 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB60:.*]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB48:.*]]
; CHECK: [[MALLOC]]:
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
-; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4
+; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 3
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880
@@ -261,19 +260,19 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
; CHECK-NEXT: br label %[[BB26]]
; CHECK: [[BB26]]:
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
-; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4
+; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4
; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 7
; CHECK-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], 8
; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 8
-; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4
+; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4
; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]]
-; CHECK-NEXT: store i32 [[TMP31]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+; CHECK-NEXT: store i32 [[TMP31]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
-; CHECK-NEXT: store i32 [[TMP32]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4
+; CHECK-NEXT: store i32 [[TMP32]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4
; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[TMP32]], 7
; CHECK-NEXT: [[TMP34:%.*]] = udiv i32 [[TMP33]], 8
; CHECK-NEXT: [[TMP35:%.*]] = mul i32 [[TMP34]], 8
-; CHECK-NEXT: store i32 [[TMP35]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+; CHECK-NEXT: store i32 [[TMP35]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP31]], [[TMP35]]
; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i64
; CHECK-NEXT: [[TMP38:%.*]] = call ptr @llvm.returnaddress(i32 0)
@@ -281,34 +280,22 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
; CHECK-NEXT: [[TMP40:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP37]], i64 [[TMP39]])
; CHECK-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP41]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
-; CHECK-NEXT: [[TMP43:%.*]] = zext i32 [[TMP42]] to i64
-; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP43]]
+; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 68
; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP44]] to i64
-; CHECK-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
-; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP46]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP45]], i64 [[TMP47]])
-; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
-; CHECK-NEXT: [[TMP49:%.*]] = zext i32 [[TMP48]] to i64
-; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP49]]
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP45]], i64 28)
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 33
; CHECK-NEXT: [[TMP51:%.*]] = ptrtoint ptr addrspace(1) [[TMP50]] to i64
-; CHECK-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
-; CHECK-NEXT: [[TMP53:%.*]] = zext i32 [[TMP52]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP51]], i64 [[TMP53]])
-; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
-; CHECK-NEXT: [[TMP55:%.*]] = zext i32 [[TMP54]] to i64
-; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP55]]
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP51]], i64 31)
+; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 8
; CHECK-NEXT: [[TMP57:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64
-; CHECK-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
-; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP57]], i64 [[TMP59]])
-; CHECK-NEXT: br label %[[BB60]]
-; CHECK: [[BB60]]:
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP57]], i64 24)
+; CHECK-NEXT: br label %[[BB48]]
+; CHECK: [[BB48]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP61]]
-; CHECK-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP63]]
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
; CHECK-NEXT: call void @use_variables()
@@ -327,16 +314,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
; CHECK-NEXT: [[TMP77:%.*]] = and i1 [[TMP73]], [[TMP76]]
; CHECK-NEXT: [[TMP78:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP77]])
; CHECK-NEXT: [[TMP79:%.*]] = icmp ne i64 [[TMP78]], 0
-; CHECK-NEXT: br i1 [[TMP79]], label %[[ASAN_REPORT1:.*]], label %[[BB82:.*]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP79]], label %[[ASAN_REPORT1:.*]], label %[[BB70:.*]], !prof [[PROF2]]
; CHECK: [[ASAN_REPORT1]]:
-; CHECK-NEXT: br i1 [[TMP77]], label %[[BB80:.*]], label %[[BB81:.*]]
-; CHECK: [[BB80]]:
+; CHECK-NEXT: br i1 [[TMP77]], label %[[BB68:.*]], label %[[BB69:.*]]
+; CHECK: [[BB68]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP68]]) #[[ATTR8]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label %[[BB81]]
-; CHECK: [[BB81]]:
-; CHECK-NEXT: br label %[[BB82]]
-; CHECK: [[BB82]]:
+; CHECK-NEXT: br label %[[BB69]]
+; CHECK: [[BB69]]:
+; CHECK-NEXT: br label %[[BB70]]
+; CHECK: [[BB70]]:
; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP62]], align 1
; CHECK-NEXT: [[TMP83:%.*]] = ptrtoint ptr addrspace(3) [[TMP64]] to i64
; CHECK-NEXT: [[TMP84:%.*]] = add i64 [[TMP83]], 3
@@ -356,16 +343,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
; CHECK-NEXT: [[TMP98:%.*]] = and i1 [[TMP94]], [[TMP97]]
; CHECK-NEXT: [[TMP99:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP98]])
; CHECK-NEXT: [[TMP100:%.*]] = icmp ne i64 [[TMP99]], 0
-; CHECK-NEXT: br i1 [[TMP100]], label %[[ASAN_REPORT2:.*]], label %[[BB103:.*]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP100]], label %[[ASAN_REPORT2:.*]], label %[[BB91:.*]], !prof [[PROF2]]
; CHECK: [[ASAN_REPORT2]]:
-; CHECK-NEXT: br i1 [[TMP98]], label %[[BB101:.*]], label %[[BB102:.*]]
-; CHECK: [[BB101]]:
+; CHECK-NEXT: br i1 [[TMP98]], label %[[BB89:.*]], label %[[BB90:.*]]
+; CHECK: [[BB89]]:
; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP89]], i64 4) #[[ATTR8]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label %[[BB102]]
-; CHECK: [[BB102]]:
-; CHECK-NEXT: br label %[[BB103]]
-; CHECK: [[BB103]]:
+; CHECK-NEXT: br label %[[BB90]]
+; CHECK: [[BB90]]:
+; CHECK-NEXT: br label %[[BB91]]
+; CHECK: [[BB91]]:
; CHECK-NEXT: [[TMP104:%.*]] = ptrtoint ptr addrspace(3) [[TMP85]] to i32
; CHECK-NEXT: [[TMP105:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP105]], i32 [[TMP104]]
@@ -381,16 +368,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 {
; CHECK-NEXT: [[TMP116:%.*]] = and i1 [[TMP112]], [[TMP115]]
; CHECK-NEXT: [[TMP117:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP116]])
; CHECK-NEXT: [[TMP118:%.*]] = icmp ne i64 [[TMP117]], 0
-; CHECK-NEXT: br i1 [[TMP118]], label %[[ASAN_REPORT3:.*]], label %[[BB121:.*]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP118]], label %[[ASAN_REPORT3:.*]], label %[[BB109:.*]], !prof [[PROF2]]
; CHECK: [[ASAN_REPORT3]]:
-; CHECK-NEXT: br i1 [[TMP116]], label %[[BB119:.*]], label %[[BB120:.*]]
-; CHECK: [[BB119]]:
+; CHECK-NEXT: br i1 [[TMP116]], label %[[BB107:.*]], label %[[BB108:.*]]
+; CHECK: [[BB107]]:
; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP107]], i64 4) #[[ATTR8]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label %[[BB120]]
-; CHECK: [[BB120]]:
-; CHECK-NEXT: br label %[[BB121]]
-; CHECK: [[BB121]]:
+; CHECK-NEXT: br label %[[BB108]]
+; CHECK: [[BB108]]:
+; CHECK-NEXT: br label %[[BB109]]
+; CHECK: [[BB109]]:
; CHECK-NEXT: store i32 8, ptr addrspace(3) [[TMP64]], align 2
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
@@ -415,7 +402,7 @@ WId:
%5 = icmp eq i32 %4, 0
br i1 %5, label %Malloc, label %26
-Malloc: ; preds = %WId
+Malloc:
%6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
%7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
%8 = add i32 %6, %7
@@ -445,7 +432,7 @@ Malloc: ; preds = %WId
store ptr addrspace(1) %25, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
br label %26
-26: ; preds = %Malloc, %WId
+26:
%xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
call void @llvm.amdgcn.s.barrier()
%27 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -458,11 +445,11 @@ Malloc: ; preds = %WId
store i32 8, ptr addrspace(3) %30, align 2
br label %CondFree
-CondFree: ; preds = %26
+CondFree:
call void @llvm.amdgcn.s.barrier()
br i1 %xyzCond, label %Free, label %End
-Free: ; preds = %CondFree
+Free:
%31 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
%32 = call ptr @llvm.returnaddress(i32 0)
%33 = ptrtoint ptr %32 to i64
@@ -470,36 +457,20 @@ Free: ; preds = %CondFree
call void @__asan_free_impl(i64 %34, i64 %33)
br label %End
-End: ; preds = %Free, %CondFree
+End:
ret void
}
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
-declare void @llvm.donothing() #2
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.donothing() #2
declare i32 @llvm.amdgcn.workitem.id.x() #3
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.y() #3
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.z() #3
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3
-
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare ptr @llvm.returnaddress(i32 immarg) #2
-
declare i64 @__asan_malloc_impl(i64, i64)
-
-; Function Attrs: convergent nocallback nofree nounwind willreturn
declare void @llvm.amdgcn.s.barrier() #4
-
declare void @__asan_free_impl(i64, i64)
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.lds.kernel.id() #3
attributes #0 = { sanitize_address }
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
index 1be50f48aa6f6..e64a6fb8aca5f 100755
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
@@ -12,7 +12,7 @@
;.
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol [[META1:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 0, i32 0, i32 0, i32 0 } }, no_sanitize_address, align 1
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 0, i32 0 } }, no_sanitize_address
; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
@@ -29,14 +29,14 @@ define amdgpu_kernel void @k0() #0 {
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB43:.*]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB39:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
-; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 3
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880
@@ -61,11 +61,11 @@ define amdgpu_kernel void @k0() #0 {
; CHECK-NEXT: br label %[[BB26]]
; CHECK: [[BB26]]:
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
-; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4
+; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4
; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 0
; CHECK-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], 1
; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 1
-; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]]
; CHECK-NEXT: [[TMP32:%.*]] = zext i32 [[TMP31]] to i64
; CHECK-NEXT: [[TMP33:%.*]] = call ptr @llvm.returnaddress(i32 0)
@@ -73,18 +73,14 @@ define amdgpu_kernel void @k0() #0 {
; CHECK-NEXT: [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP32]], i64 [[TMP34]])
; CHECK-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP36]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
-; CHECK-NEXT: [[TMP38:%.*]] = zext i32 [[TMP37]] to i64
-; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP36]], i64 [[TMP38]]
+; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP36]], i64 8
; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64
-; CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
-; CHECK-NEXT: [[TMP42:%.*]] = zext i32 [[TMP41]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP40]], i64 [[TMP42]])
-; CHECK-NEXT: br label %[[BB43]]
-; CHECK: [[BB43]]:
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP40]], i64 24)
+; CHECK-NEXT: br label %[[BB39]]
+; CHECK: [[BB39]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP44]]
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr addrspace(3) [[TMP45]] to i32
@@ -102,16 +98,16 @@ define amdgpu_kernel void @k0() #0 {
; CHECK-NEXT: [[TMP58:%.*]] = and i1 [[TMP54]], [[TMP57]]
; CHECK-NEXT: [[TMP59:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP58]])
; CHECK-NEXT: [[TMP60:%.*]] = icmp ne i64 [[TMP59]], 0
-; CHECK-NEXT: br i1 [[TMP60]], label %[[ASAN_REPORT1:.*]], label %[[BB63:.*]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP60]], label %[[ASAN_REPORT1:.*]], label %[[BB59:.*]], !prof [[PROF2]]
; CHECK: [[ASAN_REPORT1]]:
-; CHECK-NEXT: br i1 [[TMP58]], label %[[BB61:.*]], label %[[BB62:.*]]
-; CHECK: [[BB61]]:
+; CHECK-NEXT: br i1 [[TMP58]], label %[[BB57:.*]], label %[[BB58:.*]]
+; CHECK: [[BB57]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP49]]) #[[ATTR7]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label %[[BB62]]
-; CHECK: [[BB62]]:
-; CHECK-NEXT: br label %[[BB63]]
-; CHECK: [[BB63]]:
+; CHECK-NEXT: br label %[[BB58]]
+; CHECK: [[BB58]]:
+; CHECK-NEXT: br label %[[BB59]]
+; CHECK: [[BB59]]:
; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP45]], align 4
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
@@ -136,7 +132,7 @@ WId:
%5 = icmp eq i32 %4, 0
br i1 %5, label %Malloc, label %21
-Malloc: ; preds = %WId
+Malloc:
%6 = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
%7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
%8 = add i32 %6, %7
@@ -158,7 +154,7 @@ Malloc: ; preds = %WId
store ptr addrspace(1) %20, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
br label %21
-21: ; preds = %Malloc, %WId
+21:
%xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
call void @llvm.amdgcn.s.barrier()
%22 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -167,11 +163,11 @@ Malloc: ; preds = %WId
store i8 7, ptr addrspace(3) %23, align 4
br label %CondFree
-CondFree: ; preds = %21
+CondFree:
call void @llvm.amdgcn.s.barrier()
br i1 %xyzCond, label %Free, label %End
-Free: ; preds = %CondFree
+Free:
%24 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
%25 = call ptr @llvm.returnaddress(i32 0)
%26 = ptrtoint ptr %25 to i64
@@ -179,31 +175,17 @@ Free: ; preds = %CondFree
call void @__asan_free_impl(i64 %27, i64 %26)
br label %End
-End: ; preds = %Free, %CondFree
+End:
ret void
}
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare void @llvm.donothing() #1
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.y() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.z() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
-
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare ptr @llvm.returnaddress(i32 immarg) #1
-
declare i64 @__asan_malloc_impl(i64, i64)
-
-; Function Attrs: convergent nocallback nofree nounwind willreturn
declare void @llvm.amdgcn.s.barrier() #3
declare void @__asan_free_impl(i64, i64)
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
index 23f27aa797e73..57d91cafcab56 100755
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
@@ -10,11 +10,10 @@
@llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
; Function Attrs: sanitize_address
-;.
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 3, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 4, i32 32 } }, no_sanitize_address
; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
-; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 }, %1 { i32 96, i32 3, i32 32, i32 99, i32 29 }, %1 { i32 128, i32 4, i32 32, i32 132, i32 28 } }, no_sanitize_address, align 1
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
@@ -231,10 +230,10 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB44:.*]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB24:.*]]
; CHECK: [[MALLOC]]:
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
@@ -242,48 +241,28 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 3), align 4
-; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 132
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 4), align 4
-; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
-; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 3), align 4
-; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28)
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 99
; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 4), align 4
-; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
-; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
-; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64
-; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP27]]
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 29)
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64
-; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
-; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP30]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP29]], i64 [[TMP31]])
-; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
-; CHECK-NEXT: [[TMP33:%.*]] = zext i32 [[TMP32]] to i64
-; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP33]]
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP29]], i64 28)
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
; CHECK-NEXT: [[TMP35:%.*]] = ptrtoint ptr addrspace(1) [[TMP34]] to i64
-; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
-; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP35]], i64 [[TMP37]])
-; CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
-; CHECK-NEXT: [[TMP39:%.*]] = zext i32 [[TMP38]] to i64
-; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP39]]
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP35]], i64 31)
+; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP41:%.*]] = ptrtoint ptr addrspace(1) [[TMP40]] to i64
-; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
-; CHECK-NEXT: [[TMP43:%.*]] = zext i32 [[TMP42]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP41]], i64 [[TMP43]])
-; CHECK-NEXT: br label %[[BB44]]
-; CHECK: [[BB44]]:
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP41]], i64 24)
+; CHECK-NEXT: br label %[[BB24]]
+; CHECK: [[BB24]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP45]]
-; CHECK-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP47]]
; CHECK-NEXT: call void @use_variables()
; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(3) [[TMP46]] to i32
@@ -301,16 +280,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
; CHECK-NEXT: [[TMP61:%.*]] = and i1 [[TMP57]], [[TMP60]]
; CHECK-NEXT: [[TMP62:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP61]])
; CHECK-NEXT: [[TMP63:%.*]] = icmp ne i64 [[TMP62]], 0
-; CHECK-NEXT: br i1 [[TMP63]], label %[[ASAN_REPORT:.*]], label %[[BB66:.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[TMP63]], label %[[ASAN_REPORT:.*]], label %[[BB46:.*]], !prof [[PROF1]]
; CHECK: [[ASAN_REPORT]]:
-; CHECK-NEXT: br i1 [[TMP61]], label %[[BB64:.*]], label %[[BB65:.*]]
-; CHECK: [[BB64]]:
+; CHECK-NEXT: br i1 [[TMP61]], label %[[BB44:.*]], label %[[BB45:.*]]
+; CHECK: [[BB44]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP52]]) #[[ATTR8]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label %[[BB65]]
-; CHECK: [[BB65]]:
-; CHECK-NEXT: br label %[[BB66]]
-; CHECK: [[BB66]]:
+; CHECK-NEXT: br label %[[BB45]]
+; CHECK: [[BB45]]:
+; CHECK-NEXT: br label %[[BB46]]
+; CHECK: [[BB46]]:
; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP46]], align 1
; CHECK-NEXT: [[TMP67:%.*]] = ptrtoint ptr addrspace(3) [[TMP48]] to i64
; CHECK-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], 3
@@ -330,16 +309,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
; CHECK-NEXT: [[TMP82:%.*]] = and i1 [[TMP78]], [[TMP81]]
; CHECK-NEXT: [[TMP83:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP82]])
; CHECK-NEXT: [[TMP84:%.*]] = icmp ne i64 [[TMP83]], 0
-; CHECK-NEXT: br i1 [[TMP84]], label %[[ASAN_REPORT1:.*]], label %[[BB87:.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[TMP84]], label %[[ASAN_REPORT1:.*]], label %[[BB67:.*]], !prof [[PROF1]]
; CHECK: [[ASAN_REPORT1]]:
-; CHECK-NEXT: br i1 [[TMP82]], label %[[BB85:.*]], label %[[BB86:.*]]
-; CHECK: [[BB85]]:
+; CHECK-NEXT: br i1 [[TMP82]], label %[[BB65:.*]], label %[[BB66:.*]]
+; CHECK: [[BB65]]:
; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP73]], i64 4) #[[ATTR8]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label %[[BB86]]
-; CHECK: [[BB86]]:
-; CHECK-NEXT: br label %[[BB87]]
-; CHECK: [[BB87]]:
+; CHECK-NEXT: br label %[[BB66]]
+; CHECK: [[BB66]]:
+; CHECK-NEXT: br label %[[BB67]]
+; CHECK: [[BB67]]:
; CHECK-NEXT: [[TMP88:%.*]] = ptrtoint ptr addrspace(3) [[TMP69]] to i32
; CHECK-NEXT: [[TMP89:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP89]], i32 [[TMP88]]
@@ -355,16 +334,16 @@ define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 {
; CHECK-NEXT: [[TMP100:%.*]] = and i1 [[TMP96]], [[TMP99]]
; CHECK-NEXT: [[TMP101:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP100]])
; CHECK-NEXT: [[TMP102:%.*]] = icmp ne i64 [[TMP101]], 0
-; CHECK-NEXT: br i1 [[TMP102]], label %[[ASAN_REPORT2:.*]], label %[[BB105:.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[TMP102]], label %[[ASAN_REPORT2:.*]], label %[[BB85:.*]], !prof [[PROF1]]
; CHECK: [[ASAN_REPORT2]]:
-; CHECK-NEXT: br i1 [[TMP100]], label %[[BB103:.*]], label %[[BB104:.*]]
-; CHECK: [[BB103]]:
+; CHECK-NEXT: br i1 [[TMP100]], label %[[BB83:.*]], label %[[BB84:.*]]
+; CHECK: [[BB83]]:
; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP91]], i64 4) #[[ATTR8]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label %[[BB104]]
-; CHECK: [[BB104]]:
-; CHECK-NEXT: br label %[[BB105]]
-; CHECK: [[BB105]]:
+; CHECK-NEXT: br label %[[BB84]]
+; CHECK: [[BB84]]:
+; CHECK-NEXT: br label %[[BB85]]
+; CHECK: [[BB85]]:
; CHECK-NEXT: store i32 8, ptr addrspace(3) [[TMP48]], align 2
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
@@ -389,7 +368,7 @@ WId:
%5 = icmp eq i32 %4, 0
br i1 %5, label %Malloc, label %14
-Malloc: ; preds = %WId
+Malloc:
%6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
%7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
%8 = add i32 %6, %7
@@ -401,7 +380,7 @@ Malloc: ; preds = %WId
store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
br label %14
-14: ; preds = %Malloc, %WId
+14:
%xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
call void @llvm.amdgcn.s.barrier()
%15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -413,11 +392,11 @@ Malloc: ; preds = %WId
store i32 8, ptr addrspace(3) %18, align 2
br label %CondFree
-CondFree: ; preds = %14
+CondFree:
call void @llvm.amdgcn.s.barrier()
br i1 %xyzCond, label %Free, label %End
-Free: ; preds = %CondFree
+Free:
%19 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
%20 = call ptr @llvm.returnaddress(i32 0)
%21 = ptrtoint ptr %20 to i64
@@ -425,30 +404,17 @@ Free: ; preds = %CondFree
call void @__asan_free_impl(i64 %22, i64 %21)
br label %End
-End: ; preds = %Free, %CondFree
+End:
ret void
}
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.y() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.z() #2
-
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare ptr @llvm.returnaddress(i32 immarg) #3
-
declare i64 @__asan_malloc_impl(i64, i64)
-
-; Function Attrs: convergent nocallback nofree nounwind willreturn
declare void @llvm.amdgcn.s.barrier() #4
-
declare void @__asan_free_impl(i64, i64)
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.lds.kernel.id() #2
attributes #0 = { sanitize_address }
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
index 072174db11f25..c14f772cb1b02 100755
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
%llvm.amdgcn.sw.lds.my_kernel.md.type = type { %llvm.amdgcn.sw.lds.my_kernel.md.item, %llvm.amdgcn.sw.lds.my_kernel.md.item }
%llvm.amdgcn.sw.lds.my_kernel.md.item = type { i32, i32, i32 }
@@ -10,7 +10,7 @@
; Function Attrs: sanitize_address
;.
; CHECK: @llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 4096, i32 5120, i32 4128, i32 1024 } }, no_sanitize_address, align 1
+; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.my_kernel.md.type { %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 32, i32 4096, i32 5120 } }, no_sanitize_address
; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
@@ -68,10 +68,10 @@ define amdgpu_kernel void @my_kernel() #1 {
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB26:.*]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
; CHECK: [[MALLOC]]:
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
@@ -79,25 +79,17 @@ define amdgpu_kernel void @my_kernel() #1 {
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 3), align 4
-; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 4128
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 4), align 4
-; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
-; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 0, i32 3), align 4
-; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 1024)
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 0, i32 4), align 4
-; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
-; CHECK-NEXT: br label %[[BB26]]
-; CHECK: [[BB26]]:
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 24)
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 [[TMP27]]
; CHECK-NEXT: [[LDS_PTR:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) [[TMP28]], i32 0, i32 0
; CHECK-NEXT: call void @my_function(ptr addrspace(3) [[LDS_PTR]])
@@ -124,7 +116,7 @@ WId:
%5 = icmp eq i32 %4, 0
br i1 %5, label %Malloc, label %14
-Malloc: ; preds = %WId
+Malloc:
%6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
%7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4
%8 = add i32 %6, %7
@@ -136,7 +128,7 @@ Malloc: ; preds = %WId
store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
br label %14
-14: ; preds = %Malloc, %WId
+14:
%xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
call void @llvm.amdgcn.s.barrier()
%15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4
@@ -145,11 +137,11 @@ Malloc: ; preds = %WId
call void @my_function(ptr addrspace(3) %lds_ptr)
br label %CondFree
-CondFree: ; preds = %14
+CondFree:
call void @llvm.amdgcn.s.barrier()
br i1 %xyzCond, label %Free, label %End
-Free: ; preds = %CondFree
+Free:
%17 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8
%18 = call ptr @llvm.returnaddress(i32 0)
%19 = ptrtoint ptr %18 to i64
@@ -157,27 +149,16 @@ Free: ; preds = %CondFree
call void @__asan_free_impl(i64 %20, i64 %19)
br label %End
-End: ; preds = %Free, %CondFree
+End:
ret void
}
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.y() #2
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.z() #2
-
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare ptr @llvm.returnaddress(i32 immarg) #3
-
declare i64 @__asan_malloc_impl(i64, i64)
-
-; Function Attrs: convergent nocallback nofree nounwind willreturn
declare void @llvm.amdgcn.s.barrier() #4
-
declare void @__asan_free_impl(i64, i64)
attributes #0 = { sanitize_address }
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
index 99dbf6c607ab5..f3a905882153d 100755
--- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }
@@ -10,7 +10,7 @@
; Function Attrs: sanitize_address
;.
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
-; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 } }, no_sanitize_address, align 1
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address
; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
@@ -27,10 +27,10 @@ define amdgpu_kernel void @k0() #0 {
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB32:.*]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
; CHECK: [[MALLOC]]:
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
@@ -38,34 +38,22 @@ define amdgpu_kernel void @k0() #0 {
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4
-; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
-; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4
-; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]])
-; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4
-; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]]
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28)
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4
-; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]])
-; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4
-; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64
-; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP27]]
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 31)
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64
-; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4
-; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP30]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP29]], i64 [[TMP31]])
-; CHECK-NEXT: br label %[[BB32]]
-; CHECK: [[BB32]]:
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP29]], i64 24)
+; CHECK-NEXT: br label %[[BB20]]
+; CHECK: [[BB20]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP33]]
-; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP35]]
; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr addrspace(3) [[TMP34]] to i32
; CHECK-NEXT: [[TMP38:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
@@ -82,16 +70,16 @@ define amdgpu_kernel void @k0() #0 {
; CHECK-NEXT: [[TMP49:%.*]] = and i1 [[TMP45]], [[TMP48]]
; CHECK-NEXT: [[TMP50:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP49]])
; CHECK-NEXT: [[TMP51:%.*]] = icmp ne i64 [[TMP50]], 0
-; CHECK-NEXT: br i1 [[TMP51]], label %[[ASAN_REPORT:.*]], label %[[BB54:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP51]], label %[[ASAN_REPORT:.*]], label %[[BB42:.*]], !prof [[PROF1:![0-9]+]]
; CHECK: [[ASAN_REPORT]]:
-; CHECK-NEXT: br i1 [[TMP49]], label %[[BB52:.*]], label %[[BB53:.*]]
-; CHECK: [[BB52]]:
+; CHECK-NEXT: br i1 [[TMP49]], label %[[BB40:.*]], label %[[BB41:.*]]
+; CHECK: [[BB40]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP40]]) #[[ATTR7:[0-9]+]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label %[[BB53]]
-; CHECK: [[BB53]]:
-; CHECK-NEXT: br label %[[BB54]]
-; CHECK: [[BB54]]:
+; CHECK-NEXT: br label %[[BB41]]
+; CHECK: [[BB41]]:
+; CHECK-NEXT: br label %[[BB42]]
+; CHECK: [[BB42]]:
; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP34]], align 4
; CHECK-NEXT: [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP36]] to i64
; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[TMP55]], 3
@@ -111,16 +99,16 @@ define amdgpu_kernel void @k0() #0 {
; CHECK-NEXT: [[TMP70:%.*]] = and i1 [[TMP66]], [[TMP69]]
; CHECK-NEXT: [[TMP71:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP70]])
; CHECK-NEXT: [[TMP72:%.*]] = icmp ne i64 [[TMP71]], 0
-; CHECK-NEXT: br i1 [[TMP72]], label %[[ASAN_REPORT1:.*]], label %[[BB75:.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[TMP72]], label %[[ASAN_REPORT1:.*]], label %[[BB63:.*]], !prof [[PROF1]]
; CHECK: [[ASAN_REPORT1]]:
-; CHECK-NEXT: br i1 [[TMP70]], label %[[BB73:.*]], label %[[BB74:.*]]
-; CHECK: [[BB73]]:
+; CHECK-NEXT: br i1 [[TMP70]], label %[[BB61:.*]], label %[[BB62:.*]]
+; CHECK: [[BB61]]:
; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP61]], i64 4) #[[ATTR7]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label %[[BB74]]
-; CHECK: [[BB74]]:
-; CHECK-NEXT: br label %[[BB75]]
-; CHECK: [[BB75]]:
+; CHECK-NEXT: br label %[[BB62]]
+; CHECK: [[BB62]]:
+; CHECK-NEXT: br label %[[BB63]]
+; CHECK: [[BB63]]:
; CHECK-NEXT: [[TMP76:%.*]] = ptrtoint ptr addrspace(3) [[TMP57]] to i32
; CHECK-NEXT: [[TMP77:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP77]], i32 [[TMP76]]
@@ -136,16 +124,16 @@ define amdgpu_kernel void @k0() #0 {
; CHECK-NEXT: [[TMP88:%.*]] = and i1 [[TMP84]], [[TMP87]]
; CHECK-NEXT: [[TMP89:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP88]])
; CHECK-NEXT: [[TMP90:%.*]] = icmp ne i64 [[TMP89]], 0
-; CHECK-NEXT: br i1 [[TMP90]], label %[[ASAN_REPORT2:.*]], label %[[BB93:.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[TMP90]], label %[[ASAN_REPORT2:.*]], label %[[BB81:.*]], !prof [[PROF1]]
; CHECK: [[ASAN_REPORT2]]:
-; CHECK-NEXT: br i1 [[TMP88]], label %[[BB91:.*]], label %[[BB92:.*]]
-; CHECK: [[BB91]]:
+; CHECK-NEXT: br i1 [[TMP88]], label %[[BB79:.*]], label %[[BB80:.*]]
+; CHECK: [[BB79]]:
; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP79]], i64 4) #[[ATTR7]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label %[[BB92]]
-; CHECK: [[BB92]]:
-; CHECK-NEXT: br label %[[BB93]]
-; CHECK: [[BB93]]:
+; CHECK-NEXT: br label %[[BB80]]
+; CHECK: [[BB80]]:
+; CHECK-NEXT: br label %[[BB81]]
+; CHECK: [[BB81]]:
; CHECK-NEXT: store i32 8, ptr addrspace(3) [[TMP36]], align 2
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
@@ -170,7 +158,7 @@ WId:
%5 = icmp eq i32 %4, 0
br i1 %5, label %Malloc, label %14
-Malloc: ; preds = %WId
+Malloc:
%6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
%7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
%8 = add i32 %6, %7
@@ -182,7 +170,7 @@ Malloc: ; preds = %WId
store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
br label %14
-14: ; preds = %Malloc, %WId
+14:
%xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
call void @llvm.amdgcn.s.barrier()
%15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -193,11 +181,11 @@ Malloc: ; preds = %WId
store i32 8, ptr addrspace(3) %18, align 2
br label %CondFree
-CondFree: ; preds = %14
+CondFree:
call void @llvm.amdgcn.s.barrier()
br i1 %xyzCond, label %Free, label %End
-Free: ; preds = %CondFree
+Free:
%19 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
%20 = call ptr @llvm.returnaddress(i32 0)
%21 = ptrtoint ptr %20 to i64
@@ -205,27 +193,16 @@ Free: ; preds = %CondFree
call void @__asan_free_impl(i64 %22, i64 %21)
br label %End
-End: ; preds = %Free, %CondFree
+End:
ret void
}
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.y() #1
-
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.workitem.id.z() #1
-
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare ptr @llvm.returnaddress(i32 immarg) #2
-
declare i64 @__asan_malloc_impl(i64, i64)
-
-; Function Attrs: convergent nocallback nofree nounwind willreturn
declare void @llvm.amdgcn.s.barrier() #3
-
declare void @__asan_free_impl(i64, i64)
attributes #0 = { sanitize_address "amdgpu-lds-size"="24" }
More information about the llvm-commits
mailing list