[llvm] [AMDGPU] Handle lowering addrspace casts from LDS to FLAT address in amdgpu-sw-lower-lds. (PR #121214)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 27 08:27:31 PST 2024
https://github.com/skc7 created https://github.com/llvm/llvm-project/pull/121214
"infer-address-spaces" pass replaces all refinable generic pointers with equivalent specific pointers.
At -O0 optimisation level, infer-address-spaces pass doesn't run in the pipeline.
"amdgpu-sw-lower-lds" pass instruments memory operations on addrspace(3) ptrs. Since, extra addrspacecasts are present from lds to flat addrspaces at -O0 and the actual store/load memory instructions are now on flat addrspace, these addrspacecast need to be handled in the amdgpu-sw-lower-lds pass itself. This patch lowers the lds ptr first to the corresponding ptr in the global memory from the asan_malloc. Then replaces the original cast with addrspacecast from global ptr to flat ptr.
Example:
->Before infer-address-spaces pass :
%asc = addrspacecast ptr addrspace(3) @lds to ptr
%gep = getelementptr inbounds [5 x i32], ptr %asc, i64 0, i64 0
store i32 1, ptr %gep, align 4
->After infer-address-spaces pass :
%gep = getelementptr inbounds [5 x i32], ptr addrspace(3) @lds, i64 0, i64 0
store i32 1, ptr addrspace(3) %gep, align 4
->Without infer-addrspaces pass and with this patch in amdgpu-sw-lower-lds:
; get corresponding global memory ptr from the asan_malloc allocation.
%load = load ptr addrspace(1), ptr addrspace(3) @<lds.kernel.global>, align 8
%gep1 = getelementptr inbounds i8, ptr addrspace(3) @<lds.kernel.global>, i32 <offset>
%ptoi = ptrtoint ptr addrspace(3) %gep1 to i32
%gep2 = getelementptr inbounds i8, ptr addrspace(1) %load, i32 %ptoi
%asc1 = addrspacecast ptr addrspace(1) %gep2 to ptr
%gep = getelementptr inbounds [5 x i32], ptr %asc1, i64 0, i64 0
store i32 1, ptr addrspace(3) %gep, align 4
>From dd47be059f5b6da15a521929f059f13ea994cfe8 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 27 Dec 2024 20:46:44 +0530
Subject: [PATCH] [AMDGPU] Handle lowering addrspace casts from LDS to FLAT
address.
---
llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp | 14 +
...gpu-sw-lower-lds-non-kernel-declaration.ll | 8 +-
...w-lower-lds-static-indirect-access-asan.ll | 98 ++---
...-lds-static-indirect-access-nested-asan.ll | 334 +++++++++---------
...lower-lds-static-indirect-access-nested.ll | 134 +++----
...static-indirect-access-no-kernel-lds-id.ll | 8 +-
...gpu-sw-lower-lds-static-indirect-access.ll | 39 +-
.../amdgpu-sw-lower-lds-static-lds-O0.ll | 76 ++++
8 files changed, 425 insertions(+), 286 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-O0.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
index 17207773b4858c..74b91950624681 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -655,6 +655,10 @@ void AMDGPUSwLowerLDS::getLDSMemoryInstructions(
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(&Inst)) {
if (XCHG->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
LDSInstructions.insert(&Inst);
+ } else if (AddrSpaceCastInst *AscI = dyn_cast<AddrSpaceCastInst>(&Inst)) {
+ if ((AscI->getSrcAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) &&
+ (AscI->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS))
+ LDSInstructions.insert(&Inst);
} else
continue;
}
@@ -722,6 +726,16 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
AsanInfo.Instructions.insert(NewXCHG);
XCHG->replaceAllUsesWith(NewXCHG);
XCHG->eraseFromParent();
+ } else if (AddrSpaceCastInst *AscI = dyn_cast<AddrSpaceCastInst>(Inst)) {
+ Value *AIOperand = AscI->getPointerOperand();
+ Value *Gep =
+ getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, AIOperand);
+ Value *NewAI = IRB.CreateAddrSpaceCast(Gep, AscI->getType());
+ // Note: No need to add the instruction to AsanInfo instructions to be
+ // instrumented list. FLAT_ADDRESS ptr would have been already
+ // instrumented by asan pass prior to this pass.
+ AscI->replaceAllUsesWith(NewAI);
+ AscI->eraseFromParent();
} else
report_fatal_error("Unimplemented LDS lowering instruction");
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
index ae2bcbbb81b5f1..a6e6b84bba3046 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll
@@ -20,8 +20,12 @@ define void @non_kernel_function() sanitize_address {
; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
-; CHECK-NEXT: [[Y:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
-; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
+; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP14]]
+; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(1) [[TMP10]] to ptr
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(1) [[TMP13]] to ptr
; CHECK-NEXT: store i8 5, ptr [[TMP9]], align 8
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll
index 3a05f93df35a30..b9b4c90daea87d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
; Test to check if static LDS is lowered correctly when a non-kernel with LDS accesses is called from kernel.
@@ -28,8 +28,12 @@ define void @use_variables() sanitize_address {
; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]]
-; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
-; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
+; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]]
+; CHECK-NEXT: [[TMP34:%.*]] = addrspacecast ptr addrspace(1) [[TMP33]] to ptr
+; CHECK-NEXT: [[TMP35:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP35]]
+; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(1) [[TMP36]] to ptr
; CHECK-NEXT: store i8 3, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP14]]
@@ -45,16 +49,16 @@ define void @use_variables() sanitize_address {
; CHECK-NEXT: [[TMP25:%.*]] = and i1 [[TMP21]], [[TMP24]]
; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP25]])
; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP26]], 0
-; CHECK-NEXT: br i1 [[TMP27]], label [[ASAN_REPORT:%.*]], label [[TMP30:%.*]], !prof [[PROF2:![0-9]+]]
-; CHECK: asan.report:
-; CHECK-NEXT: br i1 [[TMP25]], label [[TMP28:%.*]], label [[TMP29:%.*]]
-; CHECK: 28:
+; CHECK-NEXT: br i1 [[TMP27]], label %[[ASAN_REPORT:.*]], label %[[BB35:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK: [[ASAN_REPORT]]:
+; CHECK-NEXT: br i1 [[TMP25]], label %[[BB33:.*]], label %[[BB34:.*]]
+; CHECK: [[BB33]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7:[0-9]+]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label [[TMP29]]
-; CHECK: 29:
-; CHECK-NEXT: br label [[TMP30]]
-; CHECK: 30:
+; CHECK-NEXT: br label %[[BB34]]
+; CHECK: [[BB34]]:
+; CHECK-NEXT: br label %[[BB35]]
+; CHECK: [[BB35]]:
; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP31]], align 8
; CHECK-NEXT: ret void
;
@@ -67,15 +71,15 @@ define void @use_variables() sanitize_address {
define amdgpu_kernel void @k0() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @k0(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK: Malloc:
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB24:.*]]
+; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]]
@@ -100,9 +104,9 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 132
; CHECK-NEXT: [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP68]], i64 28)
-; CHECK-NEXT: br label [[TMP7]]
-; CHECK: 24:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: br label %[[BB24]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -124,16 +128,16 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP41:%.*]] = and i1 [[TMP37]], [[TMP40]]
; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP41]])
; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i64 [[TMP42]], 0
-; CHECK-NEXT: br i1 [[TMP43]], label [[ASAN_REPORT:%.*]], label [[TMP46:%.*]], !prof [[PROF2]]
-; CHECK: asan.report:
-; CHECK-NEXT: br i1 [[TMP41]], label [[TMP44:%.*]], label [[CONDFREE:%.*]]
-; CHECK: 44:
+; CHECK-NEXT: br i1 [[TMP43]], label %[[ASAN_REPORT:.*]], label %[[BB46:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT]]:
+; CHECK-NEXT: br i1 [[TMP41]], label %[[BB44:.*]], label %[[BB45:.*]]
+; CHECK: [[BB44]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label [[CONDFREE]]
-; CHECK: 45:
-; CHECK-NEXT: br label [[TMP46]]
-; CHECK: 46:
+; CHECK-NEXT: br label %[[BB45]]
+; CHECK: [[BB45]]:
+; CHECK-NEXT: br label %[[BB46]]
+; CHECK: [[BB46]]:
; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP31]], align 1
; CHECK-NEXT: [[TMP47:%.*]] = ptrtoint ptr addrspace(3) [[TMP18]] to i32
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP47]]
@@ -152,16 +156,16 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP59:%.*]] = and i1 [[TMP54]], [[TMP58]]
; CHECK-NEXT: [[TMP60:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP59]])
; CHECK-NEXT: [[TMP61:%.*]] = icmp ne i64 [[TMP60]], 0
-; CHECK-NEXT: br i1 [[TMP61]], label [[ASAN_REPORT1:%.*]], label [[TMP64:%.*]], !prof [[PROF2]]
-; CHECK: asan.report1:
-; CHECK-NEXT: br i1 [[TMP59]], label [[TMP62:%.*]], label [[TMP63:%.*]]
-; CHECK: 64:
+; CHECK-NEXT: br i1 [[TMP61]], label %[[ASAN_REPORT1:.*]], label %[[BB66:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT1]]:
+; CHECK-NEXT: br i1 [[TMP59]], label %[[BB64:.*]], label %[[BB65:.*]]
+; CHECK: [[BB64]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP83]]) #[[ATTR7]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label [[TMP63]]
-; CHECK: 65:
-; CHECK-NEXT: br label [[TMP64]]
-; CHECK: 66:
+; CHECK-NEXT: br label %[[BB65]]
+; CHECK: [[BB65]]:
+; CHECK-NEXT: br label %[[BB66]]
+; CHECK: [[BB66]]:
; CHECK-NEXT: [[TMP84:%.*]] = ptrtoint ptr addrspace(1) [[TMP82]] to i64
; CHECK-NEXT: [[TMP85:%.*]] = lshr i64 [[TMP84]], 3
; CHECK-NEXT: [[TMP69:%.*]] = add i64 [[TMP85]], 2147450880
@@ -174,28 +178,28 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP76:%.*]] = and i1 [[TMP72]], [[TMP75]]
; CHECK-NEXT: [[TMP77:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP76]])
; CHECK-NEXT: [[TMP78:%.*]] = icmp ne i64 [[TMP77]], 0
-; CHECK-NEXT: br i1 [[TMP78]], label [[ASAN_REPORT2:%.*]], label [[TMP81:%.*]], !prof [[PROF2]]
-; CHECK: asan.report2:
-; CHECK-NEXT: br i1 [[TMP76]], label [[TMP79:%.*]], label [[TMP80:%.*]]
-; CHECK: 79:
+; CHECK-NEXT: br i1 [[TMP78]], label %[[ASAN_REPORT2:.*]], label %[[BB81:.*]], !prof [[PROF2]]
+; CHECK: [[ASAN_REPORT2]]:
+; CHECK-NEXT: br i1 [[TMP76]], label %[[BB79:.*]], label %[[BB80:.*]]
+; CHECK: [[BB79]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP84]]) #[[ATTR7]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
-; CHECK-NEXT: br label [[TMP80]]
-; CHECK: 80:
-; CHECK-NEXT: br label [[TMP81]]
-; CHECK: 81:
+; CHECK-NEXT: br label %[[BB80]]
+; CHECK: [[BB80]]:
+; CHECK-NEXT: br label %[[BB81]]
+; CHECK: [[BB81]]:
; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP48]], align 2
-; CHECK-NEXT: br label [[CONDFREE1:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @use_variables()
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll
index 1dd391ec6321a7..255dda562c1ea4 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
; Test to check if LDS accesses are lowered correctly when a call is made to nested non-kernel.
@@ -6,50 +6,64 @@
@A = external addrspace(3) global [8 x ptr]
@B = external addrspace(3) global [0 x i32]
+;.
+; @llvm.amdgcn.sw.lds.kernel_0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; @llvm.amdgcn.sw.lds.kernel_0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_0.md.type { %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.kernel_2 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0]]
+; @llvm.amdgcn.sw.lds.kernel_2.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_2.md.type { %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.kernel_1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
+; @llvm.amdgcn.kernel_1.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1:![0-9]+]]
+; @llvm.amdgcn.sw.lds.kernel_1.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_1.md.type { %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.kernel_3 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
+; @llvm.amdgcn.kernel_3.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1]]
+; @llvm.amdgcn.sw.lds.kernel_3.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_3.md.type { %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [4 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [4 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_1.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0)], [2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_2.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_3.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0)]], no_sanitize_address
+;.
define amdgpu_kernel void @kernel_0() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_0(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK: Malloc:
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), align 4
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 2), align 4
-; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT: [[TMP13:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]])
-; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
-; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
-; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP21]], i64 24)
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96
-; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32)
-; CHECK-NEXT: br label [[TMP7]]
-; CHECK: 18:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 96
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 32)
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
+; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
; CHECK-NEXT: call void @call_store_A()
-; CHECK-NEXT: br label [[CONDFREE:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
-; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
-; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64
-; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
+; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @call_store_A()
@@ -58,56 +72,56 @@ define amdgpu_kernel void @kernel_0() sanitize_address {
define amdgpu_kernel void @kernel_1() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_1(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]]
-; CHECK: Malloc:
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, align 4
-; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 0, i32 2), align 4
-; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]]
-; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15
-; CHECK-NEXT: store i32 [[TMP21]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0), align 4
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[TMP7]], align 4
-; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 1), align 4
-; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 3
-; CHECK-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 4
-; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 4
-; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 2), align 4
-; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP21]], [[TMP11]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 0, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
+; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
+; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 1), align 4
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3
+; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 4
+; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 4
+; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
; CHECK-NEXT: [[TMP17:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]])
-; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
-; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24)
-; CHECK-NEXT: br label [[TMP14]]
-; CHECK: 23:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8
+; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP22]], i64 24)
+; CHECK-NEXT: br label %[[BB23]]
+; CHECK: [[BB23]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
+; CHECK-NEXT: [[TMP24:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
-; CHECK-NEXT: br label [[CONDFREE:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
-; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64
-; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64
+; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP24]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
@@ -116,48 +130,48 @@ define amdgpu_kernel void @kernel_1() sanitize_address {
define amdgpu_kernel void @kernel_2() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_2(
-; CHECK-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK: Malloc:
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), align 4
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 2), align 4
-; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT: [[TMP13:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]])
-; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
-; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8
-; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP21]], i64 24)
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96
-; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32)
-; CHECK-NEXT: br label [[TMP7]]
-; CHECK: 18:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 96
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 32)
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
+; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
; CHECK-NEXT: call void @store_A()
-; CHECK-NEXT: br label [[CONDFREE:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
-; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
-; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64
-; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
+; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @store_A()
@@ -166,56 +180,56 @@ define amdgpu_kernel void @kernel_2() sanitize_address {
define amdgpu_kernel void @kernel_3() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_3(
-; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META6:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]]
-; CHECK: Malloc:
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, align 4
-; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 0, i32 2), align 4
-; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]]
-; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15
-; CHECK-NEXT: store i32 [[TMP21]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0), align 4
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[TMP7]], align 4
-; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 1), align 4
-; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 3
-; CHECK-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 4
-; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 4
-; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 2), align 4
-; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP21]], [[TMP11]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 0, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
+; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
+; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 1), align 4
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 3
+; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 4
+; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 4
+; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
; CHECK-NEXT: [[TMP17:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]])
-; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
-; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64
-; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24)
-; CHECK-NEXT: br label [[TMP14]]
-; CHECK: 23:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8
+; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP22]], i64 24)
+; CHECK-NEXT: br label %[[BB23]]
+; CHECK: [[BB23]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
+; CHECK-NEXT: [[TMP24:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
-; CHECK-NEXT: br label [[CONDFREE:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
-; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
-; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64
-; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
-; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64
+; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP24]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
@@ -237,14 +251,16 @@ define private void @store_A() sanitize_address {
; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
-; CHECK-NEXT: store ptr [[TMP10]], ptr null, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(1) [[TMP10]] to ptr
+; CHECK-NEXT: store ptr [[TMP11]], ptr null, align 8
; CHECK-NEXT: ret void
;
store ptr addrspacecast (ptr addrspace(3) @A to ptr), ptr null
@@ -256,14 +272,16 @@ define private ptr @get_B_ptr() sanitize_address {
; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
-; CHECK-NEXT: ret ptr [[TMP10]]
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(1) [[TMP10]] to ptr
+; CHECK-NEXT: ret ptr [[TMP11]]
;
ret ptr addrspacecast (ptr addrspace(3) @B to ptr)
}
@@ -272,8 +290,6 @@ define private ptr @get_B_ptr() sanitize_address {
!0 = !{i32 4, !"nosanitize_address", i32 1}
;.
-; CHECK: [[META2]] = !{i32 0}
-; CHECK: [[META3]] = !{i32 1}
-; CHECK: [[META4]] = !{i32 2}
-; CHECK: [[META5]] = !{i32 3}
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
+; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8,8" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll
index ed9107764eb918..7184ebbb8faa33 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
; Test to check if LDS accesses are lowered correctly when a call is made to nested non-kernel.
@@ -6,18 +6,32 @@
@A = external addrspace(3) global [8 x ptr]
@B = external addrspace(3) global [0 x i32]
+;.
+; @llvm.amdgcn.sw.lds.kernel_2 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; @llvm.amdgcn.sw.lds.kernel_2.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_2.md.type { %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_2.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.kernel_1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
+; @llvm.amdgcn.kernel_1.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1:![0-9]+]]
+; @llvm.amdgcn.sw.lds.kernel_1.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_1.md.type { %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_1.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.kernel_3 = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0]]
+; @llvm.amdgcn.kernel_3.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 4, !absolute_symbol [[META1]]
+; @llvm.amdgcn.sw.lds.kernel_3.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_3.md.type { %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_3.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.kernel_0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0]]
+; @llvm.amdgcn.sw.lds.kernel_0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.kernel_0.md.type { %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.kernel_0.md.item { i32 32, i32 64, i32 96 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [4 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [4 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_1.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0)], [2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_2.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), ptr addrspace(1) poison], [2 x ptr addrspace(1)] [ptr addrspace(1) poison, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.kernel_3.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0)]], no_sanitize_address
+;.
define amdgpu_kernel void @kernel_0() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_0(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK: Malloc:
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]]
@@ -33,23 +47,23 @@ define amdgpu_kernel void @kernel_0() sanitize_address {
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96
; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32)
-; CHECK-NEXT: br label [[TMP7]]
-; CHECK: 18:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8
; CHECK-NEXT: call void @call_store_A()
-; CHECK-NEXT: br label [[CONDFREE:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @call_store_A()
@@ -58,16 +72,16 @@ define amdgpu_kernel void @kernel_0() sanitize_address {
define amdgpu_kernel void @kernel_1() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_1(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]]
-; CHECK: Malloc:
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, align 4
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]]
@@ -90,24 +104,24 @@ define amdgpu_kernel void @kernel_1() sanitize_address {
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24)
-; CHECK-NEXT: br label [[TMP14]]
-; CHECK: 23:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: br label %[[BB23]]
+; CHECK: [[BB23]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
-; CHECK-NEXT: br label [[CONDFREE:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
@@ -116,16 +130,16 @@ define amdgpu_kernel void @kernel_1() sanitize_address {
define amdgpu_kernel void @kernel_2() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_2(
-; CHECK-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK: Malloc:
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]]
@@ -141,23 +155,23 @@ define amdgpu_kernel void @kernel_2() sanitize_address {
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96
; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32)
-; CHECK-NEXT: br label [[TMP7]]
-; CHECK: 18:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8
; CHECK-NEXT: call void @store_A()
-; CHECK-NEXT: br label [[CONDFREE:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @store_A()
@@ -166,16 +180,16 @@ define amdgpu_kernel void @kernel_2() sanitize_address {
define amdgpu_kernel void @kernel_3() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @kernel_3(
-; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META6:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]]
-; CHECK: Malloc:
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB23:.*]]
+; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, align 4
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]]
@@ -198,24 +212,24 @@ define amdgpu_kernel void @kernel_3() sanitize_address {
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24)
-; CHECK-NEXT: br label [[TMP14]]
-; CHECK: 23:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: br label %[[BB23]]
+; CHECK: [[BB23]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
-; CHECK-NEXT: br label [[CONDFREE:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
@@ -243,7 +257,9 @@ define private void @store_A() sanitize_address {
; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP11]]
+; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP12]] to ptr
; CHECK-NEXT: store ptr [[TMP10]], ptr null, align 8
; CHECK-NEXT: ret void
;
@@ -262,7 +278,9 @@ define private ptr @get_B_ptr() sanitize_address {
; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP11]]
+; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP12]] to ptr
; CHECK-NEXT: ret ptr [[TMP10]]
;
ret ptr addrspacecast (ptr addrspace(3) @B to ptr)
@@ -272,8 +290,6 @@ define private ptr @get_B_ptr() sanitize_address {
!0 = !{i32 4, !"nosanitize_address", i32 1}
;.
-; CHECK: [[META2]] = !{i32 0}
-; CHECK: [[META3]] = !{i32 1}
-; CHECK: [[META4]] = !{i32 2}
-; CHECK: [[META5]] = !{i32 3}
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
+; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8,8" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll
index b9fa89dd6f0a6a..704bc9e6352940 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-no-kernel-lds-id.ll
@@ -29,8 +29,12 @@ define void @use_variables() sanitize_address {
; CHECK-NEXT: [[TMP10:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP9]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(1) [[TMP10]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP11]]
-; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
-; CHECK-NEXT: [[TMP13:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
+; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP18]]
+; CHECK-NEXT: [[TMP20:%.*]] = addrspacecast ptr addrspace(1) [[TMP19]] to ptr
+; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP16]]
+; CHECK-NEXT: [[TMP13:%.*]] = addrspacecast ptr addrspace(1) [[TMP17]] to ptr
; CHECK-NEXT: store i8 3, ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP12]] to i32
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP14]]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll
index 11e912287c7f7c..8f5abe962f8eb2 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
; Test to check if static LDS is lowered correctly when a non-kernel with LDS accesses is called from kernel.
@@ -28,8 +28,12 @@ define void @use_variables() sanitize_address {
; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]]
-; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
-; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]]
+; CHECK-NEXT: [[TMP19:%.*]] = addrspacecast ptr addrspace(1) [[TMP18]] to ptr
+; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP20]]
+; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(1) [[TMP17]] to ptr
; CHECK-NEXT: store i8 3, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP14]]
@@ -44,16 +48,16 @@ define void @use_variables() sanitize_address {
define amdgpu_kernel void @k0() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @k0(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META1:![0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
-; CHECK: Malloc:
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB24:.*]]
+; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]]
@@ -78,9 +82,9 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 132
; CHECK-NEXT: [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP68]], i64 28)
-; CHECK-NEXT: br label [[TMP7]]
-; CHECK: 24:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: br label %[[BB24]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -94,17 +98,17 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP47:%.*]] = ptrtoint ptr addrspace(3) [[TMP18]] to i32
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP47]]
; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP48]], align 2
-; CHECK-NEXT: br label [[CONDFREE1:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
call void @use_variables()
@@ -124,5 +128,6 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
-; CHECK: [[META1]] = !{i32 0}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[META2]] = !{i32 0}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-O0.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-O0.ll
new file mode 100644
index 00000000000000..1973a0acf4659d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-O0.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+ at lds = internal addrspace(3) global [5 x i32] poison, align 16
+
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 16, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 20, i32 64 } }, no_sanitize_address
+;.
+define amdgpu_kernel void @k0() sanitize_address {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 52
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 44)
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = addrspacecast ptr addrspace(1) [[TMP23]] to ptr
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [5 x i32], ptr [[TMP24]], i64 0, i64 0
+; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64
+; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+ %gep = getelementptr inbounds [5 x i32], ptr addrspacecast (ptr addrspace(3) @lds to ptr), i64 0, i64 0
+ store i32 1, ptr %gep, align 4
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="16" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+;.
More information about the llvm-commits
mailing list