[llvm] [AMDGPU] Lower LDS in functions without sanitize_address in amdgpu-sw-lower-lds. (PR #131147)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 14 08:07:53 PDT 2025
https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/131147
>From ef877020f0c82f6a9f311c5da27432829b952666 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 12 Mar 2025 23:06:01 +0530
Subject: [PATCH 1/2] [AMDGPU] Lower LDS in kernels without sanitize_address in
amdgpu-sw-lower-lds.
---
llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp | 25 +++-
.../AMDGPU/amdgpu-sw-lower-lds-lower-all.ll | 128 ++++++++++++++++++
.../AMDGPU/amdgpu-sw-lower-lds-lower-none.ll | 38 ++++++
...er-lds-static-indirect-access-lower-all.ll | 127 +++++++++++++++++
...r-lds-static-indirect-access-lower-none.ll | 51 +++++++
5 files changed, 365 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-all.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-none.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
index 2a41f7cad1f00..b1a957317ef08 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -298,8 +298,7 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
for (User *V : GV->users()) {
if (auto *I = dyn_cast<Instruction>(V)) {
Function *F = I->getFunction();
- if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
- !F->isDeclaration())
+ if (!isKernelLDS(F) && !F->isDeclaration())
FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
}
}
@@ -1135,6 +1134,17 @@ void AMDGPUSwLowerLDS::initAsanInfo() {
AsanInfo.Offset = Offset;
}
+static bool hasFnWithSanitizeAddressAttr(FunctionVariableMap &LDSAccesses) {
+ for (auto &K : LDSAccesses) {
+ Function *F = K.first;
+ if (!F)
+ continue;
+ if (F->hasFnAttribute(Attribute::SanitizeAddress))
+ return true;
+ }
+ return false;
+}
+
bool AMDGPUSwLowerLDS::run() {
bool Changed = false;
@@ -1145,6 +1155,14 @@ bool AMDGPUSwLowerLDS::run() {
// Get all the direct and indirect access of LDS for all the kernels.
LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
+ // Flag to decide whether to lower all the LDS accesses
+ // based on sanitize_address attribute.
+ bool LowerAllLDS = hasFnWithSanitizeAddressAttr(LDSUsesInfo.direct_access) ||
+ hasFnWithSanitizeAddressAttr(LDSUsesInfo.indirect_access);
+
+ if (!LowerAllLDS)
+ return Changed;
+
// Utility to group LDS access into direct, indirect, static and dynamic.
auto PopulateKernelStaticDynamicLDS = [&](FunctionVariableMap &LDSAccesses,
bool DirectAccess) {
@@ -1154,8 +1172,6 @@ bool AMDGPUSwLowerLDS::run() {
continue;
assert(isKernelLDS(F));
- if (!F->hasFnAttribute(Attribute::SanitizeAddress))
- continue;
// Only inserts if key isn't already in the map.
FuncLDSAccessInfo.KernelToLDSParametersMap.insert(
@@ -1222,6 +1238,7 @@ bool AMDGPUSwLowerLDS::run() {
// Get non-kernels with LDS ptr as argument and called by kernels.
getNonKernelsWithLDSArguments(CG);
+ // Lower LDS accesses in non-kernels.
if (!FuncLDSAccessInfo.NonKernelToLDSAccessMap.empty() ||
!FuncLDSAccessInfo.NonKernelsWithLDSArgument.empty()) {
NonKernelLDSParameters NKLDSParams;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
new file mode 100644
index 0000000000000..d67a42aaf435c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+; RUN: llc < %s -enable-new-pm -stop-after=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if static LDS accesses in kernels without sanitize_address attribute are lowered if
+; other kernels in module have sanitize_address attribute.
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
+
+define amdgpu_kernel void @k0() sanitize_address {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: WId:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP20:%.*]]
+; CHECK: Malloc:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 31)
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
+; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
+; CHECK-NEXT: br label [[TMP20]]
+; CHECK: 20:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]]
+; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP26]]
+; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP27]], align 4
+; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP28]]
+; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP29]], align 2
+; CHECK-NEXT: br label [[CONDFREE:%.*]]
+; CHECK: CondFree:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
+; CHECK: Free:
+; CHECK-NEXT: [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
+; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
+; CHECK-NEXT: br label [[END]]
+; CHECK: End:
+; CHECK-NEXT: ret void
+;
+ store i8 7, ptr addrspace(3) @lds_1, align 4
+ store i32 8, ptr addrspace(3) @lds_2, align 2
+ ret void
+}
+
+define amdgpu_kernel void @k1() {
+; CHECK-LABEL: define amdgpu_kernel void @k1(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: WId:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP18:%.*]]
+; CHECK: Malloc:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28)
+; CHECK-NEXT: br label [[TMP18]]
+; CHECK: 18:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, i32 [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
+; CHECK-NEXT: store i32 9, ptr addrspace(1) [[TMP23]], align 2
+; CHECK-NEXT: br label [[CONDFREE:%.*]]
+; CHECK: CondFree:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
+; CHECK: Free:
+; CHECK-NEXT: [[TMP24:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[TMP24]] to i64
+; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP26]], i64 [[TMP25]])
+; CHECK-NEXT: br label [[END]]
+; CHECK: End:
+; CHECK-NEXT: ret void
+;
+ store i32 9, ptr addrspace(3) @lds_2, align 2
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
new file mode 100644
index 0000000000000..aac64e2f3bc08
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+; RUN: llc < %s -enable-new-pm -stop-after=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if LDS accesses in kernels without sanitize_address attribute are not lowered
+; if all other kernels don't have sanitize_address attribute.
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
+
+;.
+; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+; CHECK: @lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
+;.
+define amdgpu_kernel void @k0() {
+; CHECK-LABEL: define amdgpu_kernel void @k0() {
+; CHECK-NEXT: store i8 7, ptr addrspace(3) @lds_1, align 4
+; CHECK-NEXT: store i32 8, ptr addrspace(3) @lds_2, align 2
+; CHECK-NEXT: ret void
+;
+ store i8 7, ptr addrspace(3) @lds_1, align 4
+ store i32 8, ptr addrspace(3) @lds_2, align 2
+ ret void
+}
+
+define amdgpu_kernel void @k1() {
+; CHECK-LABEL: define amdgpu_kernel void @k1() {
+; CHECK-NEXT: store i32 9, ptr addrspace(3) @lds_2, align 2
+; CHECK-NEXT: ret void
+;
+ store i32 9, ptr addrspace(3) @lds_2, align 2
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-all.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-all.ll
new file mode 100644
index 0000000000000..4625a7f626f9b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-all.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if static LDS is lowered correctly when a non-kernel without sanitize_address attr with LDS accesses is called from
+; kernel which has sanitize_address attr.
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 1
+@lds_2 = internal addrspace(3) global [1 x i32] poison, align 2
+@lds_3 = external addrspace(3) global [3 x i8], align 4
+@lds_4 = external addrspace(3) global [4 x i8], align 8
+
+;.
+; @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 3, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 4, i32 32 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+;.
+define void @use_variables() {
+; CHECK-LABEL: define void @use_variables() {
+; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP18]]
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP18]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP18]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP9]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = addrspacecast ptr addrspace(1) [[TMP14]] to ptr
+; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP16]]
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP17]] to ptr
+; CHECK-NEXT: store i8 3, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(3) [[TMP12]] to i32
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP19]]
+; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP20]], align 8
+; CHECK-NEXT: ret void
+;
+ %X = addrspacecast ptr addrspace(3) @lds_3 to ptr
+ store i8 3, ptr addrspacecast( ptr addrspace(3) @lds_3 to ptr), align 4
+ store i8 3, ptr addrspace(3) @lds_4, align 8
+ ret void
+}
+
+define amdgpu_kernel void @k0() sanitize_address {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB24:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 31)
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
+; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 99
+; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP21]], i64 29)
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 132
+; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 28)
+; CHECK-NEXT: br label %[[BB24]]
+; CHECK: [[BB24]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP25:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP28]]
+; CHECK-NEXT: call void @use_variables()
+; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP27]] to i32
+; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP25]], i32 [[TMP30]]
+; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP31]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(3) [[TMP29]] to i32
+; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP25]], i32 [[TMP32]]
+; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP33]], align 2
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP34:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP35:%.*]] = ptrtoint ptr [[TMP34]] to i64
+; CHECK-NEXT: [[TMP36:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP36]], i64 [[TMP35]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+ call void @use_variables()
+ store i8 7, ptr addrspace(3) @lds_1, align 1
+ store i32 8, ptr addrspace(3) @lds_2, align 2
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-none.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-none.ll
new file mode 100644
index 0000000000000..5dbab5643b929
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-none.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if LDS is not lowered when a non-kernel with sanitize_address attr and with LDS accesses is called from
+; kernel which doesn't have sanitize_address attr.
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 1
+@lds_2 = internal addrspace(3) global [1 x i32] poison, align 2
+@lds_3 = external addrspace(3) global [3 x i8], align 4
+@lds_4 = external addrspace(3) global [4 x i8], align 8
+
+;.
+; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 1
+; CHECK: @lds_2 = internal addrspace(3) global [1 x i32] poison, align 2
+; CHECK: @lds_3 = external addrspace(3) global [3 x i8], align 4
+; CHECK: @lds_4 = external addrspace(3) global [4 x i8], align 8
+;.
+define void @use_variables() sanitize_address {
+; CHECK-LABEL: define void @use_variables(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) @lds_3 to ptr
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(3) @lds_3 to ptr
+; CHECK-NEXT: store i8 3, ptr [[TMP1]], align 4
+; CHECK-NEXT: store i8 3, ptr addrspace(3) @lds_4, align 8
+; CHECK-NEXT: ret void
+;
+ %X = addrspacecast ptr addrspace(3) @lds_3 to ptr
+ store i8 3, ptr addrspacecast( ptr addrspace(3) @lds_3 to ptr), align 4
+ store i8 3, ptr addrspace(3) @lds_4, align 8
+ ret void
+}
+
+define amdgpu_kernel void @k0() {
+; CHECK-LABEL: define amdgpu_kernel void @k0() {
+; CHECK-NEXT: call void @use_variables()
+; CHECK-NEXT: store i8 7, ptr addrspace(3) @lds_1, align 1
+; CHECK-NEXT: store i32 8, ptr addrspace(3) @lds_2, align 2
+; CHECK-NEXT: ret void
+;
+ call void @use_variables()
+ store i8 7, ptr addrspace(3) @lds_1, align 1
+ store i32 8, ptr addrspace(3) @lds_2, align 2
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address }
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+;.
>From 3e27d67c187c7ef1e2ef80ecb8c136fdb125b7fe Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 14 Mar 2025 20:35:39 +0530
Subject: [PATCH 2/2] [AMDGPU] update tests to use new version of
update_test_checks.py
---
.../AMDGPU/amdgpu-sw-lower-lds-lower-all.ll | 51 +++++++++----------
.../AMDGPU/amdgpu-sw-lower-lds-lower-none.ll | 3 +-
2 files changed, 26 insertions(+), 28 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
index d67a42aaf435c..f30a382a62c6b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
@@ -1,6 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
-; RUN: llc < %s -enable-new-pm -stop-after=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
; Test to check if static LDS accesses in kernels without sanitize_address attribute are lowered if
; other kernels in module have sanitize_address attribute.
@@ -10,15 +9,15 @@
define amdgpu_kernel void @k0() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @k0(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP20:%.*]]
-; CHECK: Malloc:
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
+; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
@@ -37,9 +36,9 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
-; CHECK-NEXT: br label [[TMP20]]
-; CHECK: 20:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: br label %[[BB20]]
+; CHECK: [[BB20]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -52,17 +51,17 @@ define amdgpu_kernel void @k0() sanitize_address {
; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP28]]
; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP29]], align 2
-; CHECK-NEXT: br label [[CONDFREE:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
store i8 7, ptr addrspace(3) @lds_1, align 4
@@ -73,15 +72,15 @@ define amdgpu_kernel void @k0() sanitize_address {
define amdgpu_kernel void @k1() {
; CHECK-LABEL: define amdgpu_kernel void @k1(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: WId:
+; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP18:%.*]]
-; CHECK: Malloc:
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
@@ -97,9 +96,9 @@ define amdgpu_kernel void @k1() {
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28)
-; CHECK-NEXT: br label [[TMP18]]
-; CHECK: 18:
-; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
@@ -107,17 +106,17 @@ define amdgpu_kernel void @k1() {
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
; CHECK-NEXT: store i32 9, ptr addrspace(1) [[TMP23]], align 2
-; CHECK-NEXT: br label [[CONDFREE:%.*]]
-; CHECK: CondFree:
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK: Free:
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP24:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[TMP24]] to i64
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP26]], i64 [[TMP25]])
-; CHECK-NEXT: br label [[END]]
-; CHECK: End:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
store i32 9, ptr addrspace(3) @lds_2, align 2
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
index aac64e2f3bc08..5ce12888babbc 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
@@ -1,6 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
-; RUN: llc < %s -enable-new-pm -stop-after=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
; Test to check if LDS accesses in kernels without sanitize_address attribute are not lowered
; if all other kernels don't have sanitize_address attribute.
More information about the llvm-commits
mailing list