[llvm] [AMDGPU] Lower LDS in functions without sanitize_address in amdgpu-sw-lower-lds. (PR #131147)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 14 08:07:53 PDT 2025


https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/131147

>From ef877020f0c82f6a9f311c5da27432829b952666 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Wed, 12 Mar 2025 23:06:01 +0530
Subject: [PATCH 1/2] [AMDGPU] Lower LDS in kernels without sanitize_address in
 amdgpu-sw-lower-lds.

---
 llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp   |  25 +++-
 .../AMDGPU/amdgpu-sw-lower-lds-lower-all.ll   | 128 ++++++++++++++++++
 .../AMDGPU/amdgpu-sw-lower-lds-lower-none.ll  |  38 ++++++
 ...er-lds-static-indirect-access-lower-all.ll | 127 +++++++++++++++++
 ...r-lds-static-indirect-access-lower-none.ll |  51 +++++++
 5 files changed, 365 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-all.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-none.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
index 2a41f7cad1f00..b1a957317ef08 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -298,8 +298,7 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
     for (User *V : GV->users()) {
       if (auto *I = dyn_cast<Instruction>(V)) {
         Function *F = I->getFunction();
-        if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
-            !F->isDeclaration())
+        if (!isKernelLDS(F) && !F->isDeclaration())
           FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
       }
     }
@@ -1135,6 +1134,17 @@ void AMDGPUSwLowerLDS::initAsanInfo() {
   AsanInfo.Offset = Offset;
 }
 
+static bool hasFnWithSanitizeAddressAttr(FunctionVariableMap &LDSAccesses) {
+  for (auto &K : LDSAccesses) {
+    Function *F = K.first;
+    if (!F)
+      continue;
+    if (F->hasFnAttribute(Attribute::SanitizeAddress))
+      return true;
+  }
+  return false;
+}
+
 bool AMDGPUSwLowerLDS::run() {
   bool Changed = false;
 
@@ -1145,6 +1155,14 @@ bool AMDGPUSwLowerLDS::run() {
   // Get all the direct and indirect access of LDS for all the kernels.
   LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
 
+  // Flag to decide whether to lower all the LDS accesses
+  // based on sanitize_address attribute.
+  bool LowerAllLDS = hasFnWithSanitizeAddressAttr(LDSUsesInfo.direct_access) ||
+                     hasFnWithSanitizeAddressAttr(LDSUsesInfo.indirect_access);
+
+  if (!LowerAllLDS)
+    return Changed;
+
   // Utility to group LDS access into direct, indirect, static and dynamic.
   auto PopulateKernelStaticDynamicLDS = [&](FunctionVariableMap &LDSAccesses,
                                             bool DirectAccess) {
@@ -1154,8 +1172,6 @@ bool AMDGPUSwLowerLDS::run() {
         continue;
 
       assert(isKernelLDS(F));
-      if (!F->hasFnAttribute(Attribute::SanitizeAddress))
-        continue;
 
       // Only inserts if key isn't already in the map.
       FuncLDSAccessInfo.KernelToLDSParametersMap.insert(
@@ -1222,6 +1238,7 @@ bool AMDGPUSwLowerLDS::run() {
   // Get non-kernels with LDS ptr as argument and called by kernels.
   getNonKernelsWithLDSArguments(CG);
 
+  // Lower LDS accesses in non-kernels.
   if (!FuncLDSAccessInfo.NonKernelToLDSAccessMap.empty() ||
       !FuncLDSAccessInfo.NonKernelsWithLDSArgument.empty()) {
     NonKernelLDSParameters NKLDSParams;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
new file mode 100644
index 0000000000000..d67a42aaf435c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+; RUN: llc < %s -enable-new-pm -stop-after=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if static LDS accesses in kernels without sanitize_address attribute are lowered if
+; other kernels in module have sanitize_address attribute.
+ at lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+ at lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
+
+define amdgpu_kernel void @k0() sanitize_address {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  WId:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP20:%.*]]
+; CHECK:       Malloc:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT:    [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 31)
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
+; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP19]], i64 28)
+; CHECK-NEXT:    br label [[TMP20]]
+; CHECK:       20:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]]
+; CHECK-NEXT:    [[TMP26:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP26]]
+; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP27]], align 4
+; CHECK-NEXT:    [[TMP28:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP28]]
+; CHECK-NEXT:    store i32 8, ptr addrspace(1) [[TMP29]], align 2
+; CHECK-NEXT:    br label [[CONDFREE:%.*]]
+; CHECK:       CondFree:
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
+; CHECK:       Free:
+; CHECK-NEXT:    [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
+; CHECK-NEXT:    [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
+; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       End:
+; CHECK-NEXT:    ret void
+;
+  store i8 7, ptr addrspace(3) @lds_1, align 4
+  store i32 8, ptr addrspace(3) @lds_2, align 2
+  ret void
+}
+
+define amdgpu_kernel void @k1() {
+; CHECK-LABEL: define amdgpu_kernel void @k1(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  WId:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP18:%.*]]
+; CHECK:       Malloc:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT:    [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 28)
+; CHECK-NEXT:    br label [[TMP18]]
+; CHECK:       18:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, i32 [[TMP20]]
+; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
+; CHECK-NEXT:    store i32 9, ptr addrspace(1) [[TMP23]], align 2
+; CHECK-NEXT:    br label [[CONDFREE:%.*]]
+; CHECK:       CondFree:
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
+; CHECK:       Free:
+; CHECK-NEXT:    [[TMP24:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP25:%.*]] = ptrtoint ptr [[TMP24]] to i64
+; CHECK-NEXT:    [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
+; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP26]], i64 [[TMP25]])
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       End:
+; CHECK-NEXT:    ret void
+;
+  store i32 9, ptr addrspace(3) @lds_2, align 2
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
new file mode 100644
index 0000000000000..aac64e2f3bc08
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+; RUN: llc < %s -enable-new-pm -stop-after=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if LDS accesses in kernels without sanitize_address attribute are not lowered
+; if all other kernels don't have sanitize_address attribute.
+ at lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+ at lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
+
+;.
+; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+; CHECK: @lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
+;.
+define amdgpu_kernel void @k0() {
+; CHECK-LABEL: define amdgpu_kernel void @k0() {
+; CHECK-NEXT:    store i8 7, ptr addrspace(3) @lds_1, align 4
+; CHECK-NEXT:    store i32 8, ptr addrspace(3) @lds_2, align 2
+; CHECK-NEXT:    ret void
+;
+  store i8 7, ptr addrspace(3) @lds_1, align 4
+  store i32 8, ptr addrspace(3) @lds_2, align 2
+  ret void
+}
+
+define amdgpu_kernel void @k1() {
+; CHECK-LABEL: define amdgpu_kernel void @k1() {
+; CHECK-NEXT:    store i32 9, ptr addrspace(3) @lds_2, align 2
+; CHECK-NEXT:    ret void
+;
+  store i32 9, ptr addrspace(3) @lds_2, align 2
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-all.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-all.ll
new file mode 100644
index 0000000000000..4625a7f626f9b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-all.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if static LDS is lowered correctly when a non-kernel without sanitize_address attr with LDS accesses is called from 
+; kernel which has sanitize_address attr.
+ at lds_1 = internal addrspace(3) global [1 x i8] poison, align 1
+ at lds_2 = internal addrspace(3) global [1 x i32] poison, align 2
+ at lds_3 = external addrspace(3) global [3 x i8], align 4
+ at lds_4 = external addrspace(3) global [4 x i8], align 8
+
+;.
+; @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
+; @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 3, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 4, i32 32 } }, no_sanitize_address
+; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address
+; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address
+;.
+define void @use_variables() {
+; CHECK-LABEL: define void @use_variables() {
+; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP18]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP18]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP18]], i32 1
+; CHECK-NEXT:    [[TMP10:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = addrspacecast ptr addrspace(1) [[TMP14]] to ptr
+; CHECK-NEXT:    [[TMP16:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP16]]
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP17]] to ptr
+; CHECK-NEXT:    store i8 3, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr addrspace(3) [[TMP12]] to i32
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP19]]
+; CHECK-NEXT:    store i8 3, ptr addrspace(1) [[TMP20]], align 8
+; CHECK-NEXT:    ret void
+;
+  %X = addrspacecast ptr addrspace(3) @lds_3 to ptr
+  store i8 3, ptr addrspacecast( ptr addrspace(3) @lds_3 to ptr), align 4
+  store i8 3, ptr addrspace(3) @lds_4, align 8
+  ret void
+}
+
+define amdgpu_kernel void @k0() sanitize_address {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
+; CHECK-NEXT:  [[WID:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB24:.*]]
+; CHECK:       [[MALLOC]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT:    store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT:    [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 31)
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
+; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP19]], i64 28)
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 99
+; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP21]], i64 29)
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 132
+; CHECK-NEXT:    [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64
+; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP23]], i64 28)
+; CHECK-NEXT:    br label %[[BB24]]
+; CHECK:       [[BB24]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    [[TMP25:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP26]]
+; CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP28]]
+; CHECK-NEXT:    call void @use_variables()
+; CHECK-NEXT:    [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP27]] to i32
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP25]], i32 [[TMP30]]
+; CHECK-NEXT:    store i8 7, ptr addrspace(1) [[TMP31]], align 1
+; CHECK-NEXT:    [[TMP32:%.*]] = ptrtoint ptr addrspace(3) [[TMP29]] to i32
+; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP25]], i32 [[TMP32]]
+; CHECK-NEXT:    store i32 8, ptr addrspace(1) [[TMP33]], align 2
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
+; CHECK-NEXT:    [[TMP34:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT:    [[TMP35:%.*]] = ptrtoint ptr [[TMP34]] to i64
+; CHECK-NEXT:    [[TMP36:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64
+; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP36]], i64 [[TMP35]])
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    ret void
+;
+  call void @use_variables()
+  store i8 7, ptr addrspace(3) @lds_1, align 1
+  store i32 8, ptr addrspace(3) @lds_2, align 2
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-none.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-none.ll
new file mode 100644
index 0000000000000..5dbab5643b929
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-lower-none.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if LDS is not lowered when a non-kernel with sanitize_address attr and with LDS accesses is called from 
+; kernel which doesn't have sanitize_address attr.
+ at lds_1 = internal addrspace(3) global [1 x i8] poison, align 1
+ at lds_2 = internal addrspace(3) global [1 x i32] poison, align 2
+ at lds_3 = external addrspace(3) global [3 x i8], align 4
+ at lds_4 = external addrspace(3) global [4 x i8], align 8
+
+;.
+; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 1
+; CHECK: @lds_2 = internal addrspace(3) global [1 x i32] poison, align 2
+; CHECK: @lds_3 = external addrspace(3) global [3 x i8], align 4
+; CHECK: @lds_4 = external addrspace(3) global [4 x i8], align 8
+;.
+define void @use_variables() sanitize_address {
+; CHECK-LABEL: define void @use_variables(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[X:%.*]] = addrspacecast ptr addrspace(3) @lds_3 to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(3) @lds_3 to ptr
+; CHECK-NEXT:    store i8 3, ptr [[TMP1]], align 4
+; CHECK-NEXT:    store i8 3, ptr addrspace(3) @lds_4, align 8
+; CHECK-NEXT:    ret void
+;
+  %X = addrspacecast ptr addrspace(3) @lds_3 to ptr
+  store i8 3, ptr addrspacecast( ptr addrspace(3) @lds_3 to ptr), align 4
+  store i8 3, ptr addrspace(3) @lds_4, align 8
+  ret void
+}
+
+define amdgpu_kernel void @k0() {
+; CHECK-LABEL: define amdgpu_kernel void @k0() {
+; CHECK-NEXT:    call void @use_variables()
+; CHECK-NEXT:    store i8 7, ptr addrspace(3) @lds_1, align 1
+; CHECK-NEXT:    store i32 8, ptr addrspace(3) @lds_2, align 2
+; CHECK-NEXT:    ret void
+;
+  call void @use_variables()
+  store i8 7, ptr addrspace(3) @lds_1, align 1
+  store i32 8, ptr addrspace(3) @lds_2, align 2
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address }
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+;.

>From 3e27d67c187c7ef1e2ef80ecb8c136fdb125b7fe Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Fri, 14 Mar 2025 20:35:39 +0530
Subject: [PATCH 2/2] [AMDGPU] update tests to use new version of
 update_test_checks.py

---
 .../AMDGPU/amdgpu-sw-lower-lds-lower-all.ll   | 51 +++++++++----------
 .../AMDGPU/amdgpu-sw-lower-lds-lower-none.ll  |  3 +-
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
index d67a42aaf435c..f30a382a62c6b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-all.ll
@@ -1,6 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
-; RUN: llc < %s -enable-new-pm -stop-after=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check if static LDS accesses in kernels without sanitize_address attribute are lowered if
 ; other kernels in module have sanitize_address attribute.
@@ -10,15 +9,15 @@
 define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-LABEL: define amdgpu_kernel void @k0(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP20:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
@@ -37,9 +36,9 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
 ; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP19]], i64 28)
-; CHECK-NEXT:    br label [[TMP20]]
-; CHECK:       20:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB20]]
+; CHECK:       [[BB20]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
 ; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -52,17 +51,17 @@ define amdgpu_kernel void @k0() sanitize_address {
 ; CHECK-NEXT:    [[TMP28:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
 ; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP28]]
 ; CHECK-NEXT:    store i32 8, ptr addrspace(1) [[TMP29]], align 2
-; CHECK-NEXT:    br label [[CONDFREE:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
 ; CHECK-NEXT:    [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
 ; CHECK-NEXT:    [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   store i8 7, ptr addrspace(3) @lds_1, align 4
@@ -73,15 +72,15 @@ define amdgpu_kernel void @k0() sanitize_address {
 define amdgpu_kernel void @k1() {
 ; CHECK-LABEL: define amdgpu_kernel void @k1(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:  WId:
+; CHECK-NEXT:  [[WID:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP18:%.*]]
-; CHECK:       Malloc:
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK:       [[MALLOC]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
@@ -97,9 +96,9 @@ define amdgpu_kernel void @k1() {
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
 ; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
 ; CHECK-NEXT:    call void @__asan_poison_region(i64 [[TMP17]], i64 28)
-; CHECK-NEXT:    br label [[TMP18]]
-; CHECK:       18:
-; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
+; CHECK-NEXT:    [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
 ; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
@@ -107,17 +106,17 @@ define amdgpu_kernel void @k1() {
 ; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
 ; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
 ; CHECK-NEXT:    store i32 9, ptr addrspace(1) [[TMP23]], align 2
-; CHECK-NEXT:    br label [[CONDFREE:%.*]]
-; CHECK:       CondFree:
+; CHECK-NEXT:    br label %[[CONDFREE:.*]]
+; CHECK:       [[CONDFREE]]:
 ; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
-; CHECK-NEXT:    br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
-; CHECK:       Free:
+; CHECK-NEXT:    br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK:       [[FREE]]:
 ; CHECK-NEXT:    [[TMP24:%.*]] = call ptr @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:    [[TMP25:%.*]] = ptrtoint ptr [[TMP24]] to i64
 ; CHECK-NEXT:    [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
 ; CHECK-NEXT:    call void @__asan_free_impl(i64 [[TMP26]], i64 [[TMP25]])
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       End:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
   store i32 9, ptr addrspace(3) @lds_2, align 2
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
index aac64e2f3bc08..5ce12888babbc 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-lower-none.ll
@@ -1,6 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
-; RUN: llc < %s -enable-new-pm -stop-after=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
 
 ; Test to check if LDS accesses in kernels without sanitize_address attribute are not lowered
 ; if all other kernels don't have sanitize_address attribute.



More information about the llvm-commits mailing list