[llvm] [AMDGPU][ASAN] Move infer-address-spaces before amdgpu-sw-lower-lds in pass pipeline (PR #120375)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 21 20:18:29 PST 2024
https://github.com/skc7 updated https://github.com/llvm/llvm-project/pull/120375
>From adae221c91e3605b32c273886df261544cf411f6 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Tue, 17 Dec 2024 12:04:00 +0530
Subject: [PATCH 1/2] [AMDGPU][ASAN] Move infer-address-spaces before
amdgpu-sw-lower-lds
---
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 16 +--
.../amdgpu-sw-lower-lds-static-lds-test-O0.ll | 102 ++++++++++++++++++
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 12 ++-
3 files changed, 120 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7256eec89008a5..6bb10ab5bc3218 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1174,6 +1174,9 @@ void AMDGPUPassConfig::addIRPasses() {
// Replace OpenCL enqueued block function pointers with global variables.
addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
+ addPass(createInferAddressSpacesPass());
+
// Lower LDS accesses to global memory pass if address sanitizer is enabled.
if (EnableSwLowerLDS)
addPass(createAMDGPUSwLowerLDSLegacyPass(&TM));
@@ -1183,9 +1186,6 @@ void AMDGPUPassConfig::addIRPasses() {
addPass(createAMDGPULowerModuleLDSLegacyPass(&TM));
}
- if (TM.getOptLevel() > CodeGenOptLevel::None)
- addPass(createInferAddressSpacesPass());
-
// Run atomic optimizer before Atomic Expand
if ((TM.getTargetTriple().getArch() == Triple::amdgcn) &&
(TM.getOptLevel() >= CodeGenOptLevel::Less) &&
@@ -1941,13 +1941,17 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
// TODO: Missing OpenCLEnqueuedBlockLowering
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
+ addPass(InferAddressSpacesPass());
+
+ // Lower LDS accesses to global memory pass if address sanitizer is enabled.
+ if (EnableSwLowerLDS)
+ addPass(AMDGPUSwLowerLDSPass(TM));
+
// Runs before PromoteAlloca so the latter can account for function uses
if (EnableLowerModuleLDS)
addPass(AMDGPULowerModuleLDSPass(TM));
- if (TM.getOptLevel() > CodeGenOptLevel::None)
- addPass(InferAddressSpacesPass());
-
// Run atomic optimizer before Atomic Expand
if (TM.getOptLevel() >= CodeGenOptLevel::Less &&
(AMDGPUAtomicOptimizerStrategy != ScanOptions::None))
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll
new file mode 100644
index 00000000000000..4896cd335a7fa4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt < %s -passes='function(infer-address-spaces),amdgpu-sw-lower-lds' -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+@lds = internal addrspace(3) global [5 x i32] undef, align 16
+
+;.
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 16, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 20, i32 64 } }, no_sanitize_address
+;.
+define amdgpu_kernel void @k0() sanitize_address {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: WId:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP18:%.*]]
+; CHECK: Malloc:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 52
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 44)
+; CHECK-NEXT: br label [[TMP18]]
+; CHECK: 18:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP20]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(3) [[TMP21]], i64 0, i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[GEP]] to i32
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP23]] to i64
+; CHECK-NEXT: [[TMP25:%.*]] = lshr i64 [[TMP24]], 3
+; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[TMP25]], 2147450880
+; CHECK-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr
+; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[TMP27]], align 1
+; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i8 [[TMP28]], 0
+; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP24]], 7
+; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[TMP30]], 3
+; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP31]] to i8
+; CHECK-NEXT: [[TMP33:%.*]] = icmp sge i8 [[TMP32]], [[TMP28]]
+; CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP29]], [[TMP33]]
+; CHECK-NEXT: [[TMP35:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP34]])
+; CHECK-NEXT: [[TMP36:%.*]] = icmp ne i64 [[TMP35]], 0
+; CHECK-NEXT: br i1 [[TMP36]], label [[ASAN_REPORT:%.*]], label [[TMP39:%.*]], !prof [[PROF2:![0-9]+]]
+; CHECK: asan.report:
+; CHECK-NEXT: br i1 [[TMP34]], label [[TMP37:%.*]], label [[TMP38:%.*]]
+; CHECK: 37:
+; CHECK-NEXT: call void @__asan_report_store4(i64 [[TMP24]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label [[TMP38]]
+; CHECK: 38:
+; CHECK-NEXT: br label [[TMP39]]
+; CHECK: 39:
+; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP23]], align 4
+; CHECK-NEXT: br label [[CONDFREE:%.*]]
+; CHECK: CondFree:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
+; CHECK: Free:
+; CHECK-NEXT: [[TMP40:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[TMP40]] to i64
+; CHECK-NEXT: [[TMP42:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP42]], i64 [[TMP41]])
+; CHECK-NEXT: br label [[END]]
+; CHECK: End:
+; CHECK-NEXT: ret void
+;
+ %gep = getelementptr inbounds [5 x i32], ptr addrspacecast (ptr addrspace(3) @lds to ptr), i64 0, i64 0
+ store i32 1, ptr %gep, align 4
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="16" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
+; CHECK: attributes #[[ATTR6]] = { nomerge }
+;.
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index e77f4f69e265bb..ff4f7864eec7d3 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -187,10 +187,11 @@
; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O1-NEXT: Function Alias Analysis Results
; GCN-O1-NEXT: Lower OpenCL enqueued blocks
+; GCN-O1-NEXT: FunctionPass Manager
+; GCN-O1-NEXT: Infer address spaces
; GCN-O1-NEXT: AMDGPU Software lowering of LDS
; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O1-NEXT: FunctionPass Manager
-; GCN-O1-NEXT: Infer address spaces
; GCN-O1-NEXT: Dominator Tree Construction
; GCN-O1-NEXT: Cycle Info Analysis
; GCN-O1-NEXT: Uniformity Analysis
@@ -467,10 +468,11 @@
; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O1-OPTS-NEXT: Function Alias Analysis Results
; GCN-O1-OPTS-NEXT: Lower OpenCL enqueued blocks
+; GCN-O1-OPTS-NEXT: FunctionPass Manager
+; GCN-O1-OPTS-NEXT: Infer address spaces
; GCN-O1-OPTS-NEXT: AMDGPU Software lowering of LDS
; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O1-OPTS-NEXT: FunctionPass Manager
-; GCN-O1-OPTS-NEXT: Infer address spaces
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
; GCN-O1-OPTS-NEXT: Cycle Info Analysis
; GCN-O1-OPTS-NEXT: Uniformity Analysis
@@ -777,10 +779,11 @@
; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O2-NEXT: Function Alias Analysis Results
; GCN-O2-NEXT: Lower OpenCL enqueued blocks
+; GCN-O2-NEXT: FunctionPass Manager
+; GCN-O2-NEXT: Infer address spaces
; GCN-O2-NEXT: AMDGPU Software lowering of LDS
; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O2-NEXT: FunctionPass Manager
-; GCN-O2-NEXT: Infer address spaces
; GCN-O2-NEXT: Dominator Tree Construction
; GCN-O2-NEXT: Cycle Info Analysis
; GCN-O2-NEXT: Uniformity Analysis
@@ -1091,10 +1094,11 @@
; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O3-NEXT: Function Alias Analysis Results
; GCN-O3-NEXT: Lower OpenCL enqueued blocks
+; GCN-O3-NEXT: FunctionPass Manager
+; GCN-O3-NEXT: Infer address spaces
; GCN-O3-NEXT: AMDGPU Software lowering of LDS
; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O3-NEXT: FunctionPass Manager
-; GCN-O3-NEXT: Infer address spaces
; GCN-O3-NEXT: Dominator Tree Construction
; GCN-O3-NEXT: Cycle Info Analysis
; GCN-O3-NEXT: Uniformity Analysis
>From 70275c38078ba4d0ba948899cb8a2a7a1bddbcb7 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Thu, 19 Dec 2024 22:24:26 +0530
Subject: [PATCH 2/2] [AMDGPU] update test to replace undef with poison.
---
.../CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll
index 4896cd335a7fa4..9e4fa21ac074d5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
; RUN: opt < %s -passes='function(infer-address-spaces),amdgpu-sw-lower-lds' -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
-@lds = internal addrspace(3) global [5 x i32] undef, align 16
+@lds = internal addrspace(3) global [5 x i32] poison, align 16
;.
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 16, !absolute_symbol [[META0:![0-9]+]]
More information about the llvm-commits
mailing list