[llvm-branch-commits] [llvm] [WIP][AMDGPU] Enable `AAAddressSpace` in `AMDGPUAttributor` (PR #101593)
Shilei Tian via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Aug 1 19:59:07 PDT 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/101593
>From 9b743f5bd577be07c858b1357da8d39264bc34db Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 1 Aug 2024 20:30:07 -0400
Subject: [PATCH] [WIP][AMDGPU] Enable `AAAddressSpace` in `AMDGPUAttributor`
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 13 ++++++-
.../AMDGPU/annotate-kernel-features-hsa.ll | 36 +++++++++++--------
.../CodeGen/AMDGPU/simple-indirect-call.ll | 15 ++++----
3 files changed, 43 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index de1f3421cce4e..39c52140dfbd2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1038,7 +1038,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
- &AAUnderlyingObjects::ID});
+ &AAUnderlyingObjects::ID, &AAAddressSpace::ID});
AttributorConfig AC(CGUpdater);
AC.Allowed = &Allowed;
@@ -1064,6 +1064,17 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
} else if (CC == CallingConv::AMDGPU_KERNEL) {
addPreloadKernArgHint(F, TM);
}
+
+ for (auto &I : instructions(F)) {
+ if (auto *LI = dyn_cast<LoadInst>(&I)) {
+ A.getOrCreateAAFor<AAAddressSpace>(
+ IRPosition::value(*LI->getPointerOperand()));
+ }
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ A.getOrCreateAAFor<AAAddressSpace>(
+ IRPosition::value(*SI->getPointerOperand()));
+ }
+ }
}
ChangeStatus Change = A.run();
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index 43cdf85ed3818..879bceaef97c0 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -425,8 +425,7 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
-; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
-; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(3) %ptr to ptr
@@ -443,8 +442,7 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(5) [[PTR:%.*]]) #[[ATTR12]] {
-; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
@@ -478,11 +476,16 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #1 {
; No-op addrspacecast should not use queue ptr
define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #1 {
-; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
-; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
-; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
-; HSA-NEXT: ret void
+; AKF_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; AKF_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; AKF_HSA-NEXT: ret void
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4
+; ATTRIBUTOR_HSA-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(1) %ptr to ptr
store volatile i32 0, ptr %stof
@@ -490,11 +493,16 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %pt
}
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #1 {
-; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
-; HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
-; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
-; HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4
-; HSA-NEXT: ret void
+; AKF_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
+; AKF_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
+; AKF_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4
+; AKF_HSA-NEXT: ret void
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
+; ATTRIBUTOR_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr addrspace(4) [[PTR]], align 4
+; ATTRIBUTOR_HSA-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(4) %ptr to ptr
%ld = load volatile i32, ptr %stof
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index e86ee1adef3d0..3a6b0485d2417 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -34,9 +34,8 @@ define amdgpu_kernel void @test_simple_indirect_call() {
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
-; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8
-; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
; ATTRIBUTOR_GCN-NEXT: ret void
;
@@ -75,12 +74,16 @@ define amdgpu_kernel void @test_simple_indirect_call() {
ret void
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
+; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+;.
+; ATTRIBUTOR_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+;.
More information about the llvm-branch-commits
mailing list