[llvm] 53fb907 - AMDGPU: Special case uniformity info for single lane workgroups
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 28 04:25:57 PDT 2023
Author: Matt Arsenault
Date: 2023-06-28T07:25:48-04:00
New Revision: 53fb907df4723f5267f30fe8da103f91dfb1a175
URL: https://github.com/llvm/llvm-project/commit/53fb907df4723f5267f30fe8da103f91dfb1a175
DIFF: https://github.com/llvm/llvm-project/commit/53fb907df4723f5267f30fe8da103f91dfb1a175.diff
LOG: AMDGPU: Special case uniformity info for single lane workgroups
Constructors/destructors and OpenMP make use of single lane groups
in some cases.
Added:
Modified:
llvm/lib/Analysis/UniformityAnalysis.cpp
llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll
llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index de5160f7fa04b..bf0b194dcd708 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -120,7 +120,7 @@ llvm::UniformityInfo UniformityInfoAnalysis::run(Function &F,
auto &CI = FAM.getResult<CycleAnalysis>(F);
UniformityInfo UI{F, DT, CI, &TTI};
// Skip computation if we can assume everything is uniform.
- if (TTI.hasBranchDivergence())
+ if (TTI.hasBranchDivergence(&F))
UI.compute();
return UI;
@@ -175,7 +175,7 @@ bool UniformityInfoWrapperPass::runOnFunction(Function &F) {
UniformityInfo{F, domTree, cycleInfo, &targetTransformInfo};
// Skip computation if we can assume everything is uniform.
- if (targetTransformInfo.hasBranchDivergence())
+ if (targetTransformInfo.hasBranchDivergence(m_function))
m_uniformityInfo.compute();
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
index f763ca91b6321..2ea03ddb1fccd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
@@ -14,6 +14,7 @@
#include "AMDGPURegBankSelect.h"
#include "AMDGPU.h"
+#include "GCNSubtarget.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/InitializePasses.h"
@@ -59,13 +60,14 @@ bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
assert(checkFunctionIsLegal(MF));
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
MachineCycleInfo &CycleInfo =
getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
MachineDominatorTree &DomTree = getAnalysis<MachineDominatorTree>();
- // TODO: Check for single lane execution.
MachineUniformityInfo Uniformity =
- computeMachineUniformityInfo(MF, CycleInfo, DomTree.getBase(), true);
+ computeMachineUniformityInfo(MF, CycleInfo, DomTree.getBase(),
+ !ST.isSingleLaneExecution(F));
(void)Uniformity; // TODO: Use this
assignRegisterBanks(MF);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 9393dee16df25..9b50f4fa53ac5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -477,6 +477,15 @@ unsigned AMDGPUSubtarget::getMaxWorkitemID(const Function &Kernel,
return getFlatWorkGroupSizes(Kernel).second - 1;
}
+// A function executes with a single lane when the largest possible workitem ID
+// is 0 in each of the three workitem ID dimensions.
+bool AMDGPUSubtarget::isSingleLaneExecution(const Function &Func) const {
+  for (unsigned Dim = 0; Dim != 3; ++Dim) {
+    // Any dimension that admits an ID above 0 allows more than one workitem.
+    if (getMaxWorkitemID(Func, Dim) != 0)
+      return false;
+  }
+
+  return true;
+}
+
bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
Function *Kernel = I->getParent()->getParent();
unsigned MinSize = 0;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 7d8dce7f0c7d3..4827f319b4441 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -272,6 +272,9 @@ class AMDGPUSubtarget {
/// 2) dimension.
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
+ /// Return true if only a single workitem can be active in a wave, i.e. the
+ /// maximum workitem ID for \p Kernel is 0 in each of the three dimensions.
+ bool isSingleLaneExecution(const Function &Kernel) const;
+
/// Creates value range metadata on an workitemid.* intrinsic call or load.
bool makeLIDRangeMetadata(Instruction *I) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index e9ca0635836d9..bec4d30fb9f03 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -302,7 +302,7 @@ GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
}
bool GCNTTIImpl::hasBranchDivergence(const Function *F) const {
- return true;
+ return !F || !ST->isSingleLaneExecution(*F);
}
unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const {
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll
index 205d69eb8b3a1..48528c6112b00 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll
@@ -52,6 +52,12 @@ define void @asm_mixed_sgpr_vgpr(i32 %divergent) {
ret void
}
+; CHECK-LABEL: for function 'single_lane_func_arguments':
+; CHECK-NOT: DIVERGENT
+; Attribute group #2 sets "amdgpu-flat-work-group-size"="1,1"; the test
+; expects that not even the function arguments are reported as divergent.
+define void @single_lane_func_arguments(i32 %i32, i1 %i1) #2 {
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.readfirstlane(i32) #0
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #1
@@ -60,3 +66,4 @@ declare i64 @llvm.amdgcn.ballot.i32(i1) #1
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind readnone convergent }
+attributes #2 = { "amdgpu-flat-work-group-size"="1,1" }
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
index ed05aaad7efd4..7466c2396e6f1 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
@@ -41,5 +41,83 @@ define amdgpu_kernel void @mbcnt_hi() #1 {
ret void
}
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_singlethreaded':
+; CHECK-NOT: DIVERGENT
+; Attribute group #2 sets "amdgpu-flat-work-group-size"="1,1"; the test
+; expects the workitem.id.x result not to be reported as divergent.
+define amdgpu_kernel void @workitem_id_x_singlethreaded() #2 {
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %id.x, ptr addrspace(1) undef
+ ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_y_singlethreaded':
+; CHECK-NOT: DIVERGENT
+; Flat work-group size is pinned to "1,1" by attribute group #2, so the Y
+; workitem ID is expected to be uniform.
+define amdgpu_kernel void @workitem_id_y_singlethreaded() #2 {
+  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+  store volatile i32 %id.y, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_z_singlethreaded':
+; CHECK-NOT: DIVERGENT
+; Flat work-group size is pinned to "1,1" by attribute group #2, so the Z
+; workitem ID is expected to be uniform. This test must query the Z intrinsic
+; (not Y) so that all three dimensions are covered.
+; NOTE(review): relies on @llvm.amdgcn.workitem.id.z being declared earlier in
+; this test file, outside the visible hunk - confirm.
+define amdgpu_kernel void @workitem_id_z_singlethreaded() #2 {
+  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+  store volatile i32 %id.z, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_singlethreaded_md':
+; CHECK-NOT: DIVERGENT
+; !reqd_work_group_size !0 is {1, 1, 1}; the test expects the workitem.id.x
+; result not to be reported as divergent.
+define amdgpu_kernel void @workitem_id_x_singlethreaded_md() !reqd_work_group_size !0 {
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %id.x, ptr addrspace(1) undef
+ ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_y_singlethreaded_md':
+; CHECK-NOT: DIVERGENT
+; !reqd_work_group_size !0 is {1, 1, 1}, so the Y workitem ID is expected to
+; be uniform.
+define amdgpu_kernel void @workitem_id_y_singlethreaded_md() !reqd_work_group_size !0 {
+  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+  store volatile i32 %id.y, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_z_singlethreaded_md':
+; CHECK-NOT: DIVERGENT
+; !reqd_work_group_size !0 is {1, 1, 1}, so the Z workitem ID is expected to
+; be uniform. This test must query the Z intrinsic (not Y) so that all three
+; dimensions are covered by the metadata-based variant as well.
+; NOTE(review): relies on @llvm.amdgcn.workitem.id.z being declared earlier in
+; this test file, outside the visible hunk - confirm.
+define amdgpu_kernel void @workitem_id_z_singlethreaded_md() !reqd_work_group_size !0 {
+  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+  store volatile i32 %id.z, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimx':
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+; !reqd_work_group_size !1 is {2, 1, 1}: the first dimension is larger than 1,
+; so the workitem.id.x result is expected to be reported as divergent.
+define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimx() !reqd_work_group_size !1 {
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %id.x, ptr addrspace(1) undef
+ ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimy':
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+; !reqd_work_group_size !2 is {1, 2, 1}: only the second dimension is larger
+; than 1, yet the workitem.id.x result is still expected to be reported as
+; divergent.
+define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimy() !reqd_work_group_size !2 {
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %id.x, ptr addrspace(1) undef
+ ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimz':
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+; !reqd_work_group_size !3 is {1, 1, 2}: only the third dimension is larger
+; than 1, yet the workitem.id.x result is still expected to be reported as
+; divergent.
+define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimz() !reqd_work_group_size !3 {
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %id.x, ptr addrspace(1) undef
+ ret void
+}
+
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
+attributes #2 = { "amdgpu-flat-work-group-size"="1,1" }
+
+!0 = !{i32 1, i32 1, i32 1}
+!1 = !{i32 2, i32 1, i32 1}
+!2 = !{i32 1, i32 2, i32 1}
+!3 = !{i32 1, i32 1, i32 2}
More information about the llvm-commits
mailing list