[llvm] [AMDGPU] Optimize block count calculations to the new ABI (PR #174112)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 31 15:05:39 PST 2025
================
@@ -322,6 +325,48 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) {
}
}
+ // Upgrade the old method of calculating the block size using the grid size.
+ // We pattern match any case where the implicit argument group size is the
+ // divisor to a dispatch packet grid size read of the same dimension.
+ if (IsV5OrAbove && llvm::any_of(GroupSizes, [](Value *V) { return V; })) {
+ for (int I = 0; I < 3; I++) {
+ Value *GroupSize = GroupSizes[I];
+ if (!GroupSize)
+ continue;
+
+ for (User *U : GroupSize->users()) {
+ Instruction *Inst = dyn_cast<Instruction>(U);
+ if (isa<ZExtInst>(Inst))
+ Inst = Inst->getNextNode();
+
+ using namespace llvm::PatternMatch;
+ Value *Idx;
+ if (!match(Inst,
+ m_UDiv(m_Load(m_GEP(
+ m_Intrinsic<Intrinsic::amdgcn_dispatch_ptr>(),
+ m_Value(Idx))),
+ m_Value())))
+ continue;
+
+ ConstantInt *Offset = dyn_cast<ConstantInt>(Idx);
+ if (!Offset ||
+ Offset->getZExtValue() != GRID_SIZE_X + I * sizeof(uint32_t))
+ continue;
+
+ IRBuilder<> Builder(Inst);
+
+ Value *GEP = Builder.CreateConstGEP1_64(Builder.getInt8Ty(), CI,
+ HIDDEN_BLOCK_COUNT_X +
+ I * sizeof(uint32_t));
+ Value *BlockCount = Builder.CreateLoad(Builder.getInt32Ty(), GEP);
----------------
jhuber6 wrote:
The grid size can be any u32, but is there possibly some metadata that restricts that? I'm not sure how easy it'd be to propagate here, since it's on the kernel, right?
https://github.com/llvm/llvm-project/pull/174112
More information about the llvm-commits
mailing list