[llvm] [AMDGPU] Optimize block count calculations to the new ABI (PR #174112)
Juan Manuel Martinez Caamaño via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 2 00:54:22 PST 2026
================
@@ -323,6 +326,50 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) {
}
}
+ // Upgrade the old method of calculating the block size using the grid size.
+ // We pattern match any case where the implicit argument group size is the
+ // divisor to a dispatch packet grid size read of the same dimension.
+ if (IsV5OrAbove) {
+ for (int I = 0; I < 3; I++) {
+ Value *GroupSize = GroupSizes[I];
+ if (!GroupSize)
+ continue;
+
+ for (User *U : GroupSize->users()) {
+ Instruction *Inst = cast<Instruction>(U);
+ if (isa<ZExtInst>(Inst) && !Inst->use_empty())
+ Inst = dyn_cast<Instruction>(*Inst->user_begin());
+
+ using namespace llvm::PatternMatch;
+ if (!match(
+ Inst,
+ m_UDiv(m_ZExtOrSelf(m_Load(m_GEP(
+ m_Intrinsic<Intrinsic::amdgcn_dispatch_ptr>(),
+ m_SpecificInt(GRID_SIZE_X + I * sizeof(uint32_t))))),
+ m_Value())))
+ continue;
+
+ IRBuilder<> Builder(Inst);
+
+ Value *GEP = Builder.CreateConstGEP1_64(Builder.getInt8Ty(), CI,
+ HIDDEN_BLOCK_COUNT_X +
+ I * sizeof(uint32_t));
+ Instruction *BlockCount = Builder.CreateLoad(Builder.getInt32Ty(), GEP);
+ if (MDNode *Node = Inst->getMetadata(LLVMContext::MD_invariant_load))
+ BlockCount->setMetadata(LLVMContext::MD_invariant_load, Node);
+ if (MDNode *Node = Inst->getMetadata(LLVMContext::MD_noundef))
+ BlockCount->setMetadata(LLVMContext::MD_noundef, Node);
+
+ BlockCount =
+ cast<Instruction>(Builder.CreateZExt(BlockCount, Inst->getType()));
+
+ Inst->replaceAllUsesWith(BlockCount);
----------------
jmmartinez wrote:
Nitpick: if we store the zero-extended value in a new local variable instead of reassigning `BlockCount`, we can get rid of the `cast<Instruction>`.
```suggestion
Value* BlockCountZExt =
Builder.CreateZExt(BlockCount, Inst->getType());
Inst->replaceAllUsesWith(BlockCountZExt);
```
https://github.com/llvm/llvm-project/pull/174112
More information about the llvm-commits mailing list