[PATCH] D114794: [AMDGPU] Add support for in-order bvh in waitcnt pass
David Stuttard via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 30 06:01:00 PST 2021
dstuttard created this revision.
Herald added subscribers: foad, kerbowa, hiraditya, t-tye, tpr, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm.
dstuttard requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.
BVH instructions should be handled separately from VMEM instructions and
VMEM-with-sampler instructions for waitcnt handling.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D114794
Files:
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
Index: llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
+++ llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
@@ -16,6 +16,7 @@
bb.0:
; GCN-LABEL: name: waitcnt-check-vs-vmem
; GCN: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
+ ; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
@@ -37,6 +38,7 @@
bb.0:
; GCN-LABEL: name: waitcnt-check-vs-vmem-reverse
; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
Index: llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -150,6 +150,8 @@
VMEM_NOSAMPLER,
// MIMG instructions with a sampler.
VMEM_SAMPLER,
+ // BVH instructions
+ VMEM_BVH
};
VmemType getVmemType(const MachineInstr &Inst) {
@@ -157,9 +159,10 @@
if (!SIInstrInfo::isMIMG(Inst))
return VMEM_NOSAMPLER;
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode());
- return AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler
- ? VMEM_SAMPLER
- : VMEM_NOSAMPLER;
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+ return BaseInfo->BVH ? VMEM_BVH
+ : BaseInfo->Sampler ? VMEM_SAMPLER : VMEM_NOSAMPLER;
}
void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D114794.390676.patch
Type: text/x-patch
Size: 2830 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211130/5487a188/attachment.bin>
More information about the llvm-commits
mailing list