[PATCH] D123525: [AMDGPU] On gfx908, reserve VGPR for AGPR copy based on register budget.
Mahesha S via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 12 02:05:28 PDT 2022
hsmhsm updated this revision to Diff 422144.
hsmhsm added a comment.
Here is the update in which we always reserve the highest available VGPR, irrespective of the
register constraint.
With this update, the two lit tests below fail to compile because register allocation (RegAlloc) fails.
spill-agpr.ll
-------------
; Reproducer quoted from spill-agpr.ll; this message reports that it fails in
; register allocation with the updated patch.
; The kernel takes a global (addrspace(1)) pointer to <4 x float> and uses
; attribute group #4, which is defined after the function as nounwind with
; "amdgpu-num-vgpr"="5".
; NOTE(review): the function name suggests the intended budget is 5 VGPRs plus
; 8 AGPRs -- confirm against the test's CHECK lines, which are not quoted here.
define amdgpu_kernel void @max_5regs_used_8a(<4 x float> addrspace(1)* %arg) #4 {
; Work-item id, used below as the element index into %arg.
%tid = call i32 @llvm.amdgcn.workitem.id.x()
; Inline asm that defines a float through the "=v" output constraint
; (the VGPR constraint letter for AMDGPU inline asm).
%v0 = call float asm sideeffect "; def $0", "=v"()
; Inline asm that defines a <4 x float> through the "=a" output constraint
; (the AGPR constraint letter for AMDGPU inline asm).
%a4 = call <4 x float> asm sideeffect "; def $0", "=a"()
; Load the <4 x float> at %arg[%tid] and pass it as the third operand of the
; MFMA intrinsic; the result is stored back to the same address.
%gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i32 %tid
%mai.in = load <4 x float>, <4 x float> addrspace(1)* %gep
%mai.out = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 1.0, <4 x float> %mai.in, i32 0, i32 0, i32 0)
store <4 x float> %mai.out, <4 x float> addrspace(1)* %gep
; %a4 is only consumed here, after the MFMA call, so it stays live across it.
store volatile <4 x float> %a4, <4 x float> addrspace(1)* undef
; %v0 is consumed last through a "v" input constraint, so it is live from its
; definition through every instruction above.
call void asm sideeffect "; use $0", "v"(float %v0);
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x()
declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
attributes #4 = { nounwind "amdgpu-num-vgpr"="5" }
spill-vgpr-to-agpr.ll
---------------------
; Reproducer quoted from spill-vgpr-to-agpr.ll; this message reports that it
; fails in register allocation with the updated patch.
; The kernel takes a global (addrspace(1)) pointer to i64 and uses attribute
; group #0, which is defined after the function as nounwind with
; "amdgpu-num-vgpr"="10".
; NOTE(review): the function name suggests the expected outcome is a partial
; spill of VGPRs with one AGPR in use -- confirm against the test's CHECK
; lines, which are not quoted here.
define amdgpu_kernel void @max_10_vgprs_used_1a_partial_spill(i64 addrspace(1)* %p) #0 {
; The index comes from a volatile load through an undef pointer, not from the
; workitem-id intrinsic declared after this function.
%tid = load volatile i32, i32 addrspace(1)* undef
; Inline asm with an empty template that takes the constant 1 through an "a"
; input constraint (the AGPR constraint letter for AMDGPU inline asm).
call void asm sideeffect "", "a"(i32 1)
; Chain of five i64 element addresses: %p1 = %p + %tid, and each later pointer
; is offset from the previous one by 8, 16, 24 and 32 elements respectively.
%p1 = getelementptr inbounds i64, i64 addrspace(1)* %p, i32 %tid
%p2 = getelementptr inbounds i64, i64 addrspace(1)* %p1, i32 8
%p3 = getelementptr inbounds i64, i64 addrspace(1)* %p2, i32 16
%p4 = getelementptr inbounds i64, i64 addrspace(1)* %p3, i32 24
%p5 = getelementptr inbounds i64, i64 addrspace(1)* %p4, i32 32
; Five volatile i64 loads, one from each address.
%v1 = load volatile i64, i64 addrspace(1)* %p1
%v2 = load volatile i64, i64 addrspace(1)* %p2
%v3 = load volatile i64, i64 addrspace(1)* %p3
%v4 = load volatile i64, i64 addrspace(1)* %p4
%v5 = load volatile i64, i64 addrspace(1)* %p5
; All five values are passed together as "v" inputs, so they are live at the
; same time here.
; NOTE(review): five i64 values presumably occupy ten 32-bit VGPRs, matching
; the "amdgpu-num-vgpr"="10" limit -- confirm.
call void asm sideeffect "", "v,v,v,v,v"(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5)
; Store the values back rotated by one slot: %v1 goes to %p2, and so on, with
; %v5 wrapping around to %p1. Each value therefore stays live past the asm.
store volatile i64 %v1, i64 addrspace(1)* %p2
store volatile i64 %v2, i64 addrspace(1)* %p3
store volatile i64 %v3, i64 addrspace(1)* %p4
store volatile i64 %v4, i64 addrspace(1)* %p5
store volatile i64 %v5, i64 addrspace(1)* %p1
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x()
attributes #0 = { nounwind "amdgpu-num-vgpr"="10" }
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D123525/new/
https://reviews.llvm.org/D123525
Files:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
llvm/test/CodeGen/AMDGPU/agpr-copy-no-vgprs.mir
llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir
llvm/test/CodeGen/AMDGPU/agpr-remat.ll
llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir
llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir
llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
llvm/test/CodeGen/AMDGPU/spill-agpr.mir
llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
More information about the llvm-commits
mailing list