[PATCH] D123525: [AMDGPU] On gfx908, reserve VGPR for AGPR copy based on register budget.

Tue Apr 12 02:05:28 PDT 2022

hsmhsm updated this revision to Diff 422144.
hsmhsm added a comment.

Here is the update where we always reserve the highest available VGPR irrespective of the
register constraint.

With this update, the following two lit tests fail to compile because RegAlloc fails.

  spill-agpr.ll
  -------------

  define amdgpu_kernel void @max_5regs_used_8a(<4 x float> addrspace(1)* %arg) #4 {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
    %v0 = call float asm sideeffect "; def $0", "=v"()
    %a4 = call <4 x float> asm sideeffect "; def $0", "=a"()
    %gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i32 %tid
    %mai.in = load <4 x float>, <4 x float> addrspace(1)* %gep
    %mai.out = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 1.0, <4 x float> %mai.in, i32 0, i32 0, i32 0)
    store <4 x float> %mai.out, <4 x float> addrspace(1)* %gep
    store volatile <4 x float> %a4, <4 x float> addrspace(1)* undef
    call void asm sideeffect "; use $0", "v"(float %v0);
    ret void
  }

  declare i32 @llvm.amdgcn.workitem.id.x()
  declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)

  attributes #4 = { nounwind "amdgpu-num-vgpr"="5" }

  spill-vgpr-to-agpr.ll
  ---------------------

  define amdgpu_kernel void @max_10_vgprs_used_1a_partial_spill(i64 addrspace(1)* %p) #0 {
    %tid = load volatile i32, i32 addrspace(1)* undef
    call void asm sideeffect "", "a"(i32 1)
    %p1 = getelementptr inbounds i64, i64 addrspace(1)* %p, i32 %tid
    %p2 = getelementptr inbounds i64, i64 addrspace(1)* %p1, i32 8
    %p3 = getelementptr inbounds i64, i64 addrspace(1)* %p2, i32 16
    %p4 = getelementptr inbounds i64, i64 addrspace(1)* %p3, i32 24
    %p5 = getelementptr inbounds i64, i64 addrspace(1)* %p4, i32 32
    %v1 = load volatile i64, i64 addrspace(1)* %p1
    %v2 = load volatile i64, i64 addrspace(1)* %p2
    %v3 = load volatile i64, i64 addrspace(1)* %p3
    %v4 = load volatile i64, i64 addrspace(1)* %p4
    %v5 = load volatile i64, i64 addrspace(1)* %p5
    call void asm sideeffect "", "v,v,v,v,v"(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5)
    store volatile i64 %v1, i64 addrspace(1)* %p2
    store volatile i64 %v2, i64 addrspace(1)* %p3
    store volatile i64 %v3, i64 addrspace(1)* %p4
    store volatile i64 %v4, i64 addrspace(1)* %p5
    store volatile i64 %v5, i64 addrspace(1)* %p1
    ret void
  }

  declare i32 @llvm.amdgcn.workitem.id.x()

  attributes #0 = { nounwind "amdgpu-num-vgpr"="10" }

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123525/new/

https://reviews.llvm.org/D123525

Files:
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
  llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
  llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
  llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
  llvm/test/CodeGen/AMDGPU/agpr-copy-no-vgprs.mir
  llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir
  llvm/test/CodeGen/AMDGPU/agpr-remat.ll
  llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
  llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
  llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir
  llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
  llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir
  llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
  llvm/test/CodeGen/AMDGPU/spill-agpr.mir
  llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll