[llvm] 49a533a - AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy (#129059)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 3 01:22:51 PST 2025
Author: Matt Arsenault
Date: 2025-03-03T16:22:47+07:00
New Revision: 49a533a4859eac99efac3220a1ffc62616cb3664
URL: https://github.com/llvm/llvm-project/commit/49a533a4859eac99efac3220a1ffc62616cb3664
DIFF: https://github.com/llvm/llvm-project/commit/49a533a4859eac99efac3220a1ffc62616cb3664.diff
LOG: AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy (#129059)
This was trying to hack around the intermediate VGPR requirement
to copy to AGPRs on gfx908. We should still use a copy for all
reg-to-reg cases. This should matter less these days, as we
reserve a VGPR to handle it when required (and no end to end tests
need updating).
This was also an obstacle to handling this fold for input registers
which are larger than 32-bits.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 6cb6863068b5f..eb9aabf8b6317 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1573,9 +1573,8 @@ bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const {
Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def);
}
- auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
- BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp)
- .addReg(Vgpr);
+ Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
+ BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Tmp).addReg(Vgpr);
B.addReg(Tmp);
}
diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir
index f45b35e239587..9d167f578e9eb 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir
@@ -206,11 +206,11 @@ body: |
; CHECK-LABEL: name: s_mov_b32_999_splat_sgpr_128_copy_vgpr_copy_agpr
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 999
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
- ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
- ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
- ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
- ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY]]
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]]
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
%0:sgpr_32 = S_MOV_B32 999
@@ -232,10 +232,10 @@ body: |
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
- ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec
- ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
- ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]]
+ ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub3
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]]
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
%0:sgpr_32 = S_MOV_B32 999
More information about the llvm-commits
mailing list