[llvm] r340862 - AMDGPU: Force shrinking of add/sub even if the carry is used
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 28 11:44:17 PDT 2018
Author: arsenm
Date: Tue Aug 28 11:44:16 2018
New Revision: 340862
URL: http://llvm.org/viewvc/llvm-project?rev=340862&view=rev
Log:
AMDGPU: Force shrinking of add/sub even if the carry is used
The original motivating example uses a 64-bit add, so the carry
output is used. Insert a copy from VCC in that case. This may allow
the instruction with the used carry to be shrunk. At worst, we are
replacing a mov that materializes the constant with a copy of VCC.
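
For reference, a rough MIR sketch of the intended effect on the motivating
64-bit add pattern (virtual register numbers are illustrative; the exact
output is what the updated test below checks):

Before (low half of a 64-bit add; the carry feeds the high-half addc):
  %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
  %6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %2, %3, %5, implicit $exec

After (shrunk to the e32 form; the carry use is rewritten through a copy of $vcc):
  %8:vgpr_32 = V_ADD_I32_e32 %0, %1, implicit-def $vcc, implicit $exec
  %5:sreg_64_xexec = COPY killed $vcc
  %6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %2, %3, %5, implicit $exec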
Modified:
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=340862&r1=340861&r2=340862&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Tue Aug 28 11:44:16 2018
@@ -208,12 +208,14 @@ static bool updateOperand(FoldCandidate
if (Liveness != MachineBasicBlock::LQR_Dead)
return false;
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
int Op32 = Fold.getShrinkOpcode();
MachineOperand &Dst0 = MI->getOperand(0);
MachineOperand &Dst1 = MI->getOperand(1);
assert(Dst0.isDef() && Dst1.isDef());
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
+
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
@@ -221,6 +223,11 @@ static bool updateOperand(FoldCandidate
MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+ if (HaveNonDbgCarryUse) {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
+ .addReg(AMDGPU::VCC, RegState::Kill);
+ }
+
// Keep the old instruction around to avoid breaking iterators, but
// replace the outputs with dummy registers.
Dst0.setReg(NewReg0);
@@ -351,10 +358,6 @@ static bool tryAddToFoldList(SmallVector
const MachineOperand &SDst = MI->getOperand(1);
assert(SDst.isDef());
- // TODO: Handle cases with a used carry.
- if (!MRI.use_nodbg_empty(SDst.getReg()))
- return false;
-
int Op32 = AMDGPU::getVOPe32(Opc);
FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
Op32));
Modified: llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir?rev=340862&r1=340861&r2=340862&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir Tue Aug 28 11:44:16 2018
@@ -13,8 +13,9 @@ body: |
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
- ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_1]]
+ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc
+ ; GCN: S_ENDPGM implicit [[COPY]]
%0:sreg_32_xm0 = S_MOV_B32 12345
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
@@ -26,6 +27,31 @@ body: |
...
---
+name: shrink_scalar_imm_multi_use_with_used_carry
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_scalar_imm_multi_use_with_used_carry
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+ ; GCN: [[V_ADD_I32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF1]], implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_1]], implicit [[V_ADD_I32_e64_2]]
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = IMPLICIT_DEF
+ %4:vgpr_32 = IMPLICIT_DEF
+
+ %5:vgpr_32, %6:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
+ %7:vgpr_32, %8:sreg_64_xexec = V_ADD_I32_e64 %0, %2, implicit $exec
+ S_ENDPGM implicit %6, implicit %7
+
+...
+---
+
# TODO: Is it OK to leave the broken use around on the DBG_VALUE?
name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use
@@ -64,8 +90,9 @@ body: |
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
- ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[V_ADD_I32_e64_1]], implicit $exec
+ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc
+ ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[COPY]], implicit $exec
; GCN: S_ENDPGM implicit [[V_ADDC_U32_e64_]]
%0:sreg_32_xm0 = S_MOV_B32 12345
%1:vgpr_32 = IMPLICIT_DEF