[llvm] r340859 - AMDGPU: Shrink insts to fold immediates
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 28 11:34:25 PDT 2018
Author: arsenm
Date: Tue Aug 28 11:34:24 2018
New Revision: 340859
URL: http://llvm.org/viewvc/llvm-project?rev=340859&view=rev
Log:
AMDGPU: Shrink insts to fold immediates
This needs to be done in the SSA fold operands
pass to be effective, so there is a bit of overlap
with SIShrinkInstructions, but I don't think this
is practically avoidable.
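For a rough sense of the transformation (paraphrasing the
shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use case from the added
fold-immediate-operand-shrink.mir test), a V_ADD_I32_e64 whose carry-out def
is unused and whose other source is a VGPR is rewritten to the e32 encoding
while folding; the e32 form implicitly clobbers $vcc, so $vcc must be dead at
that point:

  # Before si-fold-operands:
  %0:sreg_32_xm0 = S_MOV_B32 12345
  %1:vgpr_32 = IMPLICIT_DEF
  %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
  S_ENDPGM implicit %2

  # After si-fold-operands,dead-mi-elimination:
  %0:sreg_32_xm0 = S_MOV_B32 12345
  %1:vgpr_32 = IMPLICIT_DEF
  %2:vgpr_32 = V_ADD_I32_e32 %0, %1, implicit-def $vcc, implicit $exec
  S_ENDPGM implicit %2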
Added:
llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=340859&r1=340858&r2=340859&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Tue Aug 28 11:34:24 2018
@@ -35,13 +35,16 @@ struct FoldCandidate {
uint64_t ImmToFold;
int FrameIndexToFold;
};
+ int ShrinkOpcode;
unsigned char UseOpNo;
MachineOperand::MachineOperandType Kind;
bool Commuted;
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
- bool Commuted_ = false) :
- UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
+ bool Commuted_ = false,
+ int ShrinkOp = -1) :
+ UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+ Kind(FoldOp->getType()),
Commuted(Commuted_) {
if (FoldOp->isImm()) {
ImmToFold = FoldOp->getImm();
@@ -68,6 +71,14 @@ struct FoldCandidate {
bool isCommuted() const {
return Commuted;
}
+
+ bool needsShrink() const {
+ return ShrinkOpcode != -1;
+ }
+
+ int getShrinkOpcode() const {
+ return ShrinkOpcode;
+ }
};
class SIFoldOperands : public MachineFunctionPass {
@@ -154,6 +165,7 @@ FunctionPass *llvm::createSIFoldOperands
}
static bool updateOperand(FoldCandidate &Fold,
+ const SIInstrInfo &TII,
const TargetRegisterInfo &TRI) {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
@@ -189,10 +201,42 @@ static bool updateOperand(FoldCandidate
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
}
}
+
+ if (Fold.needsShrink()) {
+ MachineBasicBlock *MBB = MI->getParent();
+ auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+ if (Liveness != MachineBasicBlock::LQR_Dead)
+ return false;
+
+ int Op32 = Fold.getShrinkOpcode();
+ MachineOperand &Dst0 = MI->getOperand(0);
+ MachineOperand &Dst1 = MI->getOperand(1);
+ assert(Dst0.isDef() && Dst1.isDef());
+
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+ unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
+ const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
+ unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
+
+ MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+
+ // Keep the old instruction around to avoid breaking iterators, but
+ // replace the outputs with dummy registers.
+ Dst0.setReg(NewReg0);
+ Dst1.setReg(NewReg1);
+
+ if (Fold.isCommuted())
+ TII.commuteInstruction(*Inst32, false);
+ return true;
+ }
+
Old.ChangeToImmediate(Fold.ImmToFold);
return true;
}
+ assert(!Fold.needsShrink() && "not handled");
+
if (Fold.isFI()) {
Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
return true;
@@ -261,6 +305,8 @@ static bool tryAddToFoldList(SmallVector
if (isUseMIInFoldList(FoldList, MI))
return false;
+ unsigned CommuteOpNo = OpNo;
+
// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
@@ -269,11 +315,12 @@ static bool tryAddToFoldList(SmallVector
if (CanCommute) {
if (CommuteIdx0 == OpNo)
- OpNo = CommuteIdx1;
+ CommuteOpNo = CommuteIdx1;
else if (CommuteIdx1 == OpNo)
- OpNo = CommuteIdx0;
+ CommuteOpNo = CommuteIdx0;
}
+
// One of operands might be an Imm operand, and OpNo may refer to it after
// the call of commuteInstruction() below. Such situations are avoided
// here explicitly as OpNo must be a register operand to be a candidate
@@ -286,12 +333,39 @@ static bool tryAddToFoldList(SmallVector
!TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
return false;
- if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
+ if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
+ if ((Opc == AMDGPU::V_ADD_I32_e64 ||
+ Opc == AMDGPU::V_SUB_I32_e64 ||
+ Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
+ OpToFold->isImm()) {
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ // Verify the other operand is a VGPR, otherwise we would violate the
+ // constant bus restriction.
+ unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
+ MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+ if (!OtherOp.isReg() ||
+ !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
+ return false;
+
+ const MachineOperand &SDst = MI->getOperand(1);
+ assert(SDst.isDef());
+
+ // TODO: Handle cases with a used carry.
+ if (!MRI.use_nodbg_empty(SDst.getReg()))
+ return false;
+
+ int Op32 = AMDGPU::getVOPe32(Opc);
+ FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
+ Op32));
+ return true;
+ }
+
TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
return false;
}
- FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
+ FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
return true;
}
@@ -757,7 +831,7 @@ void SIFoldOperands::foldInstOperand(Mac
Copy->addImplicitDefUseOperands(*MF);
for (FoldCandidate &Fold : FoldList) {
- if (updateOperand(Fold, *TRI)) {
+ if (updateOperand(Fold, *TII, *TRI)) {
// Clear kill flags.
if (Fold.isReg()) {
assert(Fold.OpToFold && Fold.OpToFold->isReg());
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=340859&r1=340858&r2=340859&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Tue Aug 28 11:34:24 2018
@@ -2572,7 +2572,60 @@ bool SIInstrInfo::canShrink(const Machin
// Check output modifiers
return !hasModifiersSet(MI, AMDGPU::OpName::omod) &&
!hasModifiersSet(MI, AMDGPU::OpName::clamp);
+}
+
+// Copy the flags (isUndef, isKill) from \p Orig onto the implicit VCC use
+// operand of \p MI.
+static void copyFlagsToImplicitVCC(MachineInstr &MI,
+ const MachineOperand &Orig) {
+
+ for (MachineOperand &Use : MI.implicit_operands()) {
+ if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
+ Use.setIsUndef(Orig.isUndef());
+ Use.setIsKill(Orig.isKill());
+ return;
+ }
+ }
+}
+
+MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
+ unsigned Op32) const {
+  MachineBasicBlock *MBB = MI.getParent();
+ MachineInstrBuilder Inst32 =
+ BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));
+
+ // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
+ // For VOPC instructions, this is replaced by an implicit def of vcc.
+ int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
+ if (Op32DstIdx != -1) {
+ // dst
+ Inst32.add(MI.getOperand(0));
+ } else {
+ assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+ "Unexpected case");
+ }
+
+ Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0));
+
+ const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1)
+ Inst32.add(*Src1);
+
+ const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+
+ if (Src2) {
+ int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
+ if (Op32Src2Idx != -1) {
+ Inst32.add(*Src2);
+ } else {
+ // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
+ // replaced with an implicit read of vcc. This was already added
+ // during the initial BuildMI, so find it to preserve the flags.
+ copyFlagsToImplicitVCC(*Inst32, *Src2);
+ }
+ }
+ return Inst32;
}
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=340859&r1=340858&r2=340859&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Tue Aug 28 11:34:24 2018
@@ -694,6 +694,9 @@ public:
bool canShrink(const MachineInstr &MI,
const MachineRegisterInfo &MRI) const;
+ MachineInstr *buildShrunkInst(MachineInstr &MI,
+ unsigned NewOpcode) const;
+
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=340859&r1=340858&r2=340859&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Tue Aug 28 11:34:24 2018
@@ -120,19 +120,6 @@ static bool foldImmediates(MachineInstr
return false;
}
-// Copy MachineOperand with all flags except setting it as implicit.
-static void copyFlagsToImplicitVCC(MachineInstr &MI,
- const MachineOperand &Orig) {
-
- for (MachineOperand &Use : MI.implicit_operands()) {
- if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
- Use.setIsUndef(Orig.isUndef());
- Use.setIsKill(Orig.isKill());
- return;
- }
- }
-}
-
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
return isInt<16>(Src.getImm()) &&
!TII->isInlineConstant(*Src.getParent(),
@@ -434,40 +421,7 @@ bool SIShrinkInstructions::runOnMachineF
// We can shrink this instruction
LLVM_DEBUG(dbgs() << "Shrinking " << MI);
- MachineInstrBuilder Inst32 =
- BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
-
- // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
- // For VOPC instructions, this is replaced by an implicit def of vcc.
- int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
- if (Op32DstIdx != -1) {
- // dst
- Inst32.add(MI.getOperand(0));
- } else {
- assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
- "Unexpected case");
- }
-
-
- Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
-
- const MachineOperand *Src1 =
- TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (Src1)
- Inst32.add(*Src1);
-
- if (Src2) {
- int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
- if (Op32Src2Idx != -1) {
- Inst32.add(*Src2);
- } else {
- // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
- // replaced with an implicit read of vcc. This was already added
- // during the initial BuildMI, so find it to preserve the flags.
- copyFlagsToImplicitVCC(*Inst32, *Src2);
- }
- }
-
+ MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
++NumInstructionsShrunk;
// Copy extra operands not present in the instruction definition.
Added: llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir?rev=340859&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir Tue Aug 28 11:34:24 2018
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+
+# Uses a carry out in an instruction that can't be shrunk.
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_1]]
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = IMPLICIT_DEF
+
+ %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %5
+
+...
+---
+
+# TODO: Is it OK to leave the broken use around on the DBG_VALUE?
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+ ; GCN: DBG_VALUE debug-use %5:sreg_64_xexec, debug-use $noreg
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = IMPLICIT_DEF
+
+ %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
+ DBG_VALUE debug-use %5, debug-use $noreg
+ S_ENDPGM implicit %4
+
+...
+
+---
+
+# Uses carry out in a normal pattern
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
+ ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[V_ADD_I32_e64_1]], implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_ADDC_U32_e64_]]
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = IMPLICIT_DEF
+
+ %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
+ %6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %2, %3, %5, implicit $exec
+ S_ENDPGM implicit %6
+
+...
Added: llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir?rev=340859&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir Tue Aug 28 11:34:24 2018
@@ -0,0 +1,347 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32_xm0 = S_MOV_B32 12345
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %2
+
+...
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %2
+
+...
+---
+
+# This does not shrink because having both an SGPR input and an
+# immediate would violate the constant bus restriction, so a copy
+# would be required.
+
+name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
+ ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[DEF]], [[V_MOV_B32_e32_]], implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
+ %0:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
+ %1:sreg_32_xm0 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use
+ ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[DEF]], implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
+ %0:sreg_32_xm0 = IMPLICIT_DEF
+ %1:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_live_vcc_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_live_vcc_use
+ ; GCN: $vcc = S_MOV_B64 -1
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc
+ $vcc = S_MOV_B64 -1
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %2, implicit $vcc
+
+...
+
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_use
+tracksRegLiveness: true
+
+body: |
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_use
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x80000000)
+ ; GCN: $vcc = S_MOV_B64 -1
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+ ; GCN: bb.1:
+ ; GCN: liveins: $vcc
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc
+ bb.0:
+ successors: %bb.1
+ $vcc = S_MOV_B64 -1
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+
+ bb.1:
+ liveins: $vcc
+ S_ENDPGM implicit %2, implicit $vcc
+
+...
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_lo_use
+tracksRegLiveness: true
+
+body: |
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_lo_use
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x80000000)
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+ ; GCN: bb.1:
+ ; GCN: liveins: $vcc_lo
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo
+ bb.0:
+ successors: %bb.1
+ $vcc = S_MOV_B64 -1
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+
+ bb.1:
+ liveins: $vcc_lo
+ S_ENDPGM implicit %2, implicit $vcc_lo
+
+...
+---
+
+# This is not OK to clobber because vcc_lo has a livein use.
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc
+tracksRegLiveness: true
+
+body: |
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x80000000)
+ ; GCN: $vcc = S_MOV_B64 -1
+ ; GCN: bb.1:
+ ; GCN: liveins: $vcc
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo
+ bb.0:
+ successors: %bb.1
+ $vcc = S_MOV_B64 -1
+
+ bb.1:
+ liveins: $vcc
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %2, implicit $vcc_lo
+
+...
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc_hi
+tracksRegLiveness: true
+
+body: |
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc_hi
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x80000000)
+ ; GCN: $vcc_hi = S_MOV_B32 -1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: liveins: $vcc_hi
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+ ; GCN: bb.2:
+ ; GCN: liveins: $vcc_hi
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_hi
+ bb.0:
+ successors: %bb.1
+ $vcc_hi = S_MOV_B32 -1
+
+ bb.1:
+ liveins: $vcc_hi
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+
+ bb.2:
+ liveins: $vcc_hi
+
+ S_ENDPGM implicit %2, implicit $vcc_hi
+
+...
+
+---
+
+name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]]
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]]
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32_xm0 = S_MOV_B32 12345
+ %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]]
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+ ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]]
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32_xm0 = S_MOV_B32 12345
+ %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec
+ S_ENDPGM implicit %2
+
+...
+
+---
+
+# We know this is OK because vcc isn't live out of the block, even
+# though it had a defined value.
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
+tracksRegLiveness: true
+
+body: |
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x80000000)
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+ ; GCN: bb.1:
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
+ bb.0:
+ successors: %bb.1
+
+ $vcc = S_MOV_B64 -1
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+
+ bb.1:
+ S_ENDPGM implicit %2
+
+...