[llvm] r340859 - AMDGPU: Shrink insts to fold immediates

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 28 11:34:25 PDT 2018


Author: arsenm
Date: Tue Aug 28 11:34:24 2018
New Revision: 340859

URL: http://llvm.org/viewvc/llvm-project?rev=340859&view=rev
Log:
AMDGPU: Shrink insts to fold immediates

This needs to be done in the SSA fold operands
pass to be effective, so there is a bit of overlap
with SIShrinkInstructions, but I don't think this
is practically avoidable.
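
As an illustration of the effect (taken from the shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use test added below; virtual register numbers simplified), an e64 add whose carry-out is unused, in a block where $vcc is dead, is rewritten during operand folding:

  Input MIR:
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2

  After si-fold-operands,dead-mi-elimination:
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_ADD_I32_e32 %0, %1, implicit-def $vcc, implicit $exec
    S_ENDPGM implicit %2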

Added:
    llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
    llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=340859&r1=340858&r2=340859&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Tue Aug 28 11:34:24 2018
@@ -35,13 +35,16 @@ struct FoldCandidate {
     uint64_t ImmToFold;
     int FrameIndexToFold;
   };
+  int ShrinkOpcode;
   unsigned char UseOpNo;
   MachineOperand::MachineOperandType Kind;
   bool Commuted;
 
   FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
-                bool Commuted_ = false) :
-    UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
+                bool Commuted_ = false,
+                int ShrinkOp = -1) :
+    UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+    Kind(FoldOp->getType()),
     Commuted(Commuted_) {
     if (FoldOp->isImm()) {
       ImmToFold = FoldOp->getImm();
@@ -68,6 +71,14 @@ struct FoldCandidate {
   bool isCommuted() const {
     return Commuted;
   }
+
+  bool needsShrink() const {
+    return ShrinkOpcode != -1;
+  }
+
+  int getShrinkOpcode() const {
+    return ShrinkOpcode;
+  }
 };
 
 class SIFoldOperands : public MachineFunctionPass {
@@ -154,6 +165,7 @@ FunctionPass *llvm::createSIFoldOperands
 }
 
 static bool updateOperand(FoldCandidate &Fold,
+                          const SIInstrInfo &TII,
                           const TargetRegisterInfo &TRI) {
   MachineInstr *MI = Fold.UseMI;
   MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
@@ -189,10 +201,42 @@ static bool updateOperand(FoldCandidate
         Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
       }
     }
+
+    if (Fold.needsShrink()) {
+      MachineBasicBlock *MBB = MI->getParent();
+      auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+      if (Liveness != MachineBasicBlock::LQR_Dead)
+        return false;
+
+      int Op32 = Fold.getShrinkOpcode();
+      MachineOperand &Dst0 = MI->getOperand(0);
+      MachineOperand &Dst1 = MI->getOperand(1);
+      assert(Dst0.isDef() && Dst1.isDef());
+
+      MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+      const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+      unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
+      const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
+      unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
+
+      MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+
+      // Keep the old instruction around to avoid breaking iterators, but
+      // replace the outputs with dummy registers.
+      Dst0.setReg(NewReg0);
+      Dst1.setReg(NewReg1);
+
+      if (Fold.isCommuted())
+        TII.commuteInstruction(*Inst32, false);
+      return true;
+    }
+
     Old.ChangeToImmediate(Fold.ImmToFold);
     return true;
   }
 
+  assert(!Fold.needsShrink() && "not handled");
+
   if (Fold.isFI()) {
     Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
     return true;
@@ -261,6 +305,8 @@ static bool tryAddToFoldList(SmallVector
     if (isUseMIInFoldList(FoldList, MI))
       return false;
 
+    unsigned CommuteOpNo = OpNo;
+
     // Operand is not legal, so try to commute the instruction to
     // see if this makes it possible to fold.
     unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
@@ -269,11 +315,12 @@ static bool tryAddToFoldList(SmallVector
 
     if (CanCommute) {
       if (CommuteIdx0 == OpNo)
-        OpNo = CommuteIdx1;
+        CommuteOpNo = CommuteIdx1;
       else if (CommuteIdx1 == OpNo)
-        OpNo = CommuteIdx0;
+        CommuteOpNo = CommuteIdx0;
     }
 
+
     // One of operands might be an Imm operand, and OpNo may refer to it after
     // the call of commuteInstruction() below. Such situations are avoided
     // here explicitly as OpNo must be a register operand to be a candidate
@@ -286,12 +333,39 @@ static bool tryAddToFoldList(SmallVector
         !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
       return false;
 
-    if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
+    if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
+      if ((Opc == AMDGPU::V_ADD_I32_e64 ||
+           Opc == AMDGPU::V_SUB_I32_e64 ||
+           Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
+          OpToFold->isImm()) {
+        MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+        // Verify the other operand is a VGPR, otherwise we would violate the
+        // constant bus restriction.
+        unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
+        MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+        if (!OtherOp.isReg() ||
+            !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
+          return false;
+
+        const MachineOperand &SDst = MI->getOperand(1);
+        assert(SDst.isDef());
+
+        // TODO: Handle cases with a used carry.
+        if (!MRI.use_nodbg_empty(SDst.getReg()))
+          return false;
+
+        int Op32 = AMDGPU::getVOPe32(Opc);
+        FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
+                                         Op32));
+        return true;
+      }
+
       TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
       return false;
     }
 
-    FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
+    FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
     return true;
   }
 
@@ -757,7 +831,7 @@ void SIFoldOperands::foldInstOperand(Mac
     Copy->addImplicitDefUseOperands(*MF);
 
   for (FoldCandidate &Fold : FoldList) {
-    if (updateOperand(Fold, *TRI)) {
+    if (updateOperand(Fold, *TII, *TRI)) {
       // Clear kill flags.
       if (Fold.isReg()) {
         assert(Fold.OpToFold && Fold.OpToFold->isReg());

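A condensed sketch of how the two hunks above fit together (names as in the diff; not a compilable excerpt): tryAddToFoldList records the 32-bit opcode on the FoldCandidate when the immediate only becomes foldable in the VOP2 form, and updateOperand performs the shrink later, once it has checked that clobbering $vcc is safe.

    // In tryAddToFoldList: an e64 add/sub with an unused carry-out gets a
    // candidate that carries the VOP2 opcode to shrink to.
    int Op32 = AMDGPU::getVOPe32(Opc);                 // e.g. V_ADD_I32_e32
    FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold,
                                     /*Commuted_=*/true, Op32));

    // In updateOperand: only shrink when the implicit $vcc def of the VOP2
    // form cannot clobber a live value at this point.
    if (Fold.needsShrink() &&
        MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI) ==
            MachineBasicBlock::LQR_Dead)
      TII.buildShrunkInst(*MI, Fold.getShrinkOpcode());
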
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=340859&r1=340858&r2=340859&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Tue Aug 28 11:34:24 2018
@@ -2572,7 +2572,60 @@ bool SIInstrInfo::canShrink(const Machin
   // Check output modifiers
   return !hasModifiersSet(MI, AMDGPU::OpName::omod) &&
          !hasModifiersSet(MI, AMDGPU::OpName::clamp);
+}
+
+// Set VCC operand with all flags from \p Orig, except for setting it as
+// implicit.
+static void copyFlagsToImplicitVCC(MachineInstr &MI,
+                                   const MachineOperand &Orig) {
+
+  for (MachineOperand &Use : MI.implicit_operands()) {
+    if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
+      Use.setIsUndef(Orig.isUndef());
+      Use.setIsKill(Orig.isKill());
+      return;
+    }
+  }
+}
+
+MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
+                                           unsigned Op32) const {
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineInstrBuilder Inst32 =
+    BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));
+
+  // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
+  // For VOPC instructions, this is replaced by an implicit def of vcc.
+  int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
+  if (Op32DstIdx != -1) {
+    // dst
+    Inst32.add(MI.getOperand(0));
+  } else {
+    assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+           "Unexpected case");
+  }
+
+  Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0));
+
+  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+  if (Src1)
+    Inst32.add(*Src1);
+
+  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+
+  if (Src2) {
+    int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
+    if (Op32Src2Idx != -1) {
+      Inst32.add(*Src2);
+    } else {
+      // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
+      // replaced with an implicit read of vcc. This was already added
+      // during the initial BuildMI, so find it to preserve the flags.
+      copyFlagsToImplicitVCC(*Inst32, *Src2);
+    }
+  }
 
+  return Inst32;
 }
 
 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,

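For reference, the intended call pattern for the new helper (a sketch only, mirroring the updated SIShrinkInstructions loop further down; hasVALU32BitEncoding is the existing SIInstrInfo query, everything else is named in this patch):

    // Sketch: shrink a VOP3-encoded instruction to its VOP2/VOPC form.
    if (TII->hasVALU32BitEncoding(MI.getOpcode()) &&
        TII->canShrink(MI, MRI)) {
      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
      // buildShrunkInst inserts the 32-bit instruction before MI and returns
      // it; the caller still has to rewire or erase the original MI (or let
      // dead-mi-elimination clean it up, as SIFoldOperands now does).
      MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
      (void)Inst32;
    }
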
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=340859&r1=340858&r2=340859&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Tue Aug 28 11:34:24 2018
@@ -694,6 +694,9 @@ public:
   bool canShrink(const MachineInstr &MI,
                  const MachineRegisterInfo &MRI) const;
 
+  MachineInstr *buildShrunkInst(MachineInstr &MI,
+                                unsigned NewOpcode) const;
+
   bool verifyInstruction(const MachineInstr &MI,
                          StringRef &ErrInfo) const override;
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=340859&r1=340858&r2=340859&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Tue Aug 28 11:34:24 2018
@@ -120,19 +120,6 @@ static bool foldImmediates(MachineInstr
   return false;
 }
 
-// Copy MachineOperand with all flags except setting it as implicit.
-static void copyFlagsToImplicitVCC(MachineInstr &MI,
-                                   const MachineOperand &Orig) {
-
-  for (MachineOperand &Use : MI.implicit_operands()) {
-    if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
-      Use.setIsUndef(Orig.isUndef());
-      Use.setIsKill(Orig.isKill());
-      return;
-    }
-  }
-}
-
 static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
   return isInt<16>(Src.getImm()) &&
     !TII->isInlineConstant(*Src.getParent(),
@@ -434,40 +421,7 @@ bool SIShrinkInstructions::runOnMachineF
       // We can shrink this instruction
       LLVM_DEBUG(dbgs() << "Shrinking " << MI);
 
-      MachineInstrBuilder Inst32 =
-          BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
-
-      // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
-      // For VOPC instructions, this is replaced by an implicit def of vcc.
-      int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
-      if (Op32DstIdx != -1) {
-        // dst
-        Inst32.add(MI.getOperand(0));
-      } else {
-        assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
-               "Unexpected case");
-      }
-
-
-      Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
-
-      const MachineOperand *Src1 =
-          TII->getNamedOperand(MI, AMDGPU::OpName::src1);
-      if (Src1)
-        Inst32.add(*Src1);
-
-      if (Src2) {
-        int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
-        if (Op32Src2Idx != -1) {
-          Inst32.add(*Src2);
-        } else {
-          // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
-          // replaced with an implicit read of vcc. This was already added
-          // during the initial BuildMI, so find it to preserve the flags.
-          copyFlagsToImplicitVCC(*Inst32, *Src2);
-        }
-      }
-
+      MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
       ++NumInstructionsShrunk;
 
       // Copy extra operands not present in the instruction definition.

Added: llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir?rev=340859&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir Tue Aug 28 11:34:24 2018
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination  %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+
+# Uses a carry out in an instruction that can't be shrunk.
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_1]]
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+
+    %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %5
+
+...
+---
+
+# TODO: Is it OK to leave the broken use around on the DBG_VALUE?
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: DBG_VALUE debug-use %5:sreg_64_xexec, debug-use $noreg
+    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+
+    %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
+    DBG_VALUE debug-use %5, debug-use $noreg
+    S_ENDPGM implicit %4
+
+...
+
+---
+
+# Uses the carry out in a normal pattern.
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
+    ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[V_ADD_I32_e64_1]], implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_ADDC_U32_e64_]]
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+
+    %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
+    %6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %2, %3, %5, implicit $exec
+    S_ENDPGM implicit %6
+
+...

Added: llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir?rev=340859&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir Tue Aug 28 11:34:24 2018
@@ -0,0 +1,347 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination  %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32_xm0 = S_MOV_B32 12345
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %2
+
+...
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %2
+
+...
+---
+
+# This does not shrink because it would violate the constant bus
+# restriction to have an SGPR input and an immediate, so a copy would
+# be required.
+
+name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
+    ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
+    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[DEF]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
+    %0:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
+    %1:sreg_32_xm0 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use
+    ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
+    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[DEF]], implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
+    %0:sreg_32_xm0 = IMPLICIT_DEF
+    %1:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_live_vcc_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_live_vcc_use
+    ; GCN: $vcc = S_MOV_B64 -1
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc
+    $vcc = S_MOV_B64 -1
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %2, implicit $vcc
+
+...
+
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_use
+tracksRegLiveness: true
+
+body:             |
+  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_use
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   $vcc = S_MOV_B64 -1
+  ; GCN:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+  ; GCN:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN:   [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+  ; GCN: bb.1:
+  ; GCN:   liveins: $vcc
+  ; GCN:   S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc
+  bb.0:
+    successors: %bb.1
+    $vcc = S_MOV_B64 -1
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+
+  bb.1:
+    liveins: $vcc
+    S_ENDPGM implicit %2, implicit $vcc
+
+...
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_lo_use
+tracksRegLiveness: true
+
+body:             |
+  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_lo_use
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+  ; GCN:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN:   [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+  ; GCN: bb.1:
+  ; GCN:   liveins: $vcc_lo
+  ; GCN:   S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo
+  bb.0:
+    successors: %bb.1
+    $vcc = S_MOV_B64 -1
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+
+  bb.1:
+    liveins: $vcc_lo
+    S_ENDPGM implicit %2, implicit $vcc_lo
+
+...
+---
+
+# This is not OK to clobber because vcc_lo has a livein use.
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc
+tracksRegLiveness: true
+
+body:             |
+  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   $vcc = S_MOV_B64 -1
+  ; GCN: bb.1:
+  ; GCN:   liveins: $vcc
+  ; GCN:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+  ; GCN:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN:   [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+  ; GCN:   S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo
+  bb.0:
+    successors: %bb.1
+    $vcc = S_MOV_B64 -1
+
+  bb.1:
+    liveins: $vcc
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %2, implicit $vcc_lo
+
+...
+---
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc_hi
+tracksRegLiveness: true
+
+body:             |
+  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc_hi
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   $vcc_hi = S_MOV_B32 -1
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   liveins: $vcc_hi
+  ; GCN:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+  ; GCN:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN:   [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+  ; GCN: bb.2:
+  ; GCN:   liveins: $vcc_hi
+  ; GCN:   S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_hi
+  bb.0:
+    successors: %bb.1
+    $vcc_hi = S_MOV_B32 -1
+
+  bb.1:
+    liveins: $vcc_hi
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+
+  bb.2:
+    liveins: $vcc_hi
+
+    S_ENDPGM implicit %2, implicit $vcc_hi
+
+...
+
+---
+
+name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]]
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]]
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32_xm0 = S_MOV_B32 12345
+    %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]]
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %2
+
+...
+
+---
+
+name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+    ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]]
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32_xm0 = S_MOV_B32 12345
+    %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec
+    S_ENDPGM implicit %2
+
+...
+
+---
+
+# We know this is OK because vcc isn't live out of the block, even
+# though it had a defined value.
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
+tracksRegLiveness: true
+
+body:             |
+  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+  ; GCN:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN:   [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
+  ; GCN: bb.1:
+  ; GCN:   S_ENDPGM implicit [[V_ADD_I32_e64_]]
+  bb.0:
+    successors: %bb.1
+
+    $vcc = S_MOV_B64 -1
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+
+  bb.1:
+    S_ENDPGM implicit %2
+
+...
