[llvm] 69879ff - AMDGPU: Fix using illegal VOP3 literal in frame index elimination (#115747)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 13 08:01:08 PST 2024


Author: Matt Arsenault
Date: 2024-11-13T08:01:01-08:00
New Revision: 69879ffaec8789dd4ce5f6fa26f1b5e8140190ff

URL: https://github.com/llvm/llvm-project/commit/69879ffaec8789dd4ce5f6fa26f1b5e8140190ff
DIFF: https://github.com/llvm/llvm-project/commit/69879ffaec8789dd4ce5f6fa26f1b5e8140190ff.diff

LOG: AMDGPU: Fix using illegal VOP3 literal in frame index elimination (#115747)

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
    llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
    llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
    llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index f76d1266f495cf..246ef7ad481ab7 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2268,7 +2268,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
-  DebugLoc DL = MI->getDebugLoc();
+  const DebugLoc &DL = MI->getDebugLoc();
 
   assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
 
@@ -2496,6 +2496,25 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       Register MaterializedReg = FrameReg;
       Register ScavengedVGPR;
 
+      int64_t Offset = FrameInfo.getObjectOffset(Index);
+      // For the non-immediate case, we could fall through to the default
+      // handling, but we do an in-place update of the result register here to
+      // avoid scavenging another register.
+      if (OtherOp->isImm()) {
+        int64_t TotalOffset = OtherOp->getImm() + Offset;
+
+        if (!ST.hasVOP3Literal() && SIInstrInfo::isVOP3(*MI) &&
+            !AMDGPU::isInlinableIntLiteral(TotalOffset)) {
+          // If we can't support a VOP3 literal in the VALU instruction, we
+          // can't do this special fold into the add.
+          // TODO: Handle VOP3->VOP2 shrink to support the fold.
+          break;
+        }
+
+        OtherOp->setImm(TotalOffset);
+        Offset = 0;
+      }
+
       if (FrameReg && !ST.enableFlatScratch()) {
         // We should just do an in-place update of the result register. However,
         // the value there may also be used by the add, in which case we need a
@@ -2516,15 +2535,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         MaterializedReg = ScavengedVGPR;
       }
 
-      int64_t Offset = FrameInfo.getObjectOffset(Index);
-      // For the non-immediate case, we could fall through to the default
-      // handling, but we do an in-place update of the result register here to
-      // avoid scavenging another register.
-      if (OtherOp->isImm()) {
-        OtherOp->setImm(OtherOp->getImm() + Offset);
-        Offset = 0;
-      }
-
       if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
         if (ST.enableFlatScratch() &&
             !TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {
@@ -2761,411 +2771,408 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       return true;
     }
     default: {
-      // Other access to frame index
-      const DebugLoc &DL = MI->getDebugLoc();
+      break;
+    }
+    }
 
-      int64_t Offset = FrameInfo.getObjectOffset(Index);
-      if (ST.enableFlatScratch()) {
-        if (TII->isFLATScratch(*MI)) {
-          assert((int16_t)FIOperandNum ==
-                 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
-                                            AMDGPU::OpName::saddr));
+    int64_t Offset = FrameInfo.getObjectOffset(Index);
+    if (ST.enableFlatScratch()) {
+      if (TII->isFLATScratch(*MI)) {
+        assert(
+            (int16_t)FIOperandNum ==
+            AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::saddr));
 
-          // The offset is always swizzled, just replace it
-          if (FrameReg)
-            FIOp->ChangeToRegister(FrameReg, false);
+        // The offset is always swizzled; just replace it.
+        if (FrameReg)
+          FIOp->ChangeToRegister(FrameReg, false);
 
-          MachineOperand *OffsetOp =
+        MachineOperand *OffsetOp =
             TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
-          int64_t NewOffset = Offset + OffsetOp->getImm();
-          if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
-                                     SIInstrFlags::FlatScratch)) {
-            OffsetOp->setImm(NewOffset);
-            if (FrameReg)
-              return false;
-            Offset = 0;
-          }
+        int64_t NewOffset = Offset + OffsetOp->getImm();
+        if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
+                                   SIInstrFlags::FlatScratch)) {
+          OffsetOp->setImm(NewOffset);
+          if (FrameReg)
+            return false;
+          Offset = 0;
+        }
 
-          if (!Offset) {
-            unsigned Opc = MI->getOpcode();
-            int NewOpc = -1;
-            if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
-              NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
-            } else if (ST.hasFlatScratchSTMode()) {
-              // On GFX10 we have ST mode to use no registers for an address.
-              // Otherwise we need to materialize 0 into an SGPR.
-              NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
-            }
+        if (!Offset) {
+          unsigned Opc = MI->getOpcode();
+          int NewOpc = -1;
+          if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
+            NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
+          } else if (ST.hasFlatScratchSTMode()) {
+            // On GFX10 we have ST mode, which uses no registers for an
+            // address. Otherwise we need to materialize 0 into an SGPR.
+            NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
+          }
 
-            if (NewOpc != -1) {
-              // removeOperand doesn't fixup tied operand indexes as it goes, so
-              // it asserts. Untie vdst_in for now and retie them afterwards.
-              int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
-                                                     AMDGPU::OpName::vdst_in);
-              bool TiedVDst = VDstIn != -1 &&
-                              MI->getOperand(VDstIn).isReg() &&
-                              MI->getOperand(VDstIn).isTied();
-              if (TiedVDst)
-                MI->untieRegOperand(VDstIn);
-
-              MI->removeOperand(
-                  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
-
-              if (TiedVDst) {
-                int NewVDst =
-                    AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
-                int NewVDstIn =
-                    AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
-                assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
-                MI->tieOperands(NewVDst, NewVDstIn);
-              }
-              MI->setDesc(TII->get(NewOpc));
-              return false;
+          if (NewOpc != -1) {
+            // removeOperand doesn't fix up tied operand indexes as it goes,
+            // so it asserts. Untie vdst_in for now and retie them afterwards.
+            int VDstIn =
+                AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+            bool TiedVDst = VDstIn != -1 && MI->getOperand(VDstIn).isReg() &&
+                            MI->getOperand(VDstIn).isTied();
+            if (TiedVDst)
+              MI->untieRegOperand(VDstIn);
+
+            MI->removeOperand(
+                AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
+
+            if (TiedVDst) {
+              int NewVDst =
+                  AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
+              int NewVDstIn =
+                  AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
+              assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
+              MI->tieOperands(NewVDst, NewVDstIn);
             }
+            MI->setDesc(TII->get(NewOpc));
+            return false;
           }
         }
+      }
 
-        if (!FrameReg) {
-          FIOp->ChangeToImmediate(Offset);
-          if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))
-            return false;
-        }
+      if (!FrameReg) {
+        FIOp->ChangeToImmediate(Offset);
+        if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))
+          return false;
+      }
 
-        // We need to use register here. Check if we can use an SGPR or need
-        // a VGPR.
-        FIOp->ChangeToRegister(AMDGPU::M0, false);
-        bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);
+      // We need to use a register here. Check if we can use an SGPR or
+      // need a VGPR.
+      FIOp->ChangeToRegister(AMDGPU::M0, false);
+      bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);
 
-        if (!Offset && FrameReg && UseSGPR) {
-          FIOp->setReg(FrameReg);
-          return false;
-        }
+      if (!Offset && FrameReg && UseSGPR) {
+        FIOp->setReg(FrameReg);
+        return false;
+      }
 
-        const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
-                                                : &AMDGPU::VGPR_32RegClass;
+      const TargetRegisterClass *RC =
+          UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
 
-        Register TmpReg =
-            RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
-        FIOp->setReg(TmpReg);
-        FIOp->setIsKill();
+      Register TmpReg =
+          RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
+      FIOp->setReg(TmpReg);
+      FIOp->setIsKill();
 
-        if ((!FrameReg || !Offset) && TmpReg) {
-          unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
-          auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
-          if (FrameReg)
-            MIB.addReg(FrameReg);
-          else
-            MIB.addImm(Offset);
+      if ((!FrameReg || !Offset) && TmpReg) {
+        unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+        auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
+        if (FrameReg)
+          MIB.addReg(FrameReg);
+        else
+          MIB.addImm(Offset);
 
-          return false;
-        }
+        return false;
+      }
 
-        bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
-                           !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
+      bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
+                         !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
 
-        Register TmpSReg =
-            UseSGPR ? TmpReg
-                    : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
-                                                    MI, false, 0, !UseSGPR);
+      Register TmpSReg =
+          UseSGPR ? TmpReg
+                  : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
+                                                  MI, false, 0, !UseSGPR);
 
-        // TODO: for flat scratch another attempt can be made with a VGPR index
-        //       if no SGPRs can be scavenged.
-        if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
-          report_fatal_error("Cannot scavenge register in FI elimination!");
+      // TODO: for flat scratch another attempt can be made with a VGPR index
+      //       if no SGPRs can be scavenged.
+      if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
+        report_fatal_error("Cannot scavenge register in FI elimination!");
 
-        if (!TmpSReg) {
-          // Use frame register and restore it after.
-          TmpSReg = FrameReg;
-          FIOp->setReg(FrameReg);
-          FIOp->setIsKill(false);
-        }
+      if (!TmpSReg) {
+        // Use frame register and restore it after.
+        TmpSReg = FrameReg;
+        FIOp->setReg(FrameReg);
+        FIOp->setIsKill(false);
+      }
 
-        if (NeedSaveSCC) {
-          assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
-              .addReg(FrameReg)
-              .addImm(Offset);
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
-              .addReg(TmpSReg)
-              .addImm(0);
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
+      if (NeedSaveSCC) {
+        assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
+            .addReg(FrameReg)
+            .addImm(Offset);
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
+            .addReg(TmpSReg)
+            .addImm(0);
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
+            .addImm(0)
+            .addReg(TmpSReg);
+      } else {
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
+            .addReg(FrameReg)
+            .addImm(Offset);
+      }
+
+      if (!UseSGPR)
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+            .addReg(TmpSReg, RegState::Kill);
+
+      if (TmpSReg == FrameReg) {
+        // Undo frame register modification.
+        if (NeedSaveSCC &&
+            !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
+          MachineBasicBlock::iterator I =
+              BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
+                      TmpSReg)
+                  .addReg(FrameReg)
+                  .addImm(-Offset);
+          I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
+                  .addReg(TmpSReg)
+                  .addImm(0);
+          BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
+                  TmpSReg)
               .addImm(0)
               .addReg(TmpSReg);
         } else {
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
+          BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
+                  FrameReg)
               .addReg(FrameReg)
-              .addImm(Offset);
+              .addImm(-Offset);
         }
+      }
 
-        if (!UseSGPR)
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
-            .addReg(TmpSReg, RegState::Kill);
-
-        if (TmpSReg == FrameReg) {
-          // Undo frame register modification.
-          if (NeedSaveSCC &&
-              !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
-            MachineBasicBlock::iterator I =
-                BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
-                        TmpSReg)
-                    .addReg(FrameReg)
-                    .addImm(-Offset);
-            I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
-                    .addReg(TmpSReg)
-                    .addImm(0);
-            BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
-                    TmpSReg)
-                .addImm(0)
-                .addReg(TmpSReg);
-          } else {
-            BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
-                    FrameReg)
-                .addReg(FrameReg)
-                .addImm(-Offset);
-          }
-        }
+      return false;
+    }
 
-        return false;
-      }
+    bool IsMUBUF = TII->isMUBUF(*MI);
+
+    if (!IsMUBUF && !MFI->isBottomOfStack()) {
+      // Convert to a swizzled stack address by scaling by the wave size.
+      // In an entry function/kernel the offset is already swizzled.
+      bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
+      bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
+                     !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
+      const TargetRegisterClass *RC = IsSALU && !LiveSCC
+                                          ? &AMDGPU::SReg_32RegClass
+                                          : &AMDGPU::VGPR_32RegClass;
+      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+                    MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
+                    MI->getOpcode() == AMDGPU::S_MOV_B32;
+      Register ResultReg =
+          IsCopy ? MI->getOperand(0).getReg()
+                 : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
 
-      bool IsMUBUF = TII->isMUBUF(*MI);
-
-      if (!IsMUBUF && !MFI->isBottomOfStack()) {
-        // Convert to a swizzled stack address by scaling by the wave size.
-        // In an entry function/kernel the offset is already swizzled.
-        bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
-        bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
-                       !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
-        const TargetRegisterClass *RC = IsSALU && !LiveSCC
-                                            ? &AMDGPU::SReg_32RegClass
-                                            : &AMDGPU::VGPR_32RegClass;
-        bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
-                      MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
-                      MI->getOpcode() == AMDGPU::S_MOV_B32;
-        Register ResultReg =
-            IsCopy ? MI->getOperand(0).getReg()
-                   : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
-
-        int64_t Offset = FrameInfo.getObjectOffset(Index);
-        if (Offset == 0) {
-          unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
-                                               : AMDGPU::V_LSHRREV_B32_e64;
-          Register TmpResultReg = ResultReg;
-          if (IsSALU && LiveSCC) {
-            TmpResultReg = RS->scavengeRegisterBackwards(
-                AMDGPU::VGPR_32RegClass, MI, false, 0);
-          }
+      int64_t Offset = FrameInfo.getObjectOffset(Index);
+      if (Offset == 0) {
+        unsigned OpCode =
+            IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;
+        Register TmpResultReg = ResultReg;
+        if (IsSALU && LiveSCC) {
+          TmpResultReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
+                                                       MI, false, 0);
+        }
 
-          auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);
-          if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
-            // For V_LSHRREV, the operands are reversed (the shift count goes
-            // first).
-            Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
-          else
-            Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
-          if (IsSALU && !LiveSCC)
-            Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
-          if (IsSALU && LiveSCC) {
-            Register NewDest =
-                IsCopy ? ResultReg
-                       : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass,
-                                                       Shift, false, 0);
-            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
-                    NewDest)
-                .addReg(TmpResultReg);
-            ResultReg = NewDest;
-          }
-        } else {
-          MachineInstrBuilder MIB;
-          if (!IsSALU) {
-            if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
-                nullptr) {
-              // Reuse ResultReg in intermediate step.
-              Register ScaledReg = ResultReg;
-
-              BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
-                      ScaledReg)
+        auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);
+        if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
+          // For V_LSHRREV, the operands are reversed (the shift count goes
+          // first).
+          Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
+        else
+          Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
+        if (IsSALU && !LiveSCC)
+          Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
+        if (IsSALU && LiveSCC) {
+          Register NewDest =
+              IsCopy ? ResultReg
+                     : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass,
+                                                     Shift, false, 0);
+          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), NewDest)
+              .addReg(TmpResultReg);
+          ResultReg = NewDest;
+        }
+      } else {
+        MachineInstrBuilder MIB;
+        if (!IsSALU) {
+          if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
+              nullptr) {
+            // Reuse ResultReg in an intermediate step.
+            Register ScaledReg = ResultReg;
+
+            BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+                    ScaledReg)
                 .addImm(ST.getWavefrontSizeLog2())
                 .addReg(FrameReg);
 
-              const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
+            const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
 
-              // TODO: Fold if use instruction is another add of a constant.
-              if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
-                // FIXME: This can fail
-                MIB.addImm(Offset);
-                MIB.addReg(ScaledReg, RegState::Kill);
-                if (!IsVOP2)
-                  MIB.addImm(0); // clamp bit
-              } else {
-                assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
-                       "Need to reuse carry out register");
-
-                // Use scavenged unused carry out as offset register.
-                Register ConstOffsetReg;
-                if (!isWave32)
-                  ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
-                else
-                  ConstOffsetReg = MIB.getReg(1);
-
-                BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
-                    .addImm(Offset);
-                MIB.addReg(ConstOffsetReg, RegState::Kill);
-                MIB.addReg(ScaledReg, RegState::Kill);
+            // TODO: Fold if the use instruction is another add of a constant.
+            if (IsVOP2 ||
+                AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
+              // FIXME: This can fail
+              MIB.addImm(Offset);
+              MIB.addReg(ScaledReg, RegState::Kill);
+              if (!IsVOP2)
                 MIB.addImm(0); // clamp bit
-              }
+            } else {
+              assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
+                     "Need to reuse carry out register");
+
+              // Use the scavenged unused carry-out as the offset register.
+              Register ConstOffsetReg;
+              if (!isWave32)
+                ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
+              else
+                ConstOffsetReg = MIB.getReg(1);
+
+              BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32),
+                      ConstOffsetReg)
+                  .addImm(Offset);
+              MIB.addReg(ConstOffsetReg, RegState::Kill);
+              MIB.addReg(ScaledReg, RegState::Kill);
+              MIB.addImm(0); // clamp bit
             }
           }
-          if (!MIB || IsSALU) {
-            // We have to produce a carry out, and there isn't a free SGPR pair
-            // for it. We can keep the whole computation on the SALU to avoid
-            // clobbering an additional register at the cost of an extra mov.
-
-            // We may have 1 free scratch SGPR even though a carry out is
-            // unavailable. Only one additional mov is needed.
-            Register TmpScaledReg = IsCopy && IsSALU
-                                        ? ResultReg
-                                        : RS->scavengeRegisterBackwards(
-                                              AMDGPU::SReg_32_XM0RegClass, MI,
-                                              false, 0, /*AllowSpill=*/false);
-            Register ScaledReg =
-                TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
-            Register TmpResultReg = ScaledReg;
-
-            if (!LiveSCC) {
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg)
-                  .addReg(FrameReg)
-                  .addImm(ST.getWavefrontSizeLog2());
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg)
-                  .addReg(TmpResultReg, RegState::Kill)
-                  .addImm(Offset);
+        }
+        if (!MIB || IsSALU) {
+          // We have to produce a carry-out, and there isn't a free SGPR pair
+          // for it. We can keep the whole computation on the SALU to avoid
+          // clobbering an additional register at the cost of an extra mov.
+
+          // We may have 1 free scratch SGPR even though a carry out is
+          // unavailable. Only one additional mov is needed.
+          Register TmpScaledReg = IsCopy && IsSALU
+                                      ? ResultReg
+                                      : RS->scavengeRegisterBackwards(
+                                            AMDGPU::SReg_32_XM0RegClass, MI,
+                                            false, 0, /*AllowSpill=*/false);
+          Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
+          Register TmpResultReg = ScaledReg;
+
+          if (!LiveSCC) {
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg)
+                .addReg(FrameReg)
+                .addImm(ST.getWavefrontSizeLog2());
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg)
+                .addReg(TmpResultReg, RegState::Kill)
+                .addImm(Offset);
+          } else {
+            TmpResultReg = RS->scavengeRegisterBackwards(
+                AMDGPU::VGPR_32RegClass, MI, false, 0, /*AllowSpill=*/true);
+
+            MachineInstrBuilder Add;
+            if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) {
+              BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+                      TmpResultReg)
+                  .addImm(ST.getWavefrontSizeLog2())
+                  .addReg(FrameReg);
+              if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
+                BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg)
+                    .addImm(Offset);
+                Add.addReg(ResultReg, RegState::Kill)
+                    .addReg(TmpResultReg, RegState::Kill)
+                    .addImm(0);
+              } else
+                Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
             } else {
-              TmpResultReg = RS->scavengeRegisterBackwards(
-                  AMDGPU::VGPR_32RegClass, MI, false, 0, /*AllowSpill=*/true);
-
-              MachineInstrBuilder Add;
-              if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) {
-                BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+              assert(Offset > 0 && isUInt<24>(2 * ST.getMaxWaveScratchSize()) &&
+                     "offset is unsafe for v_mad_u32_u24");
+
+              // We start with a frame pointer holding a wave-space value and
+              // an offset in lane space; we are materializing a lane-space
+              // value. We can either right shift the frame pointer to get to
+              // lane space, or left shift the offset to get to wave space,
+              // then right shift after the computation to get back to the
+              // desired per-lane value. We use mad_u32_u24 primarily as an
+              // add with no carry-out clobber.
+              bool IsInlinableLiteral =
+                  AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm());
+              if (!IsInlinableLiteral) {
+                BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32),
                         TmpResultReg)
-                    .addImm(ST.getWavefrontSizeLog2())
-                    .addReg(FrameReg);
-                if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
-                  BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32),
-                          ResultReg)
-                      .addImm(Offset);
-                  Add.addReg(ResultReg, RegState::Kill)
-                      .addReg(TmpResultReg, RegState::Kill)
-                      .addImm(0);
-                } else
-                  Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
-              } else {
-                assert(Offset > 0 &&
-                       isUInt<24>(2 * ST.getMaxWaveScratchSize()) &&
-                       "offset is unsafe for v_mad_u32_u24");
-
-                // We start with a frame pointer with a wave space value, and
-                // an offset in lane-space. We are materializing a lane space
-                // value. We can either do a right shift of the frame pointer
-                // to get to lane space, or a left shift of the offset to get
-                // to wavespace. We can right shift after the computation to
-                // get back to the desired per-lane value. We are using the
-                // mad_u32_u24 primarily as an add with no carry out clobber.
-                bool IsInlinableLiteral = AMDGPU::isInlinableLiteral32(
-                    Offset, ST.hasInv2PiInlineImm());
-                if (!IsInlinableLiteral) {
-                  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32),
-                          TmpResultReg)
-                      .addImm(Offset);
-                }
-
-                Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_U32_U24_e64),
-                              TmpResultReg);
-
-                if (!IsInlinableLiteral) {
-                  Add.addReg(TmpResultReg, RegState::Kill);
-                } else {
-                  // We fold the offset into mad itself if its inlinable.
-                  Add.addImm(Offset);
-                }
-                Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0);
-                BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
-                        TmpResultReg)
-                    .addImm(ST.getWavefrontSizeLog2())
-                    .addReg(TmpResultReg);
+                    .addImm(Offset);
               }
 
-              Register NewDest = IsCopy ? ResultReg
-                                        : RS->scavengeRegisterBackwards(
-                                              AMDGPU::SReg_32RegClass, *Add,
-                                              false, 0, /*AllowSpill=*/true);
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
-                      NewDest)
+              Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_U32_U24_e64),
+                            TmpResultReg);
+
+              if (!IsInlinableLiteral) {
+                Add.addReg(TmpResultReg, RegState::Kill);
+              } else {
+                // We fold the offset into the mad itself if it's inlinable.
+                Add.addImm(Offset);
+              }
+              Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0);
+              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+                      TmpResultReg)
+                  .addImm(ST.getWavefrontSizeLog2())
                   .addReg(TmpResultReg);
-              ResultReg = NewDest;
             }
-            if (!IsSALU)
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
-                  .addReg(TmpResultReg, RegState::Kill);
-            else
-              ResultReg = TmpResultReg;
-            // If there were truly no free SGPRs, we need to undo everything.
-            if (!TmpScaledReg.isValid()) {
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
+
+            Register NewDest = IsCopy ? ResultReg
+                                      : RS->scavengeRegisterBackwards(
+                                            AMDGPU::SReg_32RegClass, *Add,
+                                            false, 0, /*AllowSpill=*/true);
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+                    NewDest)
+                .addReg(TmpResultReg);
+            ResultReg = NewDest;
+          }
+          if (!IsSALU)
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
+                .addReg(TmpResultReg, RegState::Kill);
+          else
+            ResultReg = TmpResultReg;
+          // If there were truly no free SGPRs, we need to undo everything.
+          if (!TmpScaledReg.isValid()) {
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
                 .addReg(ScaledReg, RegState::Kill)
                 .addImm(-Offset);
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
                 .addReg(FrameReg)
                 .addImm(ST.getWavefrontSizeLog2());
-            }
           }
         }
+      }
 
-        // Don't introduce an extra copy if we're just materializing in a mov.
-        if (IsCopy) {
-          MI->eraseFromParent();
-          return true;
-        }
-        FIOp->ChangeToRegister(ResultReg, false, false, true);
-        return false;
+      // Don't introduce an extra copy if we're just materializing in a mov.
+      if (IsCopy) {
+        MI->eraseFromParent();
+        return true;
       }
+      FIOp->ChangeToRegister(ResultReg, false, false, true);
+      return false;
+    }
 
-      if (IsMUBUF) {
-        // Disable offen so we don't need a 0 vgpr base.
-        assert(static_cast<int>(FIOperandNum) ==
-               AMDGPU::getNamedOperandIdx(MI->getOpcode(),
-                                          AMDGPU::OpName::vaddr));
+    if (IsMUBUF) {
+      // Disable offen so we don't need a 0 vgpr base.
+      assert(
+          static_cast<int>(FIOperandNum) ==
+          AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr));
 
-        auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
-        assert((SOffset.isImm() && SOffset.getImm() == 0));
+      auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
+      assert((SOffset.isImm() && SOffset.getImm() == 0));
 
-        if (FrameReg != AMDGPU::NoRegister)
-          SOffset.ChangeToRegister(FrameReg, false);
+      if (FrameReg != AMDGPU::NoRegister)
+        SOffset.ChangeToRegister(FrameReg, false);
 
-        int64_t Offset = FrameInfo.getObjectOffset(Index);
-        int64_t OldImm
-          = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
-        int64_t NewOffset = OldImm + Offset;
+      int64_t Offset = FrameInfo.getObjectOffset(Index);
+      int64_t OldImm =
+          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
+      int64_t NewOffset = OldImm + Offset;
 
-        if (TII->isLegalMUBUFImmOffset(NewOffset) &&
-            buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
-          MI->eraseFromParent();
-          return true;
-        }
+      if (TII->isLegalMUBUFImmOffset(NewOffset) &&
+          buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
+        MI->eraseFromParent();
+        return true;
       }
+    }
 
-      // If the offset is simply too big, don't convert to a scratch wave offset
-      // relative index.
+    // If the offset is simply too big, don't convert to a scratch wave offset
+    // relative index.
 
-      FIOp->ChangeToImmediate(Offset);
-      if (!TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) {
-        Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
-                                                        MI, false, 0);
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+    FIOp->ChangeToImmediate(Offset);
+    if (!TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) {
+      Register TmpReg =
+          RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
+      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
           .addImm(Offset);
-        FIOp->ChangeToRegister(TmpReg, false, false, true);
-      }
+      FIOp->ChangeToRegister(TmpReg, false, false, true);
     }
-  }
+
   return false;
 }
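
The core of the fix is the hoisted immediate check above: when the add's other
operand is an immediate, the frame offset is folded into it before any register
is materialized, and the fold is now refused when the instruction is
VOP3-encoded, the target has no VOP3 literal support (pre-GFX10), and the
combined offset is not an integer inline constant (a value in [-16, 64]). A
minimal standalone sketch of that rule; the helper name is hypothetical, and
the real code queries ST.hasVOP3Literal(), SIInstrInfo::isVOP3() and
AMDGPU::isInlinableIntLiteral():

    // Hypothetical model of the new bail-out condition. AMDGPU integer
    // inline constants are exactly the values -16..64.
    static bool canFoldOffsetIntoAdd(int64_t TotalOffset, bool IsVOP3,
                                     bool HasVOP3Literal) {
      bool Inlinable = TotalOffset >= -16 && TotalOffset <= 64;
      return !IsVOP3 || HasVOP3Literal || Inlinable;
    }

In the new MIR tests below, the combined offset is 64 + 12288 = 12352: the
GFX9 targets therefore refuse the fold and materialize 12288 separately while
keeping the inlinable 64 folded, whereas GFX10 and later fold the whole offset
directly as a VOP3 literal.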
 
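Further down, the reflowed flat-scratch path adds the offset on the SALU while
SCC may be live, via S_ADDC_U32 + S_BITCMP1_B32 + S_BITSET0_B32. A minimal
arithmetic model of that trick follows; the diff's assert guarantees the
offset is even, and the model additionally assumes (made explicit here) that
the frame value is even:

    #include <cassert>
    #include <cstdint>

    // The carry-in smuggles the live SCC value into bit 0 of the sum,
    // S_BITCMP1_B32 recovers SCC from that bit, and S_BITSET0_B32 clears it
    // again. Function name is hypothetical.
    uint32_t addWhilePreservingSCC(uint32_t FrameVal, uint32_t Offset,
                                   bool &SCC) {
      assert((FrameVal & 1) == 0 && (Offset & 1) == 0 && "addends must be even");
      uint32_t Tmp = FrameVal + Offset + (SCC ? 1u : 0u); // S_ADDC_U32
      SCC = (Tmp & 1) != 0;                               // S_BITCMP1_B32 Tmp, 0
      Tmp &= ~1u;                                         // S_BITSET0_B32 Tmp, 0
      return Tmp; // == FrameVal + Offset, with the original SCC restored
    }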

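The V_MAD_U32_U24_e64 fallback near the end of the default case (the wave
space / lane space comment above) relies on an algebraic identity instead of a
carry-producing add. A sketch under the assert's 24-bit bound on the offset,
plus one assumption stated explicitly here: the wave-space frame value is a
multiple of the wave size. The function name is made up for illustration:

    #include <cassert>
    #include <cstdint>

    // (Offset * WaveSize + FrameWaveVal) >> Log2WaveSize
    //   == (FrameWaveVal >> Log2WaveSize) + Offset
    // whenever FrameWaveVal is WaveSize-aligned, so the mad behaves as an add
    // with no carry-out clobber, followed by one shift back to lane space.
    uint32_t lowerFrameIndexViaMad(uint32_t FrameWaveVal, uint32_t Offset,
                                   unsigned Log2WaveSize) {
      uint32_t WaveSize = 1u << Log2WaveSize;
      assert(Offset < (1u << 24) && "offset is unsafe for v_mad_u32_u24");
      assert(FrameWaveVal % WaveSize == 0 && "frame value must be wave-aligned");
      uint32_t Mad = Offset * WaveSize + FrameWaveVal; // V_MAD_U32_U24_e64
      return Mad >> Log2WaveSize;                      // V_LSHRREV_B32_e64
    }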
diff  --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
index fc6cd74bf052ca..831e246426ba70 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
@@ -980,8 +980,8 @@ body:             |
     ; GFX7-NEXT: {{  $}}
     ; GFX7-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX7-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
-    ; GFX7-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+    ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; GFX7-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
     ; GFX7-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX8-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -989,8 +989,8 @@ body:             |
     ; GFX8-NEXT: {{  $}}
     ; GFX8-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX8-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
-    ; GFX8-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+    ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; GFX8-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
     ; GFX8-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX900-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -998,8 +998,8 @@ body:             |
     ; GFX900-NEXT: {{  $}}
     ; GFX900-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX900-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
-    ; GFX900-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; GFX900-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
     ; GFX900-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX90A-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -1007,8 +1007,8 @@ body:             |
     ; GFX90A-NEXT: {{  $}}
     ; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
-    ; GFX90A-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; GFX90A-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
     ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX10-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -1020,8 +1020,8 @@ body:             |
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX940-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
-    ; GFX940: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+    ; GFX940: $sgpr4 = S_MOV_B32 72
+    ; GFX940-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $sgpr4, 1, implicit $exec
     ; GFX940-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -2036,3 +2036,521 @@ body:             |
     S_ENDPGM 0
 
 ...
+
+---
+name:            v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+tracksRegLiveness: true
+frameInfo:
+  localFrameSize:  12576
+stack:
+  - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+  - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX7: liveins: $sgpr4
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX8: liveins: $sgpr4
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX900: liveins: $sgpr4
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX900-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX90A: liveins: $sgpr4
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX90A-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX10: liveins: $sgpr4
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX940: liveins: $sgpr4
+    ; GFX940-NEXT: {{  $}}
+    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX940-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX11: liveins: $sgpr4
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX11-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX12: liveins: $sgpr4
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+    renamable $vgpr0,  renamable dead $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name:            v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+tracksRegLiveness: true
+frameInfo:
+  localFrameSize:  12576
+stack:
+  - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+  - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX7: liveins: $sgpr4
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX8: liveins: $sgpr4
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX900: liveins: $sgpr4
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX900-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX90A: liveins: $sgpr4
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX90A-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX10: liveins: $sgpr4
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX940: liveins: $sgpr4
+    ; GFX940-NEXT: {{  $}}
+    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX940-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX11: liveins: $sgpr4
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX12: liveins: $sgpr4
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+
+...
+
+---
+name:            v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+tracksRegLiveness: true
+frameInfo:
+  localFrameSize:  12576
+stack:
+  - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+  - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX7: liveins: $sgpr4
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX8: liveins: $sgpr4
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX900: liveins: $sgpr4
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX90A: liveins: $sgpr4
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX10: liveins: $sgpr4
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX940: liveins: $sgpr4
+    ; GFX940-NEXT: {{  $}}
+    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX11: liveins: $sgpr4
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX11-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX12: liveins: $sgpr4
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+    renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name:            v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+tracksRegLiveness: true
+frameInfo:
+  localFrameSize:  12576
+stack:
+  - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+  - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX7: liveins: $sgpr4
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX7-NEXT: $sgpr6 = S_MOV_B32 12288
+    ; GFX7-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr6, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX8: liveins: $sgpr4
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX8-NEXT: $sgpr6 = S_MOV_B32 12288
+    ; GFX8-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr6, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX900: liveins: $sgpr4
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX90A: liveins: $sgpr4
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX10: liveins: $sgpr4
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX940: liveins: $sgpr4
+    ; GFX940-NEXT: {{  $}}
+    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX11: liveins: $sgpr4
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX12: liveins: $sgpr4
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0, implicit $vcc
+
+...

diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
index 9c2fef05124d7f..de198941b565e6 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
@@ -1692,3 +1692,61 @@ body:             |
     SI_RETURN implicit $vgpr0
 
 ...
+
+---
+name:            v_add_u32_e64_imm_fi_vop3_literal_error
+tracksRegLiveness: true
+frameInfo:
+  localFrameSize:  12576
+stack:
+  - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+  - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; MUBUF-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+    ; MUBUF: liveins: $sgpr4
+    ; MUBUF-NEXT: {{  $}}
+    ; MUBUF-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; MUBUF-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; MUBUF-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; MUBUFW32-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+    ; MUBUFW32: liveins: $sgpr4
+    ; MUBUFW32-NEXT: {{  $}}
+    ; MUBUFW32-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262112, implicit-def $scc
+    ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc
+    ; MUBUFW32-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1048576, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr33, implicit $exec
+    ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; MUBUFW32-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1048576, implicit-def dead $scc
+    ; MUBUFW32-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; FLATSCRW64-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+    ; FLATSCRW64: liveins: $sgpr4
+    ; FLATSCRW64-NEXT: {{  $}}
+    ; FLATSCRW64-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; FLATSCRW64-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+    renamable $vgpr0 = V_ADD_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0
+
+...

diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index c11b7d67a8a214..b1ea275a97a394 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -327,4 +327,23 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}fi_vop3_literal_error:
+; CI: v_lshr_b32_e64 [[SCALED_FP:v[0-9]+]], s33, 6
+; CI: s_movk_i32 vcc_lo, 0x3000
+; CI-NEXT: v_add_i32_e32 [[SCALED_FP]], vcc, vcc_lo, [[SCALED_FP]]
+; CI-NEXT: v_add_i32_e32 v0, vcc, 64, [[SCALED_FP]]
+
+; GFX9-MUBUF: v_lshrrev_b32_e64 [[SCALED_FP:v[0-9]+]], 6, s33
+; GFX9-MUBUF-NEXT: v_add_u32_e32 [[SCALED_FP]], 0x3000, [[SCALED_FP]]
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 64, [[SCALED_FP]]
+define void @fi_vop3_literal_error() {
+entry:
+  %pin.low = alloca i32, align 8192, addrspace(5)
+  %local.area = alloca [1060 x i64], align 4096, addrspace(5)
+  store i32 0, ptr addrspace(5) %pin.low, align 4
+  %gep.small.offset = getelementptr i8, ptr addrspace(5) %local.area, i64 64
+  %load1 = load volatile i64, ptr addrspace(5) %gep.small.offset, align 4
+  ret void
+}
+
 attributes #0 = { nounwind }

diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 12afc267354220..29fbb0bb1c6c97 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -20,8 +20,9 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(ptr addrspace(1) %out) {
 ; MUBUF-LABEL: local_stack_offset_uses_sp:
 ; MUBUF:       ; %bb.0: ; %entry
 ; MUBUF-NEXT:    s_add_u32 s0, s0, s17
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x3000
 ; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
-; MUBUF-NEXT:    v_mov_b32_e32 v0, 0x3040
+; MUBUF-NEXT:    v_add_u32_e32 v0, 64, v1
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 0
 ; MUBUF-NEXT:    v_mov_b32_e32 v2, 0x2000
 ; MUBUF-NEXT:    s_mov_b32 s4, 0
@@ -110,7 +111,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) {
 ; MUBUF-NEXT:    s_add_i32 s33, s32, 0x7ffc0
 ; MUBUF-NEXT:    s_and_b32 s33, s33, 0xfff80000
 ; MUBUF-NEXT:    v_lshrrev_b32_e64 v3, 6, s33
-; MUBUF-NEXT:    v_add_u32_e32 v2, 0x3040, v3
+; MUBUF-NEXT:    v_add_u32_e32 v3, 0x3000, v3
+; MUBUF-NEXT:    v_add_u32_e32 v2, 64, v3
 ; MUBUF-NEXT:    v_mov_b32_e32 v3, 0
 ; MUBUF-NEXT:    v_mov_b32_e32 v4, 0x2000
 ; MUBUF-NEXT:    s_mov_b32 s4, 0
