[llvm] AMDGPU: Fix using illegal VOP3 literal in frame index elimination (PR #115747)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 11 09:50:35 PST 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/115747

From 206b2a3f3b66a8dfe0dffc7b4b51850b7b2123c4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 10 Nov 2024 09:41:13 -0800
Subject: [PATCH] AMDGPU: Fix using illegal VOP3 literal in frame index
 elimination

Most of the change is just moving the default case's body outside of the
switch so the other cases can break into the shared default handling for
arbitrary operations. The actual fix is bailing out of the special add
fold when the combined immediate is not an inline constant and the
subtarget cannot encode a VOP3 literal.
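
As a rough sketch (not the LLVM code itself), the control-flow change
looks like the following; canFoldSpecially and handleGenerically are
hypothetical stand-ins for the special fold and the shared fallback:

static bool canFoldSpecially() { return false; }
static bool handleGenerically() { return true; }

static bool eliminate(int Opcode) {
  switch (Opcode) {
  case 1: // e.g. the V_ADD_CO_U32 case: try the in-place fold first
    if (canFoldSpecially())
      return true;
    break; // give up on the special case; use the generic path below
  default:
    break;
  }
  // Generic frame-index handling, formerly the body of `default:`.
  return handleGenerically();
}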
---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     | 715 +++++++++---------
 .../eliminate-frame-index-v-add-co-u32.mir    | 538 ++++++++++++-
 .../eliminate-frame-index-v-add-u32.mir       |  58 ++
 .../CodeGen/AMDGPU/frame-index-elimination.ll |  19 +
 .../local-stack-alloc-block-sp-reference.ll   |   6 +-
 5 files changed, 970 insertions(+), 366 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index f76d1266f495cf..246ef7ad481ab7 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2268,7 +2268,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
-  DebugLoc DL = MI->getDebugLoc();
+  const DebugLoc &DL = MI->getDebugLoc();
 
   assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
 
@@ -2496,6 +2496,25 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       Register MaterializedReg = FrameReg;
       Register ScavengedVGPR;
 
+      int64_t Offset = FrameInfo.getObjectOffset(Index);
+      // For the non-immediate case, we could fall through to the default
+      // handling, but we do an in-place update of the result register here to
+      // avoid scavenging another register.
+      if (OtherOp->isImm()) {
+        int64_t TotalOffset = OtherOp->getImm() + Offset;
+
+        if (!ST.hasVOP3Literal() && SIInstrInfo::isVOP3(*MI) &&
+            !AMDGPU::isInlinableIntLiteral(TotalOffset)) {
+          // If we can't support a VOP3 literal in the VALU instruction, we
+          // can't specially fold into the add.
+          // TODO: Handle VOP3->VOP2 shrink to support the fold.
+          break;
+        }
+
+        OtherOp->setImm(TotalOffset);
+        Offset = 0;
+      }
+
       if (FrameReg && !ST.enableFlatScratch()) {
         // We should just do an in-place update of the result register. However,
         // the value there may also be used by the add, in which case we need a
@@ -2516,15 +2535,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         MaterializedReg = ScavengedVGPR;
       }
 
-      int64_t Offset = FrameInfo.getObjectOffset(Index);
-      // For the non-immediate case, we could fall through to the default
-      // handling, but we do an in-place update of the result register here to
-      // avoid scavenging another register.
-      if (OtherOp->isImm()) {
-        OtherOp->setImm(OtherOp->getImm() + Offset);
-        Offset = 0;
-      }
-
       if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
         if (ST.enableFlatScratch() &&
             !TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {
@@ -2761,411 +2771,408 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       return true;
     }
     default: {
-      // Other access to frame index
-      const DebugLoc &DL = MI->getDebugLoc();
+      break;
+    }
+    }
 
-      int64_t Offset = FrameInfo.getObjectOffset(Index);
-      if (ST.enableFlatScratch()) {
-        if (TII->isFLATScratch(*MI)) {
-          assert((int16_t)FIOperandNum ==
-                 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
-                                            AMDGPU::OpName::saddr));
+    int64_t Offset = FrameInfo.getObjectOffset(Index);
+    if (ST.enableFlatScratch()) {
+      if (TII->isFLATScratch(*MI)) {
+        assert(
+            (int16_t)FIOperandNum ==
+            AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::saddr));
 
-          // The offset is always swizzled, just replace it
-          if (FrameReg)
-            FIOp->ChangeToRegister(FrameReg, false);
+        // The offset is always swizzled, just replace it
+        if (FrameReg)
+          FIOp->ChangeToRegister(FrameReg, false);
 
-          MachineOperand *OffsetOp =
+        MachineOperand *OffsetOp =
             TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
-          int64_t NewOffset = Offset + OffsetOp->getImm();
-          if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
-                                     SIInstrFlags::FlatScratch)) {
-            OffsetOp->setImm(NewOffset);
-            if (FrameReg)
-              return false;
-            Offset = 0;
-          }
+        int64_t NewOffset = Offset + OffsetOp->getImm();
+        if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
+                                   SIInstrFlags::FlatScratch)) {
+          OffsetOp->setImm(NewOffset);
+          if (FrameReg)
+            return false;
+          Offset = 0;
+        }
 
-          if (!Offset) {
-            unsigned Opc = MI->getOpcode();
-            int NewOpc = -1;
-            if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
-              NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
-            } else if (ST.hasFlatScratchSTMode()) {
-              // On GFX10 we have ST mode to use no registers for an address.
-              // Otherwise we need to materialize 0 into an SGPR.
-              NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
-            }
+        if (!Offset) {
+          unsigned Opc = MI->getOpcode();
+          int NewOpc = -1;
+          if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
+            NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
+          } else if (ST.hasFlatScratchSTMode()) {
+            // On GFX10 we have ST mode to use no registers for an address.
+            // Otherwise we need to materialize 0 into an SGPR.
+            NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
+          }
 
-            if (NewOpc != -1) {
-              // removeOperand doesn't fixup tied operand indexes as it goes, so
-              // it asserts. Untie vdst_in for now and retie them afterwards.
-              int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
-                                                     AMDGPU::OpName::vdst_in);
-              bool TiedVDst = VDstIn != -1 &&
-                              MI->getOperand(VDstIn).isReg() &&
-                              MI->getOperand(VDstIn).isTied();
-              if (TiedVDst)
-                MI->untieRegOperand(VDstIn);
-
-              MI->removeOperand(
-                  AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
-
-              if (TiedVDst) {
-                int NewVDst =
-                    AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
-                int NewVDstIn =
-                    AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
-                assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
-                MI->tieOperands(NewVDst, NewVDstIn);
-              }
-              MI->setDesc(TII->get(NewOpc));
-              return false;
+          if (NewOpc != -1) {
+            // removeOperand doesn't fix up tied operand indexes as it goes, so
+            // it asserts. Untie vdst_in for now and retie them afterwards.
+            int VDstIn =
+                AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+            bool TiedVDst = VDstIn != -1 && MI->getOperand(VDstIn).isReg() &&
+                            MI->getOperand(VDstIn).isTied();
+            if (TiedVDst)
+              MI->untieRegOperand(VDstIn);
+
+            MI->removeOperand(
+                AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
+
+            if (TiedVDst) {
+              int NewVDst =
+                  AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
+              int NewVDstIn =
+                  AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
+              assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
+              MI->tieOperands(NewVDst, NewVDstIn);
             }
+            MI->setDesc(TII->get(NewOpc));
+            return false;
           }
         }
+      }
 
-        if (!FrameReg) {
-          FIOp->ChangeToImmediate(Offset);
-          if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))
-            return false;
-        }
+      if (!FrameReg) {
+        FIOp->ChangeToImmediate(Offset);
+        if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))
+          return false;
+      }
 
-        // We need to use register here. Check if we can use an SGPR or need
-        // a VGPR.
-        FIOp->ChangeToRegister(AMDGPU::M0, false);
-        bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);
+      // We need to use a register here. Check if we can use an SGPR or need
+      // a VGPR.
+      FIOp->ChangeToRegister(AMDGPU::M0, false);
+      bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);
 
-        if (!Offset && FrameReg && UseSGPR) {
-          FIOp->setReg(FrameReg);
-          return false;
-        }
+      if (!Offset && FrameReg && UseSGPR) {
+        FIOp->setReg(FrameReg);
+        return false;
+      }
 
-        const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
-                                                : &AMDGPU::VGPR_32RegClass;
+      const TargetRegisterClass *RC =
+          UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
 
-        Register TmpReg =
-            RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
-        FIOp->setReg(TmpReg);
-        FIOp->setIsKill();
+      Register TmpReg =
+          RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
+      FIOp->setReg(TmpReg);
+      FIOp->setIsKill();
 
-        if ((!FrameReg || !Offset) && TmpReg) {
-          unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
-          auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
-          if (FrameReg)
-            MIB.addReg(FrameReg);
-          else
-            MIB.addImm(Offset);
+      if ((!FrameReg || !Offset) && TmpReg) {
+        unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+        auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
+        if (FrameReg)
+          MIB.addReg(FrameReg);
+        else
+          MIB.addImm(Offset);
 
-          return false;
-        }
+        return false;
+      }
 
-        bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
-                           !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
+      bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
+                         !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
 
-        Register TmpSReg =
-            UseSGPR ? TmpReg
-                    : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
-                                                    MI, false, 0, !UseSGPR);
+      Register TmpSReg =
+          UseSGPR ? TmpReg
+                  : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
+                                                  MI, false, 0, !UseSGPR);
 
-        // TODO: for flat scratch another attempt can be made with a VGPR index
-        //       if no SGPRs can be scavenged.
-        if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
-          report_fatal_error("Cannot scavenge register in FI elimination!");
+      // TODO: for flat scratch another attempt can be made with a VGPR index
+      //       if no SGPRs can be scavenged.
+      if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
+        report_fatal_error("Cannot scavenge register in FI elimination!");
 
-        if (!TmpSReg) {
-          // Use frame register and restore it after.
-          TmpSReg = FrameReg;
-          FIOp->setReg(FrameReg);
-          FIOp->setIsKill(false);
-        }
+      if (!TmpSReg) {
+        // Use frame register and restore it after.
+        TmpSReg = FrameReg;
+        FIOp->setReg(FrameReg);
+        FIOp->setIsKill(false);
+      }
 
-        if (NeedSaveSCC) {
-          assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
-              .addReg(FrameReg)
-              .addImm(Offset);
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
-              .addReg(TmpSReg)
-              .addImm(0);
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
+      if (NeedSaveSCC) {
+        assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
+            .addReg(FrameReg)
+            .addImm(Offset);
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
+            .addReg(TmpSReg)
+            .addImm(0);
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
+            .addImm(0)
+            .addReg(TmpSReg);
+      } else {
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
+            .addReg(FrameReg)
+            .addImm(Offset);
+      }
+
+      if (!UseSGPR)
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+            .addReg(TmpSReg, RegState::Kill);
+
+      if (TmpSReg == FrameReg) {
+        // Undo frame register modification.
+        if (NeedSaveSCC &&
+            !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
+          MachineBasicBlock::iterator I =
+              BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
+                      TmpSReg)
+                  .addReg(FrameReg)
+                  .addImm(-Offset);
+          I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
+                  .addReg(TmpSReg)
+                  .addImm(0);
+          BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
+                  TmpSReg)
               .addImm(0)
               .addReg(TmpSReg);
         } else {
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
+          BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
+                  FrameReg)
               .addReg(FrameReg)
-              .addImm(Offset);
+              .addImm(-Offset);
         }
+      }
 
-        if (!UseSGPR)
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
-            .addReg(TmpSReg, RegState::Kill);
-
-        if (TmpSReg == FrameReg) {
-          // Undo frame register modification.
-          if (NeedSaveSCC &&
-              !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
-            MachineBasicBlock::iterator I =
-                BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
-                        TmpSReg)
-                    .addReg(FrameReg)
-                    .addImm(-Offset);
-            I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
-                    .addReg(TmpSReg)
-                    .addImm(0);
-            BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
-                    TmpSReg)
-                .addImm(0)
-                .addReg(TmpSReg);
-          } else {
-            BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
-                    FrameReg)
-                .addReg(FrameReg)
-                .addImm(-Offset);
-          }
-        }
+      return false;
+    }
 
-        return false;
-      }
+    bool IsMUBUF = TII->isMUBUF(*MI);
+
+    if (!IsMUBUF && !MFI->isBottomOfStack()) {
+      // Convert to a swizzled stack address by scaling by the wave size.
+      // In an entry function/kernel the offset is already swizzled.
+      bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
+      bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
+                     !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
+      const TargetRegisterClass *RC = IsSALU && !LiveSCC
+                                          ? &AMDGPU::SReg_32RegClass
+                                          : &AMDGPU::VGPR_32RegClass;
+      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+                    MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
+                    MI->getOpcode() == AMDGPU::S_MOV_B32;
+      Register ResultReg =
+          IsCopy ? MI->getOperand(0).getReg()
+                 : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
 
-      bool IsMUBUF = TII->isMUBUF(*MI);
-
-      if (!IsMUBUF && !MFI->isBottomOfStack()) {
-        // Convert to a swizzled stack address by scaling by the wave size.
-        // In an entry function/kernel the offset is already swizzled.
-        bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
-        bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
-                       !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
-        const TargetRegisterClass *RC = IsSALU && !LiveSCC
-                                            ? &AMDGPU::SReg_32RegClass
-                                            : &AMDGPU::VGPR_32RegClass;
-        bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
-                      MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
-                      MI->getOpcode() == AMDGPU::S_MOV_B32;
-        Register ResultReg =
-            IsCopy ? MI->getOperand(0).getReg()
-                   : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
-
-        int64_t Offset = FrameInfo.getObjectOffset(Index);
-        if (Offset == 0) {
-          unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
-                                               : AMDGPU::V_LSHRREV_B32_e64;
-          Register TmpResultReg = ResultReg;
-          if (IsSALU && LiveSCC) {
-            TmpResultReg = RS->scavengeRegisterBackwards(
-                AMDGPU::VGPR_32RegClass, MI, false, 0);
-          }
+      int64_t Offset = FrameInfo.getObjectOffset(Index);
+      if (Offset == 0) {
+        unsigned OpCode =
+            IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;
+        Register TmpResultReg = ResultReg;
+        if (IsSALU && LiveSCC) {
+          TmpResultReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
+                                                       MI, false, 0);
+        }
 
-          auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);
-          if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
-            // For V_LSHRREV, the operands are reversed (the shift count goes
-            // first).
-            Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
-          else
-            Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
-          if (IsSALU && !LiveSCC)
-            Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
-          if (IsSALU && LiveSCC) {
-            Register NewDest =
-                IsCopy ? ResultReg
-                       : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass,
-                                                       Shift, false, 0);
-            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
-                    NewDest)
-                .addReg(TmpResultReg);
-            ResultReg = NewDest;
-          }
-        } else {
-          MachineInstrBuilder MIB;
-          if (!IsSALU) {
-            if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
-                nullptr) {
-              // Reuse ResultReg in intermediate step.
-              Register ScaledReg = ResultReg;
-
-              BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
-                      ScaledReg)
+        auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);
+        if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
+          // For V_LSHRREV, the operands are reversed (the shift count goes
+          // first).
+          Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
+        else
+          Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
+        if (IsSALU && !LiveSCC)
+          Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
+        if (IsSALU && LiveSCC) {
+          Register NewDest =
+              IsCopy ? ResultReg
+                     : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass,
+                                                     Shift, false, 0);
+          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), NewDest)
+              .addReg(TmpResultReg);
+          ResultReg = NewDest;
+        }
+      } else {
+        MachineInstrBuilder MIB;
+        if (!IsSALU) {
+          if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
+              nullptr) {
+            // Reuse ResultReg in intermediate step.
+            Register ScaledReg = ResultReg;
+
+            BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+                    ScaledReg)
                 .addImm(ST.getWavefrontSizeLog2())
                 .addReg(FrameReg);
 
-              const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
+            const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
 
-              // TODO: Fold if use instruction is another add of a constant.
-              if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
-                // FIXME: This can fail
-                MIB.addImm(Offset);
-                MIB.addReg(ScaledReg, RegState::Kill);
-                if (!IsVOP2)
-                  MIB.addImm(0); // clamp bit
-              } else {
-                assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
-                       "Need to reuse carry out register");
-
-                // Use scavenged unused carry out as offset register.
-                Register ConstOffsetReg;
-                if (!isWave32)
-                  ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
-                else
-                  ConstOffsetReg = MIB.getReg(1);
-
-                BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
-                    .addImm(Offset);
-                MIB.addReg(ConstOffsetReg, RegState::Kill);
-                MIB.addReg(ScaledReg, RegState::Kill);
+            // TODO: Fold if use instruction is another add of a constant.
+            if (IsVOP2 ||
+                AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
+              // FIXME: This can fail
+              MIB.addImm(Offset);
+              MIB.addReg(ScaledReg, RegState::Kill);
+              if (!IsVOP2)
                 MIB.addImm(0); // clamp bit
-              }
+            } else {
+              assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
+                     "Need to reuse carry out register");
+
+              // Use scavenged unused carry out as offset register.
+              Register ConstOffsetReg;
+              if (!isWave32)
+                ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
+              else
+                ConstOffsetReg = MIB.getReg(1);
+
+              BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32),
+                      ConstOffsetReg)
+                  .addImm(Offset);
+              MIB.addReg(ConstOffsetReg, RegState::Kill);
+              MIB.addReg(ScaledReg, RegState::Kill);
+              MIB.addImm(0); // clamp bit
             }
           }
-          if (!MIB || IsSALU) {
-            // We have to produce a carry out, and there isn't a free SGPR pair
-            // for it. We can keep the whole computation on the SALU to avoid
-            // clobbering an additional register at the cost of an extra mov.
-
-            // We may have 1 free scratch SGPR even though a carry out is
-            // unavailable. Only one additional mov is needed.
-            Register TmpScaledReg = IsCopy && IsSALU
-                                        ? ResultReg
-                                        : RS->scavengeRegisterBackwards(
-                                              AMDGPU::SReg_32_XM0RegClass, MI,
-                                              false, 0, /*AllowSpill=*/false);
-            Register ScaledReg =
-                TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
-            Register TmpResultReg = ScaledReg;
-
-            if (!LiveSCC) {
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg)
-                  .addReg(FrameReg)
-                  .addImm(ST.getWavefrontSizeLog2());
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg)
-                  .addReg(TmpResultReg, RegState::Kill)
-                  .addImm(Offset);
+        }
+        if (!MIB || IsSALU) {
+          // We have to produce a carry out, and there isn't a free SGPR pair
+          // for it. We can keep the whole computation on the SALU to avoid
+          // clobbering an additional register at the cost of an extra mov.
+
+          // We may have 1 free scratch SGPR even though a carry out is
+          // unavailable. Only one additional mov is needed.
+          Register TmpScaledReg = IsCopy && IsSALU
+                                      ? ResultReg
+                                      : RS->scavengeRegisterBackwards(
+                                            AMDGPU::SReg_32_XM0RegClass, MI,
+                                            false, 0, /*AllowSpill=*/false);
+          Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
+          Register TmpResultReg = ScaledReg;
+
+          if (!LiveSCC) {
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg)
+                .addReg(FrameReg)
+                .addImm(ST.getWavefrontSizeLog2());
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg)
+                .addReg(TmpResultReg, RegState::Kill)
+                .addImm(Offset);
+          } else {
+            TmpResultReg = RS->scavengeRegisterBackwards(
+                AMDGPU::VGPR_32RegClass, MI, false, 0, /*AllowSpill=*/true);
+
+            MachineInstrBuilder Add;
+            if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) {
+              BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+                      TmpResultReg)
+                  .addImm(ST.getWavefrontSizeLog2())
+                  .addReg(FrameReg);
+              if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
+                BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg)
+                    .addImm(Offset);
+                Add.addReg(ResultReg, RegState::Kill)
+                    .addReg(TmpResultReg, RegState::Kill)
+                    .addImm(0);
+              } else
+                Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
             } else {
-              TmpResultReg = RS->scavengeRegisterBackwards(
-                  AMDGPU::VGPR_32RegClass, MI, false, 0, /*AllowSpill=*/true);
-
-              MachineInstrBuilder Add;
-              if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) {
-                BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+              assert(Offset > 0 && isUInt<24>(2 * ST.getMaxWaveScratchSize()) &&
+                     "offset is unsafe for v_mad_u32_u24");
+
+              // We start with a frame pointer with a wave space value, and
+              // an offset in lane-space. We are materializing a lane space
+              // value. We can either do a right shift of the frame pointer
+              // to get to lane space, or a left shift of the offset to get
+              // to wavespace. We can right shift after the computation to
+              // get back to the desired per-lane value. We are using the
+              // mad_u32_u24 primarily as an add with no carry out clobber.
+              bool IsInlinableLiteral =
+                  AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm());
+              if (!IsInlinableLiteral) {
+                BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32),
                         TmpResultReg)
-                    .addImm(ST.getWavefrontSizeLog2())
-                    .addReg(FrameReg);
-                if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
-                  BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32),
-                          ResultReg)
-                      .addImm(Offset);
-                  Add.addReg(ResultReg, RegState::Kill)
-                      .addReg(TmpResultReg, RegState::Kill)
-                      .addImm(0);
-                } else
-                  Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
-              } else {
-                assert(Offset > 0 &&
-                       isUInt<24>(2 * ST.getMaxWaveScratchSize()) &&
-                       "offset is unsafe for v_mad_u32_u24");
-
-                // We start with a frame pointer with a wave space value, and
-                // an offset in lane-space. We are materializing a lane space
-                // value. We can either do a right shift of the frame pointer
-                // to get to lane space, or a left shift of the offset to get
-                // to wavespace. We can right shift after the computation to
-                // get back to the desired per-lane value. We are using the
-                // mad_u32_u24 primarily as an add with no carry out clobber.
-                bool IsInlinableLiteral = AMDGPU::isInlinableLiteral32(
-                    Offset, ST.hasInv2PiInlineImm());
-                if (!IsInlinableLiteral) {
-                  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32),
-                          TmpResultReg)
-                      .addImm(Offset);
-                }
-
-                Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_U32_U24_e64),
-                              TmpResultReg);
-
-                if (!IsInlinableLiteral) {
-                  Add.addReg(TmpResultReg, RegState::Kill);
-                } else {
-                  // We fold the offset into mad itself if its inlinable.
-                  Add.addImm(Offset);
-                }
-                Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0);
-                BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
-                        TmpResultReg)
-                    .addImm(ST.getWavefrontSizeLog2())
-                    .addReg(TmpResultReg);
+                    .addImm(Offset);
               }
 
-              Register NewDest = IsCopy ? ResultReg
-                                        : RS->scavengeRegisterBackwards(
-                                              AMDGPU::SReg_32RegClass, *Add,
-                                              false, 0, /*AllowSpill=*/true);
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
-                      NewDest)
+              Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_U32_U24_e64),
+                            TmpResultReg);
+
+              if (!IsInlinableLiteral) {
+                Add.addReg(TmpResultReg, RegState::Kill);
+              } else {
+                // We fold the offset into mad itself if it's inlinable.
+                Add.addImm(Offset);
+              }
+              Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0);
+              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+                      TmpResultReg)
+                  .addImm(ST.getWavefrontSizeLog2())
                   .addReg(TmpResultReg);
-              ResultReg = NewDest;
             }
-            if (!IsSALU)
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
-                  .addReg(TmpResultReg, RegState::Kill);
-            else
-              ResultReg = TmpResultReg;
-            // If there were truly no free SGPRs, we need to undo everything.
-            if (!TmpScaledReg.isValid()) {
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
+
+            Register NewDest = IsCopy ? ResultReg
+                                      : RS->scavengeRegisterBackwards(
+                                            AMDGPU::SReg_32RegClass, *Add,
+                                            false, 0, /*AllowSpill=*/true);
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+                    NewDest)
+                .addReg(TmpResultReg);
+            ResultReg = NewDest;
+          }
+          if (!IsSALU)
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
+                .addReg(TmpResultReg, RegState::Kill);
+          else
+            ResultReg = TmpResultReg;
+          // If there were truly no free SGPRs, we need to undo everything.
+          if (!TmpScaledReg.isValid()) {
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
                 .addReg(ScaledReg, RegState::Kill)
                 .addImm(-Offset);
-              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
                 .addReg(FrameReg)
                 .addImm(ST.getWavefrontSizeLog2());
-            }
           }
         }
+      }
 
-        // Don't introduce an extra copy if we're just materializing in a mov.
-        if (IsCopy) {
-          MI->eraseFromParent();
-          return true;
-        }
-        FIOp->ChangeToRegister(ResultReg, false, false, true);
-        return false;
+      // Don't introduce an extra copy if we're just materializing in a mov.
+      if (IsCopy) {
+        MI->eraseFromParent();
+        return true;
       }
+      FIOp->ChangeToRegister(ResultReg, false, false, true);
+      return false;
+    }
 
-      if (IsMUBUF) {
-        // Disable offen so we don't need a 0 vgpr base.
-        assert(static_cast<int>(FIOperandNum) ==
-               AMDGPU::getNamedOperandIdx(MI->getOpcode(),
-                                          AMDGPU::OpName::vaddr));
+    if (IsMUBUF) {
+      // Disable offen so we don't need a 0 vgpr base.
+      assert(
+          static_cast<int>(FIOperandNum) ==
+          AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr));
 
-        auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
-        assert((SOffset.isImm() && SOffset.getImm() == 0));
+      auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
+      assert((SOffset.isImm() && SOffset.getImm() == 0));
 
-        if (FrameReg != AMDGPU::NoRegister)
-          SOffset.ChangeToRegister(FrameReg, false);
+      if (FrameReg != AMDGPU::NoRegister)
+        SOffset.ChangeToRegister(FrameReg, false);
 
-        int64_t Offset = FrameInfo.getObjectOffset(Index);
-        int64_t OldImm
-          = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
-        int64_t NewOffset = OldImm + Offset;
+      int64_t Offset = FrameInfo.getObjectOffset(Index);
+      int64_t OldImm =
+          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
+      int64_t NewOffset = OldImm + Offset;
 
-        if (TII->isLegalMUBUFImmOffset(NewOffset) &&
-            buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
-          MI->eraseFromParent();
-          return true;
-        }
+      if (TII->isLegalMUBUFImmOffset(NewOffset) &&
+          buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
+        MI->eraseFromParent();
+        return true;
       }
+    }
 
-      // If the offset is simply too big, don't convert to a scratch wave offset
-      // relative index.
+    // If the offset is simply too big, don't convert to a scratch wave offset
+    // relative index.
 
-      FIOp->ChangeToImmediate(Offset);
-      if (!TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) {
-        Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
-                                                        MI, false, 0);
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+    FIOp->ChangeToImmediate(Offset);
+    if (!TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) {
+      Register TmpReg =
+          RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
+      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
           .addImm(Offset);
-        FIOp->ChangeToRegister(TmpReg, false, false, true);
-      }
+      FIOp->ChangeToRegister(TmpReg, false, false, true);
     }
-  }
+
   return false;
 }
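
For reference, a minimal sketch of the legality check the fix adds (the
OtherOp->isImm() block near the top of the diff). Pre-GFX10 subtargets
cannot encode a 32-bit literal in a VOP3 instruction, so the folded total
offset is only legal if it is an inline constant; the range below is
assumed to mirror AMDGPU::isInlinableIntLiteral for illustration, and is
not the real API:

#include <cstdint>

// Assumed to match AMDGPU::isInlinableIntLiteral: the inline integer
// constants are the values -16..64.
static bool isInlinableIntLiteral(int64_t Imm) {
  return Imm >= -16 && Imm <= 64;
}

// Mirrors the new bail-out condition: a VOP3 add can absorb the folded
// offset only if the subtarget supports VOP3 literals (GFX10+) or the
// offset is an inline constant.
static bool canFoldIntoVOP3Add(int64_t TotalOffset, bool HasVOP3Literal) {
  return HasVOP3Literal || isInlinableIntLiteral(TotalOffset);
}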
 
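The V_MAD_U32_U24 path in the diff above also deserves a worked example.
The frame register holds a wave-space value and the frame object offset is
per-lane; madding the offset by the wave size and shifting right afterwards
yields the same lane-space result as shifting first and adding, without
clobbering a carry out (the isUInt<24> assert guards the u24 constraint on
the multiplied operands). A minimal standalone check with illustrative
values, not taken from the patch:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t WaveSize = 64, Log2WaveSize = 6;
  const uint32_t FrameReg = 0x4000; // wave-space frame pointer (example)
  const uint32_t Offset = 12288;    // lane-space object offset (example)

  // v_mad_u32_u24 tmp, Offset, WaveSize, FrameReg
  uint32_t Tmp = Offset * WaveSize + FrameReg;
  // v_lshrrev_b32 result, Log2WaveSize, tmp
  uint32_t Result = Tmp >> Log2WaveSize;

  // Same as shifting into lane space first and then adding the offset,
  // but with no carry-out register needed.
  assert(Result == (FrameReg >> Log2WaveSize) + Offset);
  return 0;
}
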
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
index fc6cd74bf052ca..831e246426ba70 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
@@ -980,8 +980,8 @@ body:             |
     ; GFX7-NEXT: {{  $}}
     ; GFX7-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX7-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
-    ; GFX7-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+    ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; GFX7-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
     ; GFX7-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX8-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -989,8 +989,8 @@ body:             |
     ; GFX8-NEXT: {{  $}}
     ; GFX8-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX8-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
-    ; GFX8-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+    ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; GFX8-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
     ; GFX8-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX900-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -998,8 +998,8 @@ body:             |
     ; GFX900-NEXT: {{  $}}
     ; GFX900-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX900-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
-    ; GFX900-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; GFX900-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
     ; GFX900-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX90A-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -1007,8 +1007,8 @@ body:             |
     ; GFX90A-NEXT: {{  $}}
     ; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
-    ; GFX90A-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; GFX90A-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
     ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX10-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -1020,8 +1020,8 @@ body:             |
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX940-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
-    ; GFX940: $vgpr1 = V_MOV_B32_e32 84, implicit $exec
-    ; GFX940-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec
+    ; GFX940: $sgpr4 = S_MOV_B32 72
+    ; GFX940-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $sgpr4, 1, implicit $exec
     ; GFX940-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX11-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
@@ -2036,3 +2036,521 @@ body:             |
     S_ENDPGM 0
 
 ...
+
+---
+name:            v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+tracksRegLiveness: true
+frameInfo:
+  localFrameSize:  12576
+stack:
+  - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+  - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX7: liveins: $sgpr4
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX8: liveins: $sgpr4
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX900: liveins: $sgpr4
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX900-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX90A: liveins: $sgpr4
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX90A-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX10: liveins: $sgpr4
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX940: liveins: $sgpr4
+    ; GFX940-NEXT: {{  $}}
+    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX940-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX11: liveins: $sgpr4
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX11-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc
+    ; GFX12: liveins: $sgpr4
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+    renamable $vgpr0,  renamable dead $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name:            v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+tracksRegLiveness: true
+frameInfo:
+  localFrameSize:  12576
+stack:
+  - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+  - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX7: liveins: $sgpr4
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX8: liveins: $sgpr4
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX900: liveins: $sgpr4
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX900-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX90A: liveins: $sgpr4
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX90A-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX10: liveins: $sgpr4
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX940: liveins: $sgpr4
+    ; GFX940-NEXT: {{  $}}
+    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX940-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX11: liveins: $sgpr4
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    ;
+    ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live
+    ; GFX12: liveins: $sgpr4
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+    renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9
+
+...
+
+---
+name:            v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+tracksRegLiveness: true
+frameInfo:
+  localFrameSize:  12576
+stack:
+  - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+  - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX7: liveins: $sgpr4
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX8: liveins: $sgpr4
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288
+    ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX900: liveins: $sgpr4
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX90A: liveins: $sgpr4
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX10: liveins: $sgpr4
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX940: liveins: $sgpr4
+    ; GFX940-NEXT: {{  $}}
+    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX11: liveins: $sgpr4
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX11-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc
+    ; GFX12: liveins: $sgpr4
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+    renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0
+
+...
+
+---
+name:            v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+tracksRegLiveness: true
+frameInfo:
+  localFrameSize:  12576
+stack:
+  - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+  - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX7: liveins: $sgpr4
+    ; GFX7-NEXT: {{  $}}
+    ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX7-NEXT: $sgpr6 = S_MOV_B32 12288
+    ; GFX7-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr6, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX8: liveins: $sgpr4
+    ; GFX8-NEXT: {{  $}}
+    ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX8-NEXT: $sgpr6 = S_MOV_B32 12288
+    ; GFX8-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr6, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX900: liveins: $sgpr4
+    ; GFX900-NEXT: {{  $}}
+    ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX90A: liveins: $sgpr4
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX10: liveins: $sgpr4
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX940: liveins: $sgpr4
+    ; GFX940-NEXT: {{  $}}
+    ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX11: liveins: $sgpr4
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX12: liveins: $sgpr4
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0, implicit $vcc
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
index 9c2fef05124d7f..de198941b565e6 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
@@ -1692,3 +1692,61 @@ body:             |
     SI_RETURN implicit $vgpr0
 
 ...
+
+---
+name:            v_add_u32_e64_imm_fi_vop3_literal_error
+tracksRegLiveness: true
+frameInfo:
+  localFrameSize:  12576
+stack:
+  - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+  - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    ; MUBUF-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+    ; MUBUF: liveins: $sgpr4
+    ; MUBUF-NEXT: {{  $}}
+    ; MUBUF-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; MUBUF-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; MUBUF-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; MUBUFW32-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+    ; MUBUFW32: liveins: $sgpr4
+    ; MUBUFW32-NEXT: {{  $}}
+    ; MUBUFW32-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262112, implicit-def $scc
+    ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc
+    ; MUBUFW32-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1048576, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr33, implicit $exec
+    ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; MUBUFW32-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1048576, implicit-def dead $scc
+    ; MUBUFW32-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; FLATSCRW64-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+    ; FLATSCRW64: liveins: $sgpr4
+    ; FLATSCRW64-NEXT: {{  $}}
+    ; FLATSCRW64-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; FLATSCRW64-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+    renamable $vgpr0 = V_ADD_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index c11b7d67a8a214..b1ea275a97a394 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -327,4 +327,23 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}fi_vop3_literal_error:
+; CI: v_lshr_b32_e64 [[SCALED_FP:v[0-9]+]], s33, 6
+; CI: s_movk_i32 vcc_lo, 0x3000
+; CI-NEXT: v_add_i32_e32 [[SCALED_FP]], vcc, vcc_lo, [[SCALED_FP]]
+; CI-NEXT: v_add_i32_e32 v0, vcc, 64, [[SCALED_FP]]
+
+; GFX9-MUBUF: v_lshrrev_b32_e64 [[SCALED_FP:v[0-9]+]], 6, s33
+; GFX9-MUBUF-NEXT: v_add_u32_e32 [[SCALED_FP]], 0x3000, [[SCALED_FP]]
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 64, [[SCALED_FP]]
+define void @fi_vop3_literal_error() {
+entry:
+  %pin.low = alloca i32, align 8192, addrspace(5)
+  %local.area = alloca [1060 x i64], align 4096, addrspace(5)
+  store i32 0, ptr addrspace(5) %pin.low, align 4
+  %gep.small.offset = getelementptr i8, ptr addrspace(5) %local.area, i64 64
+  %load1 = load volatile i64, ptr addrspace(5) %gep.small.offset, align 4
+  ret void
+}
+
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 12afc267354220..29fbb0bb1c6c97 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -20,8 +20,9 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(ptr addrspace(1) %out) {
 ; MUBUF-LABEL: local_stack_offset_uses_sp:
 ; MUBUF:       ; %bb.0: ; %entry
 ; MUBUF-NEXT:    s_add_u32 s0, s0, s17
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x3000
 ; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
-; MUBUF-NEXT:    v_mov_b32_e32 v0, 0x3040
+; MUBUF-NEXT:    v_add_u32_e32 v0, 64, v1
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 0
 ; MUBUF-NEXT:    v_mov_b32_e32 v2, 0x2000
 ; MUBUF-NEXT:    s_mov_b32 s4, 0
@@ -110,7 +111,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) {
 ; MUBUF-NEXT:    s_add_i32 s33, s32, 0x7ffc0
 ; MUBUF-NEXT:    s_and_b32 s33, s33, 0xfff80000
 ; MUBUF-NEXT:    v_lshrrev_b32_e64 v3, 6, s33
-; MUBUF-NEXT:    v_add_u32_e32 v2, 0x3040, v3
+; MUBUF-NEXT:    v_add_u32_e32 v3, 0x3000, v3
+; MUBUF-NEXT:    v_add_u32_e32 v2, 64, v3
 ; MUBUF-NEXT:    v_mov_b32_e32 v3, 0
 ; MUBUF-NEXT:    v_mov_b32_e32 v4, 0x2000
 ; MUBUF-NEXT:    s_mov_b32 s4, 0
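
---

A note for anyone tracing the CHECK churn above: the dividing line in every
one of these tests is whether the combined offset still fits an AMDGPU
integer inline constant. The object at %stack.1 sits at local offset 0x3000
(12288) and the add contributes 64, so the folded total would be 0x3040
(12352). That is outside the inline range and therefore needs a 32-bit
literal operand, which VOP3 encodings only have on subtargets with VOP3
literal support; hence the GFX10-class runs fold 12352 directly, while the
older runs materialize 12288 separately and keep only the inlinable 64 on
the VOP3 add. Below is a minimal standalone sketch of that legality test,
assuming only the documented [-16, 64] integer inline range (the in-tree
entry points are AMDGPU::isInlinableIntLiteral and
GCNSubtarget::hasVOP3Literal; the "Sketch"-suffixed names are made up for
illustration):

  // Hedged sketch, not the in-tree implementation.
  #include <cstdint>
  #include <cstdio>

  // AMDGPU integer inline constants span [-16, 64]; anything outside
  // that range must be encoded as a 32-bit literal operand.
  static bool isInlinableIntLiteralSketch(int64_t Literal) {
    return Literal >= -16 && Literal <= 64;
  }

  // A VOP3-encoded add can carry the folded total only if the subtarget
  // can encode VOP3 literals, or the total stays inlinable.
  static bool vop3AddCanCarrySketch(int64_t Total, bool HasVOP3Literal) {
    return HasVOP3Literal || isInlinableIntLiteralSketch(Total);
  }

  int main() {
    std::printf("%d\n", vop3AddCanCarrySketch(64, false));          // 1
    std::printf("%d\n", vop3AddCanCarrySketch(0x3000 + 64, false)); // 0
    std::printf("%d\n", vop3AddCanCarrySketch(0x3000 + 64, true));  // 1
  }

This also matches the shapes of the expected sequences: V_ADD_U32_e32 is a
VOP2 encoding, which can take the 12288 literal even on pre-GFX10 targets,
leaving the inlinable 64 for the VOP3 add.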


