[llvm] [AMDGPU] VOPD/VOPD3 changes for gfx1250 (PR #147602)

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 9 13:49:40 PDT 2025


https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/147602

>From 6320838450943d49e13096809a0bc9de1cf1d083 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Tue, 8 Jul 2025 12:56:42 -0700
Subject: [PATCH] [AMDGPU] VOPD/VOPD3 changes for gfx1250

---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  207 +-
 llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp      |   70 +-
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |    2 +
 llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp       |  106 +-
 llvm/lib/Target/AMDGPU/GCNVOPDUtils.h         |    2 +-
 llvm/lib/Target/AMDGPU/SIDefines.h            |    2 +
 llvm/lib/Target/AMDGPU/SIInstrFormats.td      |    3 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         |  132 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td      |    2 +
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |  153 +-
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |  106 +-
 llvm/lib/Target/AMDGPU/VOP2Instructions.td    |   84 +-
 llvm/lib/Target/AMDGPU/VOP3Instructions.td    |   13 +-
 llvm/lib/Target/AMDGPU/VOPCInstructions.td    |   32 +-
 llvm/lib/Target/AMDGPU/VOPDInstructions.td    |  129 +-
 llvm/lib/Target/AMDGPU/VOPInstructions.td     |    9 +-
 llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll  |   75 +-
 .../CodeGen/AMDGPU/vopd-combine-gfx1250.mir   | 3243 +++++++++++++++++
 llvm/test/MC/AMDGPU/gfx1250_asm_vopd_errs.s   |  326 ++
 .../MC/AMDGPU/gfx1250_asm_vopd_features.s     |  109 +
 20 files changed, 4574 insertions(+), 231 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
 create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vopd_errs.s
 create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vopd_features.s

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 3af140461afdb..46a8247757409 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -335,6 +335,20 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
   }
 
+  bool isRegOrInlineImmWithFP64InputMods() const {
+    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
+  }
+
+  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
+
+  bool isVRegWithFP32InputMods() const {
+    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
+  }
+
+  bool isVRegWithFP64InputMods() const {
+    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
+  }
+
   bool isPackedFP16InputMods() const {
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
   }
@@ -527,7 +541,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
   }
 
-  bool isVCSrcB64() const {
+  bool isVCSrc_b64() const {
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
   }
 
@@ -553,7 +567,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
   }
 
-  bool isVCSrcF64() const {
+  bool isVCSrc_f64() const {
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
   }
 
@@ -601,7 +615,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
   }
 
-  bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
+  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
 
   bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
 
@@ -617,15 +631,11 @@ class AMDGPUOperand : public MCParsedAsmOperand {
 
   bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
 
-  bool isVCSrcV2FP32() const {
-    return isVCSrcF64();
-  }
+  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
 
   bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
 
-  bool isVCSrcV2INT32() const {
-    return isVCSrcB64();
-  }
+  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
 
   bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
 
@@ -633,7 +643,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
   }
 
-  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
+  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
 
   bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
 
@@ -1527,6 +1537,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
 
   bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
 
+  bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
+
   bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
 
   bool isGFX10_BEncoding() const {
@@ -1774,8 +1786,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
   bool validateSOPLiteral(const MCInst &Inst) const;
   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
-  bool validateVOPDRegBankConstraints(const MCInst &Inst,
-                                      const OperandVector &Operands);
+  std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
+                                                      bool AsVOPD3);
+  bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
+  bool tryVOPD(const MCInst &Inst);
+  bool tryVOPD3(const MCInst &Inst);
+  bool tryAnotherVOPDEncoding(const MCInst &Inst);
+
   bool validateIntClampSupported(const MCInst &Inst);
   bool validateMIMGAtomicDMask(const MCInst &Inst);
   bool validateMIMGGatherDMask(const MCInst &Inst);
@@ -3505,6 +3522,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
     }
   }
 
+  // Asm can first try to match VOPD or VOPD3. By failing early here with
+  // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
+  // Checking later during validateInstruction does not give a chance to retry
+  // parsing as a different encoding.
+  if (tryAnotherVOPDEncoding(Inst))
+    return Match_InvalidOperand;
+
   return Match_Success;
 }
 
@@ -3685,8 +3709,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode,
 
     return {getNamedOperandIdx(Opcode, OpName::src0X),
             getNamedOperandIdx(Opcode, OpName::vsrc1X),
+            getNamedOperandIdx(Opcode, OpName::vsrc2X),
             getNamedOperandIdx(Opcode, OpName::src0Y),
             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
+            getNamedOperandIdx(Opcode, OpName::vsrc2Y),
             ImmXIdx,
             ImmIdx};
   }
@@ -3816,12 +3842,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
   return false;
 }
 
-bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
-    const MCInst &Inst, const OperandVector &Operands) {
+std::optional<unsigned>
+AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
 
   const unsigned Opcode = Inst.getOpcode();
   if (!isVOPD(Opcode))
-    return true;
+    return {};
 
   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
 
@@ -3832,16 +3858,64 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
                : MCRegister();
   };
 
-  // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
-  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
+  // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
+  // source-cache.
+  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
+                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
+                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
+  bool AllowSameVGPR = isGFX1250();
+
+  if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
+    for (auto OpName : {OpName::src0X, OpName::src0Y}) {
+      int I = getNamedOperandIdx(Opcode, OpName);
+      const MCOperand &Op = Inst.getOperand(I);
+      if (!Op.isImm())
+        continue;
+      int64_t Imm = Op.getImm();
+      if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
+          !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
+        return (unsigned)I;
+    }
+
+    for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
+                        OpName::vsrc2Y, OpName::imm}) {
+      int I = getNamedOperandIdx(Opcode, OpName);
+      if (I == -1)
+        continue;
+      const MCOperand &Op = Inst.getOperand(I);
+      if (Op.isImm())
+        return (unsigned)I;
+    }
+  }
 
   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
-  auto InvalidCompOprIdx =
-      InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
-  if (!InvalidCompOprIdx)
+  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
+      getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
+
+  return InvalidCompOprIdx;
+}
+
+bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
+                                   const OperandVector &Operands) {
+
+  unsigned Opcode = Inst.getOpcode();
+  bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
+
+  if (AsVOPD3) {
+    for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
+      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+      if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
+          (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
+        Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
+    }
+  }
+
+  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
+  if (!InvalidCompOprIdx.has_value())
     return true;
 
   auto CompOprIdx = *InvalidCompOprIdx;
+  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
   auto ParsedIdx =
       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
@@ -3849,7 +3923,10 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
 
   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
   if (CompOprIdx == VOPD::Component::DST) {
-    Error(Loc, "one dst register must be even and the other odd");
+    if (AsVOPD3)
+      Error(Loc, "dst registers must be distinct");
+    else
+      Error(Loc, "one dst register must be even and the other odd");
   } else {
     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
@@ -3859,6 +3936,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
   return false;
 }
 
+// \returns true if \p Inst does not satisfy VOPD constraints, but can be
+// potentially used as VOPD3 with the same operands.
+bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
+  // First check if it fits VOPD
+  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
+  if (!InvalidCompOprIdx.has_value())
+    return false;
+
+  // Then if it fits VOPD3
+  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
+  if (InvalidCompOprIdx.has_value()) {
+    // If the failed operand is dst, it is better to report the error for the
+    // VOPD3 instruction, as it has more capabilities and its error message
+    // will be more informative. If the dst is not legal for VOPD3, then it
+    // is not legal for VOPD either.
+    if (*InvalidCompOprIdx == VOPD::Component::DST)
+      return true;
+
+    // Otherwise prefer VOPD as we may find ourselves in an awkward situation
+    // with a conflict in tied implicit src2 of fmac and no asm operand to
+    // point to.
+    return false;
+  }
+  return true;
+}
+
+// \returns true if a VOPD3 instruction can also be represented as a shorter
+// VOPD encoding.
+bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
+  const unsigned Opcode = Inst.getOpcode();
+  const auto &II = getVOPDInstInfo(Opcode, &MII);
+  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
+  if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
+      !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
+    return false;
+
+  // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
+  // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
+  // be parsed as VOPD which does not accept src2.
+  if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
+      II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
+    return false;
+
+  // If any modifiers are set this cannot be VOPD.
+  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
+                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
+                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
+    int I = getNamedOperandIdx(Opcode, OpName);
+    if (I == -1)
+      continue;
+    if (Inst.getOperand(I).getImm())
+      return false;
+  }
+
+  return !tryVOPD3(Inst);
+}
+
+// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
+// form but switch to VOPD3 otherwise.
+bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
+  const unsigned Opcode = Inst.getOpcode();
+  if (!isGFX1250() || !isVOPD(Opcode))
+    return false;
+
+  if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
+    return tryVOPD(Inst);
+  return tryVOPD3(Inst);
+}
+
 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
 
   const unsigned Opc = Inst.getOpcode();
@@ -5179,7 +5325,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
   if (!validateConstantBusLimitations(Inst, Operands)) {
     return false;
   }
-  if (!validateVOPDRegBankConstraints(Inst, Operands)) {
+  if (!validateVOPD(Inst, Operands)) {
     return false;
   }
   if (!validateIntClampSupported(Inst)) {
@@ -9180,8 +9326,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
 
 // Create VOPD MCInst operands using parsed assembler operands.
 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
+  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+
   auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
+    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+      return;
+    }
     if (Op.isReg()) {
       Op.addRegOperands(Inst, 1);
       return;
@@ -9210,6 +9362,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
     if (CInfo.hasSrc2Acc())
       addOp(CInfo.getIndexOfDstInParsedOperands());
   }
+
+  int BitOp3Idx =
+      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
+  if (BitOp3Idx != -1) {
+    OptionalImmIndexMap OptIdx;
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
+    if (Op.isImm())
+      OptIdx[Op.getImmTy()] = Operands.size() - 1;
+
+    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
+  }
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index ccc711a0bcc4e..27f40f1705bb4 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -42,11 +42,13 @@ class GCNCreateVOPD {
   class VOPDCombineInfo {
   public:
     VOPDCombineInfo() = default;
-    VOPDCombineInfo(MachineInstr *First, MachineInstr *Second)
-        : FirstMI(First), SecondMI(Second) {}
+    VOPDCombineInfo(MachineInstr *First, MachineInstr *Second,
+                    bool VOPD3 = false)
+        : FirstMI(First), SecondMI(Second), IsVOPD3(VOPD3) {}
 
     MachineInstr *FirstMI;
     MachineInstr *SecondMI;
+    bool IsVOPD3;
   };
 
 public:
@@ -59,9 +61,9 @@ class GCNCreateVOPD {
     unsigned Opc2 = SecondMI->getOpcode();
     unsigned EncodingFamily =
         AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
-    int NewOpcode =
-        AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
-                            AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
+    int NewOpcode = AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1, CI.IsVOPD3),
+                                        AMDGPU::getVOPDOpcode(Opc2, CI.IsVOPD3),
+                                        EncodingFamily, CI.IsVOPD3);
     assert(NewOpcode != -1 &&
            "Should have previously determined this as a possible VOPD\n");
 
@@ -79,12 +81,36 @@ class GCNCreateVOPD {
       VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
     }
 
+    const AMDGPU::OpName Mods[2][3] = {
+        {AMDGPU::OpName::src0X_modifiers, AMDGPU::OpName::vsrc1X_modifiers,
+         AMDGPU::OpName::vsrc2X_modifiers},
+        {AMDGPU::OpName::src0Y_modifiers, AMDGPU::OpName::vsrc1Y_modifiers,
+         AMDGPU::OpName::vsrc2Y_modifiers}};
+    const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
+                                       AMDGPU::OpName::src1_modifiers,
+                                       AMDGPU::OpName::src2_modifiers};
+    const unsigned VOPDOpc = VOPDInst->getOpcode();
+
     for (auto CompIdx : VOPD::COMPONENTS) {
       auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
+      bool IsVOP3 = SII->isVOP3(*MI[CompIdx]);
       for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
-        auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+        if (AMDGPU::hasNamedOperand(VOPDOpc, Mods[CompIdx][CompSrcIdx])) {
+          const MachineOperand *Mod =
+              SII->getNamedOperand(*MI[CompIdx], SrcMods[CompSrcIdx]);
+          VOPDInst.addImm(Mod ? Mod->getImm() : 0);
+        }
+        auto MCOprIdx =
+            InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx, IsVOP3);
         VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
       }
+      if (MI[CompIdx]->getOpcode() == AMDGPU::V_CNDMASK_B32_e32 && CI.IsVOPD3)
+        VOPDInst.addReg(AMDGPU::VCC_LO);
+    }
+
+    if (CI.IsVOPD3) {
+      if (unsigned BitOp2 = AMDGPU::getBitOp2(Opc2))
+        VOPDInst.addImm(BitOp2);
     }
 
     SII->fixImplicitOperands(*VOPDInst);
@@ -109,6 +135,8 @@ class GCNCreateVOPD {
 
     const SIInstrInfo *SII = ST->getInstrInfo();
     bool Changed = false;
+    unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(*ST);
+    bool HasVOPD3 = ST->hasVOPD3();
 
     SmallVector<VOPDCombineInfo> ReplaceCandidates;
 
@@ -124,19 +152,27 @@ class GCNCreateVOPD {
         auto *SecondMI = &*MII;
         unsigned Opc = FirstMI->getOpcode();
         unsigned Opc2 = SecondMI->getOpcode();
-        llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
-        llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
         VOPDCombineInfo CI;
 
-        if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
-          CI = VOPDCombineInfo(FirstMI, SecondMI);
-        else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
-          CI = VOPDCombineInfo(SecondMI, FirstMI);
-        else
-          continue;
-        // checkVOPDRegConstraints cares about program order, but doReplace
-        // cares about X-Y order in the constituted VOPD
-        if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
+        const auto checkVOPD = [&](bool VOPD3) -> bool {
+          llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD =
+              AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
+          llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD =
+              AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
+
+          if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
+            CI = VOPDCombineInfo(FirstMI, SecondMI, VOPD3);
+          else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
+            CI = VOPDCombineInfo(SecondMI, FirstMI, VOPD3);
+          else
+            return false;
+          // checkVOPDRegConstraints cares about program order, but doReplace
+          // cares about X-Y order in the constituted VOPD
+          return llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI,
+                                               VOPD3);
+        };
+
+        if (checkVOPD(false) || (HasVOPD3 && checkVOPD(true))) {
           ReplaceCandidates.push_back(CI);
           ++MII;
         }
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index fa1209db2fa07..80fd830d10aa4 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1478,6 +1478,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasGFX1250Insts() const { return GFX1250Insts; }
 
+  bool hasVOPD3() const { return GFX1250Insts; }
+
   // \returns true if target has S_SETPRIO_INC_WG instruction.
   bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; }
 
diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
index 33c208495c500..9e66909e41052 100644
--- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
@@ -36,11 +36,19 @@ using namespace llvm;
 
 bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
                                    const MachineInstr &FirstMI,
-                                   const MachineInstr &SecondMI) {
+                                   const MachineInstr &SecondMI, bool IsVOPD3) {
   namespace VOPD = AMDGPU::VOPD;
 
   const MachineFunction *MF = FirstMI.getMF();
   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+
+  if (IsVOPD3 && !ST.hasVOPD3())
+    return false;
+  if (!IsVOPD3 && (TII.isVOP3(FirstMI) || TII.isVOP3(SecondMI)))
+    return false;
+  if (TII.isDPP(FirstMI) || TII.isDPP(SecondMI))
+    return false;
+
   const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
   const MachineRegisterInfo &MRI = MF->getRegInfo();
   // Literals also count against scalar bus limit
@@ -80,23 +88,61 @@ bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
   for (auto CompIdx : VOPD::COMPONENTS) {
     const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;
 
-    const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
+    const MachineOperand &Src0 = *TII.getNamedOperand(MI, AMDGPU::OpName::src0);
     if (Src0.isReg()) {
       if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
         if (!is_contained(UniqueScalarRegs, Src0.getReg()))
           UniqueScalarRegs.push_back(Src0.getReg());
       }
-    } else {
-      if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))
-        addLiteral(Src0);
+    } else if (!TII.isInlineConstant(Src0)) {
+      if (IsVOPD3)
+        return false;
+      addLiteral(Src0);
     }
 
     if (InstInfo[CompIdx].hasMandatoryLiteral()) {
+      if (IsVOPD3)
+        return false;
+
       auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
       addLiteral(MI.getOperand(CompOprIdx));
     }
     if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
       UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
+
+    if (IsVOPD3) {
+      for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
+        const MachineOperand *Src = TII.getNamedOperand(MI, OpName);
+        if (!Src)
+          continue;
+        if (OpName == AMDGPU::OpName::src2) {
+          if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3))
+            continue;
+          if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
+            UniqueScalarRegs.push_back(Src->getReg());
+            continue;
+          }
+        }
+        if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg()))
+          return false;
+      }
+
+      for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
+                          AMDGPU::OpName::op_sel}) {
+        if (TII.hasModifiersSet(MI, OpName))
+          return false;
+      }
+
+      // Neg is allowed, other modifiers are not. NB: even though sext has the
+      // same value as neg, there are no combinable instructions with sext.
+      for (auto OpName :
+           {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
+            AMDGPU::OpName::src2_modifiers}) {
+        const MachineOperand *Mods = TII.getNamedOperand(MI, OpName);
+        if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
+          return false;
+      }
+    }
   }
 
   if (UniqueLiterals.size() > 1)
@@ -104,14 +150,33 @@ bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
   if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
     return false;
 
-  // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
+  // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
+  // source-cache.
   bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
                  FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
                  SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
+  bool AllowSameVGPR = ST.hasGFX1250Insts();
 
-  if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc))
+  if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR,
+                                 IsVOPD3))
     return false;
 
+  if (IsVOPD3) {
+    // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
+    if (AMDGPU::hasNamedOperand(SecondMI.getOpcode(), AMDGPU::OpName::bitop3)) {
+      const MachineOperand &Src2 =
+          *TII.getNamedOperand(SecondMI, AMDGPU::OpName::src2);
+      if (!Src2.isImm() || Src2.getImm())
+        return false;
+    }
+    if (AMDGPU::hasNamedOperand(FirstMI.getOpcode(), AMDGPU::OpName::bitop3)) {
+      const MachineOperand &Src2 =
+          *TII.getNamedOperand(FirstMI, AMDGPU::OpName::src2);
+      if (!Src2.isImm() || Src2.getImm())
+        return false;
+    }
+  }
+
   LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
                     << "\n\tY: " << SecondMI << "\n");
   return true;
@@ -125,21 +190,28 @@ static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
                                        const MachineInstr *FirstMI,
                                        const MachineInstr &SecondMI) {
   const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
+  const GCNSubtarget &ST = STII.getSubtarget();
+  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
   unsigned Opc2 = SecondMI.getOpcode();
-  auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
 
-  // One instruction case
-  if (!FirstMI)
-    return SecondCanBeVOPD.Y;
+  const auto checkVOPD = [&](bool VOPD3) -> bool {
+    auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
 
-  unsigned Opc = FirstMI->getOpcode();
-  auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
+    // One instruction case
+    if (!FirstMI)
+      return SecondCanBeVOPD.Y || SecondCanBeVOPD.X;
 
-  if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
-        (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
-    return false;
+    unsigned Opc = FirstMI->getOpcode();
+    auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
+
+    if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
+          (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
+      return false;
+
+    return checkVOPDRegConstraints(STII, *FirstMI, SecondMI, VOPD3);
+  };
 
-  return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
+  return checkVOPD(false) || (ST.hasVOPD3() && checkVOPD(true));
 }
 
 namespace {
diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h
index 22361b9a1a078..f776ae95e79c4 100644
--- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h
+++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.h
@@ -23,7 +23,7 @@ class SIInstrInfo;
 
 bool checkVOPDRegConstraints(const SIInstrInfo &TII,
                              const MachineInstr &FirstMI,
-                             const MachineInstr &SecondMI);
+                             const MachineInstr &SecondMI, bool IsVOPD3);
 
 std::unique_ptr<ScheduleDAGMutation> createVOPDPairingMutation();
 
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 76e29e4393206..c384d49ae7a80 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -98,6 +98,8 @@ enum : uint64_t {
   // VINTERP instruction format.
   VINTERP = 1 << 29,
 
+  VOPD3 = 1 << 30,
+
   // High bits - other information.
   VM_CNT = UINT64_C(1) << 32,
   EXP_CNT = UINT64_C(1) << 33,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index c27d4e0df6fc5..a368bc5d0b1a1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -55,6 +55,8 @@ class InstSI <dag outs, dag ins, string asm = "",
   // VINTERP instruction format.
   field bit VINTERP = 0;
 
+  field bit VOPD3 = 0;
+
   // High bits - other information.
   field bit VM_CNT = 0;
   field bit EXP_CNT = 0;
@@ -195,6 +197,7 @@ class InstSI <dag outs, dag ins, string asm = "",
 
   let TSFlags{28} = LDSDIR;
   let TSFlags{29} = VINTERP;
+  let TSFlags{30} = VOPD3;
 
   let TSFlags{32} = VM_CNT;
   let TSFlags{33} = EXP_CNT;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 6d6c2af7ce490..a63aa55f8a73d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1327,6 +1327,11 @@ class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
   let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
 }
 
+class FPVRegSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
+  let Name = "VRegWithFP"#opSize#"InputMods";
+  let PredicateMethod = "isVRegWithFP"#opSize#"InputMods";
+}
+
 def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
 class FPT16InputModsMatchClass<bit IsFake16> : FPInputModsMatchClass<16> {
   let Name = !if(IsFake16, "RegOrImmWithFPFake16InputMods",
@@ -1345,6 +1350,10 @@ class FP16VCSrcInputModsMatchClass<bit IsFake16>
                         !if(IsFake16, "true", "false") # ">";
 }
 def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;
+def FP64VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<64>;
+
+def FP32VRegSrcInputModsMatchClass : FPVRegSrcInputModsMatchClass<32>;
+def FP64VRegSrcInputModsMatchClass : FPVRegSrcInputModsMatchClass<64>;
 
 class InputMods <AsmOperandClass matchClass> : Operand <i32> {
   let OperandNamespace = "AMDGPU";
@@ -1371,6 +1380,10 @@ class FPT16VCSrcInputMods<bit IsFake16 = 1>
   let EncoderMethod = "getMachineOpValueT16";
 }
 def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
+def FP64VCSrcInputMods : FPInputMods<FP64VCSrcInputModsMatchClass>;
+
+def FP32VRegSrcInputMods : FPInputMods<FP32VRegSrcInputModsMatchClass>;
+def FP64VRegSrcInputMods : FPInputMods<FP64VRegSrcInputModsMatchClass>;
 
 class IntInputModsMatchClass <int opSize> : AsmOperandClass {
   let Name = "RegOrImmWithInt"#opSize#"InputMods";
@@ -1782,6 +1795,32 @@ class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> {
         1               : VSrc_b32);
 }
 
+// Returns the register class to use for source VGPR, SGPR or inline constant
+// for the given VT.
+class getVCSrcForVT<ValueType VT> {
+  RegisterOperand ret =
+    !if(VT.isFP,
+      !if(!eq(VT.Size, 64),
+         VCSrc_f64,
+         !cond(!eq(VT.Value, f16.Value)    : VCSrc_f16,
+               !eq(VT.Value, bf16.Value)   : VCSrc_bf16,
+               !eq(VT.Value, v2f16.Value)  : VCSrc_v2f16,
+               !eq(VT.Value, v2bf16.Value) : VCSrc_v2bf16,
+               1 : VCSrc_f32)
+       ),
+       !if(!eq(VT.Size, 64),
+          VCSrc_b64,
+          !if(!eq(VT.Value, i16.Value),
+             VCSrc_b16,
+             !if(!eq(VT.Value, v2i16.Value),
+                VCSrc_v2b16,
+                VCSrc_b32
+             )
+          )
+       )
+    );
+}
+
 class getSOPSrcForVT<ValueType VT> {
   RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32);
 }
@@ -1922,6 +1961,20 @@ class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
                 IntT16_Lo128VRegInputMods<IsFake16>, IntVRegInputMods));
 }
 
+// Return type of the input modifiers operand for a DPP or VOPD3 source of type
+// VT: f16/bf16/i16 and f64 have dedicated operands, the rest use 32-bit ones.
+class getSrcModVOP3VC <ValueType VT, bit IsFake16 = 1> {
+  Operand ret =
+      !if (VT.isFP,
+           !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
+                FPT16VCSrcInputMods<IsFake16>,
+                !if (!eq(VT.Value, f64.Value), FP64VCSrcInputMods,
+                     FP32VCSrcInputMods)),
+           !if (!eq(VT.Value, i16.Value),
+                IntT16VCSrcInputMods<IsFake16>,
+                Int32VCSrcInputMods));
+}
+
 // Return type of input modifiers operand for specified input operand for DPP
 // True16: If the destination is a 16-bit value, the src0 modifier must hold
 // dst's opsel bit. Use a dummy value for DstVT if getting the mod for a src operand besides 0.
@@ -1943,16 +1996,12 @@ class getSrc0ModVOP3DPP <ValueType VT, ValueType DstVT, bit IsFake16 = 1> {
   Operand ret = !if(!and(!not(IsFake16), !eq(DstVT.Size, 16)), T16Dst, Normal);
 }
 
-// GFX11 only supports VGPR src1, but the restriction is done in AsmParser
-// and GCNDPPCombine.
-class getSrcModVOP3DPP<ValueType VT, bit IsFake16 = 1> {
+// Return type of input modifiers operand for specified input operand for VGPR
+// only operands (VOPD3 vsrc1 and vsrc2).
+class getSrcModVOP3V <ValueType VT> {
   Operand ret =
-      !if (VT.isFP,
-           !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
-                FPT16VCSrcInputMods<IsFake16>, FP32VCSrcInputMods),
-           !if (!eq(VT.Value, i16.Value),
-                IntT16VCSrcInputMods<IsFake16>,
-                Int32VCSrcInputMods));
+      !if (!eq(VT.Value, f64.Value), FP64VRegSrcInputMods,
+           FP32VRegSrcInputMods);
 }
 
 // Return type of input modifiers operand specified input operand for SDWA
@@ -2185,6 +2234,27 @@ class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs
             (ins)/* endif */)));
 }
 
+// Ins of one VOPD3 component (X, or Y if IsCompY): src0[, vsrc1][, vsrc2].
+class getInsVOPD3<RegisterOperand Src0VOPD3, RegisterOperand Src1VOPD3, RegisterOperand Src2VOPD3,
+                  Operand Src0Mod, Operand Src1Mod, Operand Src2Mod,
+                  bit HasSrc1, bit HasSrc2, bit HasModifiers, bit IsCompY> {
+  dag Src0 = !if(HasModifiers,
+                 !if(IsCompY, (ins Src0Mod:$src0Y_modifiers, Src0VOPD3:$src0Y),
+                              (ins Src0Mod:$src0X_modifiers, Src0VOPD3:$src0X)),
+                 !if(IsCompY, (ins Src0VOPD3:$src0Y), (ins Src0VOPD3:$src0X)));
+  dag Src1 = !if(HasModifiers,
+                 !if(IsCompY, (ins Src1Mod:$vsrc1Y_modifiers, Src1VOPD3:$vsrc1Y),
+                              (ins Src1Mod:$vsrc1X_modifiers, Src1VOPD3:$vsrc1X)),
+                 !if(IsCompY, (ins Src1VOPD3:$vsrc1Y), (ins Src1VOPD3:$vsrc1X)));
+  dag Src2 = !if(HasModifiers,
+                 !if(IsCompY, (ins Src2Mod:$vsrc2Y_modifiers, Src2VOPD3:$vsrc2Y),
+                              (ins Src2Mod:$vsrc2X_modifiers, Src2VOPD3:$vsrc2X)),
+                 !if(IsCompY, (ins Src2VOPD3:$vsrc2Y), (ins Src2VOPD3:$vsrc2X)));
+  dag ret = !con(Src0,
+                 !if(HasSrc1, Src1, (ins)),
+                 !if(HasSrc2, Src2, (ins)));
+}
+
 // Outs for DPP
 class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
   dag ret = !if(HasDst,
@@ -2216,13 +2286,16 @@ class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
                !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
 }
 
-class getAsmVOPDPart <int NumSrcArgs, string XorY> {
+class getAsmVOPDPart <int NumSrcArgs, string XorY, bit HasVOPD3Src2 = 0, bit HasModifiers = 0> {
+  string mods = !if(HasModifiers, "_modifiers", "");
   string dst = "$vdst" # XorY;
-  string src0 = ", $src0" # XorY;
-  string src1 = ", $vsrc1" # XorY;
+  string src0 = ", $src0" # XorY # mods;
+  string src1 = ", $vsrc1" # XorY # mods;
+  string src2 = ", $vsrc2" # XorY # mods;
   string ret = dst #
                !if(!ge(NumSrcArgs, 1), src0, "") #
-               !if(!ge(NumSrcArgs, 2), src1, "");
+               !if(!ge(NumSrcArgs, 2), src1, "") #
+               !if(HasVOPD3Src2, src2, "");
 }
 
 // Returns the assembly string for the inputs and outputs of a VOP3P
@@ -2515,10 +2588,16 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
   field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
   field Operand Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT>.ret;
-  field Operand Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT>.ret;
-  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
+  field Operand Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT>.ret;
+  field Operand Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT>.ret;
   field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
   field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
+  field RegisterOperand Src0VOPD3 = getVCSrcForVT<Src0VT>.ret;
+  field RegisterOperand Src1VOPD3 = getVregSrcForVT<Src1VT>.ret;
+  field RegisterOperand Src2VOPD3 = getVregSrcForVT<Src2VT>.ret;
+  field Operand Src0ModVOPD3 = getSrcModVOP3VC<Src0VT>.ret;
+  field Operand Src1ModVOPD3 = getSrcModVOP3V<Src1VT>.ret;
+  field Operand Src2ModVOPD3 = getSrcModVOP3V<Src2VT>.ret;
 
 
   field bit IsMAI = 0;
@@ -2642,6 +2721,13 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   // component is FMAAK or FMAMK
   field dag InsVOPDX_immX = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32, VSrc_f16):$src0X, VGPR_32:$vsrc1X);
   field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
+  field bit HasVOPD3Src2 = !empty(Ins32); // VOP3 does not have Ins32.
+  field dag InsVOPD3X = getInsVOPD3<Src0VOPD3, Src1VOPD3, Src2VOPD3,
+                                    Src0ModVOPD3, Src1ModVOPD3, Src2ModVOPD3,
+                                    HasSrc1, HasVOPD3Src2, HasModifiers, 0>.ret;
+  field dag InsVOPD3Y = getInsVOPD3<Src0VOPD3, Src1VOPD3, Src2VOPD3,
+                                    Src0ModVOPD3, Src1ModVOPD3, Src2ModVOPD3,
+                                    HasSrc1, HasVOPD3Src2, HasModifiers, 1>.ret;
 
   field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
   field string AsmDPP = !if(HasExtDPP,
@@ -2662,6 +2748,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
   field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
   field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
+  field string AsmVOPD3X = getAsmVOPDPart<NumSrcArgs, "X", HasVOPD3Src2, HasModifiers>.ret;
+  field string AsmVOPD3Y = getAsmVOPDPart<NumSrcArgs, "Y", HasVOPD3Src2, HasModifiers>.ret;
   field string TieRegDPP = "$old";
   field bit IsSMFMAC = false;
   field bit HasAbid = !and(IsMAI, HasSrc1);
@@ -2705,8 +2793,8 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
   let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0 /*IsFake16*/>.ret;
   let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0 /*IsFake16*/>.ret;
   let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
-  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret;
-  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret;
+  let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 0 /*IsFake16*/>.ret;
+  let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 0 /*IsFake16*/>.ret;
 
   let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
   let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
@@ -2735,8 +2823,8 @@ class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
   let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1 /*IsFake16*/>.ret;
   let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1 /*IsFake16*/>.ret;
   let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
-  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
-  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
+  let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 1 /*IsFake16*/>.ret;
+  let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 1 /*IsFake16*/>.ret;
 }
 
 def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>;
@@ -3224,7 +3312,7 @@ def FP4FP8DstByteSelTable : GenericTable {
 def VOPDComponentTable : GenericTable {
   let FilterClass = "VOPD_Component";
   let CppTypeName = "VOPDComponentInfo";
-  let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
+  let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX", "CanBeVOPD3X"];
   let PrimaryKey = ["BaseVOP"];
   let PrimaryKeyName = "getVOPDComponentHelper";
 }
@@ -3237,14 +3325,14 @@ def getVOPDBaseFromComponent : SearchIndex {
 def VOPDPairs : GenericTable {
   let FilterClass = "VOPD_Base";
   let CppTypeName = "VOPDInfo";
-  let Fields = ["Opcode", "OpX", "OpY", "SubTgt"];
+  let Fields = ["Opcode", "OpX", "OpY", "SubTgt", "VOPD3"];
   let PrimaryKey = ["Opcode"];
   let PrimaryKeyName = "getVOPDOpcodeHelper";
 }
 
 def getVOPDInfoFromComponentOpcodes : SearchIndex {
   let Table = VOPDPairs;
-  let Key = ["OpX", "OpY", "SubTgt"];
+  let Key = ["OpX", "OpY", "SubTgt", "VOPD3"];
 }
 
 include "SIInstructions.td"
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index a6c7b164c8b2c..d24c301fc1e51 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1267,6 +1267,8 @@ def VCSrc_bf16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_BF16">;
 def VCSrc_f16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_FP16">;
 def VCSrc_b32 : SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_INT32">;
 def VCSrc_f32 : SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_FP32">;
+def VCSrc_b64 : SrcRegOrImm9 <VS_64, "OPERAND_REG_INLINE_C_INT64">;
+def VCSrc_f64 : SrcRegOrImm9 <VS_64, "OPERAND_REG_INLINE_C_FP64">;
 def VCSrc_v2b16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_V2INT16">;
 def VCSrc_v2bf16: SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_V2BF16">;
 def VCSrc_v2f16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_V2FP16">;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5b21f4a6083a5..c500bccc50e28 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -392,6 +392,7 @@ struct VOPDComponentInfo {
   uint16_t BaseVOP;
   uint16_t VOPDOp;
   bool CanBeVOPDX;
+  bool CanBeVOPD3X;
 };
 
 struct VOPDInfo {
@@ -399,6 +400,7 @@ struct VOPDInfo {
   uint16_t OpX;
   uint16_t OpY;
   uint16_t Subtarget;
+  bool VOPD3;
 };
 
 struct VOPTrue16Info {
@@ -591,6 +593,8 @@ const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
 }
 
 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
+  if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
+    return SIEncodingFamily::GFX1250;
   if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
     return SIEncodingFamily::GFX12;
   if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
@@ -598,14 +602,27 @@ unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
   llvm_unreachable("Subtarget generation does not support VOPD!");
 }
 
-CanBeVOPD getCanBeVOPD(unsigned Opc) {
+CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
+  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
+  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
   const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
-  if (Info)
-    return {Info->CanBeVOPDX, true};
+  if (Info) {
+    // Check that Opc can be used as VOPDY for this encoding. V_MOV_B32 as a
+    // VOPDX is just a placeholder here, it is supported on all encodings.
+    // TODO: This can be optimized by creating tables of supported VOPDY
+    // opcodes per encoding.
+    unsigned VOPDMov = AMDGPU::getVOPDOpcode(AMDGPU::V_MOV_B32_e32, VOPD3);
+    bool CanBeVOPDY = getVOPDFull(VOPDMov, AMDGPU::getVOPDOpcode(Opc, VOPD3),
+                                  EncodingFamily, VOPD3) != -1;
+    return {VOPD3 ? Info->CanBeVOPD3X : Info->CanBeVOPDX, CanBeVOPDY};
+  }
+
   return {false, false};
 }
 
-unsigned getVOPDOpcode(unsigned Opc) {
+unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
+  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
+  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
   const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
   return Info ? Info->VOPDOp : ~0u;
 }
@@ -742,9 +759,27 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
 }
 
-int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
+// Translate a binary logical opcode into its BitOp3 modifier, zero if none.
+unsigned getBitOp2(unsigned Opc) {
+  // Opcodes without a BitOp3 equivalent keep the zero default.
+  unsigned BitOp = 0;
+  if (Opc == AMDGPU::V_AND_B32_e32)
+    BitOp = 0x40;
+  else if (Opc == AMDGPU::V_OR_B32_e32)
+    BitOp = 0x54;
+  else if (Opc == AMDGPU::V_XOR_B32_e32)
+    BitOp = 0x14;
+  else if (Opc == AMDGPU::V_XNOR_B32_e32)
+    BitOp = 0x41;
+  return BitOp;
+}
+
+int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
+                bool VOPD3) {
+  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
+  OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
   const VOPDInfo *Info =
-      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
+      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
   return Info ? Info->Opcode : -1;
 }
 
@@ -759,7 +794,7 @@ std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
 
 namespace VOPD {
 
-ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
+ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
   assert(OpDesc.getNumDefs() == Component::DST_NUM);
 
   assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
@@ -767,10 +802,34 @@ ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
   auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
   assert(TiedIdx == -1 || TiedIdx == Component::DST);
   HasSrc2Acc = TiedIdx != -1;
+  Opcode = OpDesc.getOpcode();
 
-  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
+  IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
+  SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2)   ? 3
+                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)  ? 3
+                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2
+                                                                           : 1;
   assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
 
+  if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
+      Opcode == AMDGPU::V_CNDMASK_B32_e64) {
+    // CNDMASK is an awkward exception, it has FP modifiers, but not FP
+    // operands.
+    NumVOPD3Mods = 2;
+    if (IsVOP3)
+      SrcOperandsNum = 3;
+  } else if (isSISrcFPOperand(OpDesc,
+                              getNamedOperandIdx(Opcode, OpName::src0))) {
+    // All FP VOPD instructions have Neg modifiers for all operands except
+    // for tied src2.
+    NumVOPD3Mods = SrcOperandsNum;
+    if (HasSrc2Acc)
+      --NumVOPD3Mods;
+  }
+
+  if (OpDesc.TSFlags & SIInstrFlags::VOP3)
+    return;
+
   auto OperandsNum = OpDesc.getNumOperands();
   unsigned CompOprIdx;
   for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
@@ -781,6 +840,10 @@ ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
   }
 }
 
+int ComponentProps::getBitOp3OperandIdx() const {
+  return getNamedOperandIdx(Opcode, OpName::bitop3);
+}
+
 unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
   assert(CompOprIdx < Component::MAX_OPR_NUM);
 
@@ -796,19 +859,58 @@ unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
 }
 
 std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
-    std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {
+    std::function<unsigned(unsigned, unsigned)> GetRegIdx,
+    const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
+    bool VOPD3) const {
+
+  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
+                               CompInfo[ComponentIndex::X].isVOP3());
+  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
+                               CompInfo[ComponentIndex::Y].isVOP3());
+
+  const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
+                                   unsigned BanksMask) -> bool {
+    MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
+    MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
+    if (!BaseX)
+      BaseX = X;
+    if (!BaseY)
+      BaseY = Y;
+    if ((BaseX & BanksMask) == (BaseY & BanksMask))
+      return true;
+    if (BaseX != X /* This is 64-bit register */ &&
+        ((BaseX + 1) & BanksMask) == (BaseY & BanksMask))
+      return true;
+    if (BaseY != Y && (BaseX & BanksMask) == ((BaseY + 1) & BanksMask))
+      return true;
 
-  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
-  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);
+    // If both are 64-bit, the bank conflict has already been detected while
+    // checking the first subreg.
+    return false;
+  };
 
-  const unsigned CompOprNum =
-      SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
   unsigned CompOprIdx;
-  for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
-    unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
-    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
-        ((OpXRegs[CompOprIdx] & BanksMasks) ==
-         (OpYRegs[CompOprIdx] & BanksMasks)))
+  for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
+    unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
+                                : VOPD_VGPR_BANK_MASKS[CompOprIdx];
+    if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
+      continue;
+
+    if (SkipSrc && CompOprIdx >= Component::DST_NUM)
+      continue;
+
+    if (CompOprIdx < Component::DST_NUM) {
+      // Even if we do not check vdst parity, vdst operands still shall not
+      // overlap.
+      if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
+        return CompOprIdx;
+      if (VOPD3) // No need to check dst parity.
+        continue;
+    }
+
+    if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
+        (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
+         OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
       return CompOprIdx;
   }
 
@@ -822,9 +924,10 @@ std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
 // for the specified component and MC operand. The callback must return 0
 // if the operand is not a register or not a VGPR.
-InstInfo::RegIndices InstInfo::getRegIndices(
-    unsigned CompIdx,
-    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
+InstInfo::RegIndices
+InstInfo::getRegIndices(unsigned CompIdx,
+                        std::function<unsigned(unsigned, unsigned)> GetRegIdx,
+                        bool VOPD3) const {
   assert(CompIdx < COMPONENTS_NUM);
 
   const auto &Comp = CompInfo[CompIdx];
@@ -836,7 +939,8 @@ InstInfo::RegIndices InstInfo::getRegIndices(
     unsigned CompSrcIdx = CompOprIdx - DST_NUM;
     RegIndices[CompOprIdx] =
         Comp.hasRegSrcOperand(CompSrcIdx)
-            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
+            ? GetRegIdx(CompIdx,
+                        Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
             : 0;
   }
   return RegIndices;
@@ -853,8 +957,9 @@ VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
   auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
   const auto &OpXDesc = InstrInfo->get(OpX);
   const auto &OpYDesc = InstrInfo->get(OpY);
-  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
-  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
+  bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
+  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X, VOPD3);
+  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
   return VOPD::InstInfo(OpXInfo, OpYInfo);
 }
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 4f7d18170d586..e6840d97e3f3d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -593,6 +593,11 @@ bool getMAIIsDGEMM(unsigned Opc);
 LLVM_READONLY
 bool getMAIIsGFX940XDL(unsigned Opc);
 
+// Get an equivalent BitOp3 for a binary logical \p Opc.
+// \returns BitOp3 modifier for the logical operation or zero.
+// Used in VOPD3 conversion.
+unsigned getBitOp2(unsigned Opc);
+
 struct CanBeVOPD {
   bool X;
   bool Y;
@@ -603,7 +608,7 @@ LLVM_READONLY
 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
 
 LLVM_READONLY
-CanBeVOPD getCanBeVOPD(unsigned Opc);
+CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);
 
 LLVM_READNONE
 uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
@@ -626,10 +631,11 @@ LLVM_READONLY
 int getMCOpcode(uint16_t Opcode, unsigned Gen);
 
 LLVM_READONLY
-unsigned getVOPDOpcode(unsigned Opc);
+unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);
 
 LLVM_READONLY
-int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);
+int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
+                bool VOPD3);
 
 LLVM_READONLY
 bool isVOPD(unsigned Opc);
@@ -662,6 +668,7 @@ enum Component : unsigned {
 // LSB mask for VGPR banks per VOPD component operand.
 // 4 banks result in a mask 3, setting 2 lower bits.
 constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
+constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};
 
 enum ComponentIndex : unsigned { X = 0, Y = 1 };
 constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
@@ -673,10 +680,13 @@ class ComponentProps {
   unsigned SrcOperandsNum = 0;
   unsigned MandatoryLiteralIdx = ~0u;
   bool HasSrc2Acc = false;
+  unsigned NumVOPD3Mods = 0;
+  unsigned Opcode = 0;
+  bool IsVOP3 = false;
 
 public:
   ComponentProps() = default;
-  ComponentProps(const MCInstrDesc &OpDesc);
+  ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);
 
   // Return the total number of src operands this component has.
   unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
@@ -706,6 +716,18 @@ class ComponentProps {
   // Return true iif this component has tied src2.
   bool hasSrc2Acc() const { return HasSrc2Acc; }
 
+  // Return a number of source modifiers if instruction is used in VOPD3.
+  unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }
+
+  // Return opcode of the component.
+  unsigned getOpcode() const { return Opcode; }
+
+  // Return true if the component is in VOP3 encoding or uses VOP3 layout.
+  bool isVOP3() const { return IsVOP3; }
+
+  // Return index of BitOp3 operand or -1.
+  int getBitOp3OperandIdx() const;
+
 private:
   bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
     assert(CompSrcIdx < Component::MAX_SRC_NUM);
@@ -758,7 +780,15 @@ class ComponentLayout {
   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
   // Each ComponentKind has operand indices defined below.
   static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
-  static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */};
+
+  // VOPD3 instructions may have 2 or 3 source modifiers; the src2 modifier is
+  // not used if there is a tied accumulator. Indexing of this array:
+  // SINGLE_MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE
+  // instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second
+  // component also add OpX.MCSrcNum + OpX.VOPD3ModsNum.
+  // For VOPD1/VOPD2 use the row with zero modifiers.
+  static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
+      {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}};
 
   // Parsed operands of regular instructions are ordered as follows:
   //   Mnemo dst src0 [vsrc1 ...]
@@ -774,25 +804,40 @@ class ComponentLayout {
 private:
   const ComponentKind Kind;
   const ComponentProps PrevComp;
+  const unsigned VOPD3ModsNum;
+  const int BitOp3Idx; // Index of bitop3 operand or -1
 
 public:
   // Create layout for COMPONENT_X or SINGLE component.
-  ComponentLayout(ComponentKind Kind) : Kind(Kind) {
+  ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
+      : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
     assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
   }
 
   // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
-  ComponentLayout(const ComponentProps &OpXProps)
-      : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {}
+  ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
+                  int BitOp3Idx)
+      : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
+        VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}
 
 public:
   // Return the index of dst operand in MCInst operands.
   unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
 
   // Return the index of the specified src operand in MCInst operands.
-  unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
+  unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
     assert(CompSrcIdx < Component::MAX_SRC_NUM);
-    return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
+
+    if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
+      return BitOp3Idx;
+
+    if (VOPD3) {
+      return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
+             getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
+    }
+
+    return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
+           (Kind != SINGLE ? 1 : 0);
   }
 
   // Return the index of dst operand in the parsed operands array.
@@ -813,19 +858,27 @@ class ComponentLayout {
   unsigned getPrevCompParsedSrcNum() const {
     return PrevComp.getCompParsedSrcOperandsNum();
   }
+  unsigned getPrevCompVOPD3ModsNum() const {
+    return PrevComp.getCompVOPD3ModsNum();
+  }
 };
 
 // Layout and properties of VOPD components.
-class ComponentInfo : public ComponentLayout, public ComponentProps {
+class ComponentInfo : public ComponentProps, public ComponentLayout {
 public:
   // Create ComponentInfo for COMPONENT_X or SINGLE component.
   ComponentInfo(const MCInstrDesc &OpDesc,
-                ComponentKind Kind = ComponentKind::SINGLE)
-      : ComponentLayout(Kind), ComponentProps(OpDesc) {}
+                ComponentKind Kind = ComponentKind::SINGLE,
+                bool VOP3Layout = false)
+      : ComponentProps(OpDesc, VOP3Layout),
+        ComponentLayout(Kind, getCompVOPD3ModsNum(), getBitOp3OperandIdx()) {}
 
   // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
-  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
-      : ComponentLayout(OpXProps), ComponentProps(OpDesc) {}
+  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
+                bool VOP3Layout = false)
+      : ComponentProps(OpDesc, VOP3Layout),
+        ComponentLayout(OpXProps, getCompVOPD3ModsNum(),
+                        getBitOp3OperandIdx()) {}
 
   // Map component operand index to parsed operand index.
   // Return 0 if the specified operand does not exist.
@@ -857,23 +910,36 @@ class InstInfo {
   // if the operand is not a register or not a VGPR.
   // If \p SkipSrc is set to true then constraints for source operands are not
   // checked.
+  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
+  // even though it violates requirement to be from different banks.
+  // If \p VOPD3 is set to true both dst registers are allowed to be either odd
+  // or even and instruction may have real src2 as opposed to tied accumulator.
   bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
-                         bool SkipSrc = false) const {
-    return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value();
+                         const MCRegisterInfo &MRI, bool SkipSrc = false,
+                         bool AllowSameVGPR = false, bool VOPD3 = false) const {
+    return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
+                                      VOPD3)
+        .has_value();
   }
 
   // Check VOPD operands constraints.
   // Return the index of an invalid component operand, if any.
   // If \p SkipSrc is set to true then constraints for source operands are not
-  // checked.
+  // checked except for being from the same halves of VGPR file on gfx1250.
+  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
+  // even though it violates requirement to be from different banks.
+  // If \p VOPD3 is set to true both dst registers are allowed to be either odd
+  // or even and instruction may have real src2 as opposed to tied accumulator.
   std::optional<unsigned> getInvalidCompOperandIndex(
       std::function<unsigned(unsigned, unsigned)> GetRegIdx,
-      bool SkipSrc = false) const;
+      const MCRegisterInfo &MRI, bool SkipSrc = false,
+      bool AllowSameVGPR = false, bool VOPD3 = false) const;
 
 private:
   RegIndices
   getRegIndices(unsigned ComponentIdx,
-                std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
+                std::function<unsigned(unsigned, unsigned)> GetRegIdx,
+                bool VOPD3) const;
 };
 
 } // namespace VOPD
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 859d5bae3d460..15491fef6c1aa 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -148,7 +148,7 @@ multiclass VOP2Inst_e32<string opName,
                Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
 }
 multiclass
-    VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp,
+    VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<6> VOPDOp,
                       string VOPDName, SDPatternOperator node = null_frag,
                       string revOp = opName> {
   defm NAME : VOP2Inst_e32<opName, P, node, revOp>,
@@ -167,6 +167,15 @@ multiclass VOP2Inst_e64<string opName,
     } // End SubtargetPredicate = isGFX11Plus
 }
 
+multiclass VOP2Inst_e64_VOPD<string opName,
+                             VOPProfile P, bits<6> VOPDOp,
+                             string VOPDName,
+                             SDPatternOperator node = null_frag,
+                             string revOp = opName> {
+  defm NAME: VOP2Inst_e64<opName, P, node, revOp>,
+             VOPD_Component<VOPDOp, VOPDName>;
+}
+
 multiclass VOP2Inst_sdwa<string opName,
                          VOPProfile P,
                          string revOp = opName> {
@@ -227,12 +236,12 @@ multiclass VOP2Inst_e64_t16<string opName,
 
 multiclass VOP2Inst_VOPD<string opName,
                          VOPProfile P,
-                         bits<5> VOPDOp,
+                         bits<6> VOPDOp,
                          string VOPDName,
                          SDPatternOperator node = null_frag,
                          string revOp = opName> :
     VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp>,
-    VOP2Inst_e64<opName, P, node, revOp>,
+    VOP2Inst_e64_VOPD<opName, P, VOPDOp, VOPDName, node, revOp>,
     VOP2Inst_sdwa<opName, P, revOp> {
     if P.HasExtDPP then
       def _dpp  : VOP2_DPP_Pseudo <opName, P>;
@@ -288,7 +297,7 @@ multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
 }
 
 multiclass
-    VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
+    VOP2eInst_Base<string opName, VOPProfile P, bits<6> VOPDOp, string VOPDName,
                    SDPatternOperator node, string revOp, bit useSGPRInput> {
 
   let SchedRW = [Write32Bit] in {
@@ -310,9 +319,14 @@ multiclass
         def _dpp  : VOP2_DPP_Pseudo <opName, P>;
     }
 
-    def _e64 : VOP3InstBase <opName, P, node, 1>,
-               Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
-      let isReMaterializable = 1;
+    let isReMaterializable = 1 in {
+    if !empty(VOPDName) then
+      def _e64 : VOP3InstBase <opName, P, node, 1>,
+                 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+    else
+      def _e64 : VOP3InstBase <opName, P, node, 1>,
+                 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>,
+                 VOPD_Component<VOPDOp, VOPDName>;
     }
 
     let SubtargetPredicate = isGFX11Plus in {
@@ -328,7 +342,7 @@ multiclass
     : VOP2eInst_Base<opName, P, 0, "", node, revOp, useSGPRInput>;
 
 multiclass
-    VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
+    VOP2eInst_VOPD<string opName, VOPProfile P, bits<6> VOPDOp, string VOPDName,
                    SDPatternOperator node = null_frag, string revOp = opName,
                    bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
     : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;
@@ -458,6 +472,12 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
   // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
   let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
   let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
+  let InsVOPD3X = (ins Src0ModVOPD3:$src0X_modifiers, Src0VOPD3:$src0X,
+                       Src1ModVOPD3:$vsrc1X_modifiers, Src1RC32:$vsrc1X,
+                       VGPRSrc_32:$src2X);
+  let InsVOPD3Y = (ins Src0ModVOPD3:$src0Y_modifiers, Src0VOPD3:$src0Y,
+                       Src1ModVOPD3:$vsrc1Y_modifiers, Src1RC32:$vsrc1Y,
+                       VGPRSrc_32:$src2Y);
 
   let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                      Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
@@ -522,8 +542,8 @@ def VOP_MAC_F16_t16 : VOP_MAC <f16> {
   let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
   let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
   let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
-  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
-  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
+  let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 0/*IsFake16*/>.ret;
+  let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 0/*IsFake16*/>.ret;
   let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
   let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
   let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
@@ -554,8 +574,8 @@ def VOP_MAC_F16_fake16 : VOP_MAC <f16> {
   let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
   let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
   let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
-  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
-  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
+  let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 1/*IsFake16*/>.ret;
+  let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 1/*IsFake16*/>.ret;
   let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
   let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
   let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
@@ -724,7 +744,13 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
   let HasExtSDWA9 = 1;
 }
 
-def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
+def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]> {
+  let Src2VOPD3 = SSrc_i1;
+  let InsVOPD3X = (ins FP32VCSrcInputMods:$src0X_modifiers, Src0VOPD3:$src0X, FP32VRegSrcInputMods:$vsrc1X_modifiers, Src1VOPD3:$vsrc1X, Src2VOPD3:$vsrc2X);
+  let InsVOPD3Y = (ins FP32VCSrcInputMods:$src0Y_modifiers, Src0VOPD3:$src0Y, FP32VRegSrcInputMods:$vsrc1Y_modifiers, Src1VOPD3:$vsrc1Y, Src2VOPD3:$vsrc2Y);
+  let AsmVOPD3X = "$vdstX, $src0X_modifiers, $vsrc1X_modifiers, $vsrc2X";
+  let AsmVOPD3Y = "$vdstY, $src0Y_modifiers, $vsrc1Y_modifiers, $vsrc2Y";
+}
 def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;
 // V_CNDMASK_B16 is VOP3 only
 def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
@@ -745,7 +771,7 @@ def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
   let Src0VOP3DPP = VGPRSrc_16;
   let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
   let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 0/*IsFake16*/>.ret;
-  let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 0/*IsFake16*/>.ret;
+  let Src1ModVOP3DPP = getSrcModVOP3VC<f16, 0/*IsFake16*/>.ret;
 }
 def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
   let IsTrue16 = 1;
@@ -757,7 +783,7 @@ def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
   let Src0VOP3DPP = VGPRSrc_32;
   let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
   let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 1/*IsFake16*/>.ret;
-  let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 1/*IsFake16*/>.ret;
+  let Src1ModVOP3DPP = getSrcModVOP3VC<f16, 1/*IsFake16*/>.ret;
 }
 
 def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
@@ -819,12 +845,12 @@ defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul
 defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
 defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
 defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
-defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
-defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
+defm V_MIN_I32 : VOP2Inst_VOPD <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x18, "v_min_i32", smin>;
+defm V_MAX_I32 : VOP2Inst_VOPD <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x17, "v_max_i32", smax>;
 defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
 defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
-defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
-defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
+defm V_LSHRREV_B32 : VOP2Inst_VOPD <"v_lshrrev_b32", VOP_I32_I32_I32, 0x15, "v_lshrrev_b32", clshr_rev_32, "v_lshr_b32">;
+defm V_ASHRREV_I32 : VOP2Inst_VOPD <"v_ashrrev_i32", VOP_I32_I32_I32, 0x16, "v_ashrrev_i32", cashr_rev_32, "v_ashr_i32">;
 defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
 defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
 defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
@@ -856,7 +882,7 @@ defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_f
 
 
 let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
-  defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">;
+  defm V_SUB_U32 : VOP2Inst_VOPD <"v_sub_u32", VOP_I32_I32_I32_ARITH, 0x14, "v_sub_nc_u32", null_frag, "v_sub_u32">;
   defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">;
 }
 
@@ -1261,12 +1287,12 @@ let AddedComplexity = 30 in {
   }
 } // End AddedComplexity = 30
 
-let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 in {
+let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, CanBeVOPD3X = 0, FixedSize = 1 in {
 def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;
 
 let isCommutable = 1 in
 def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
-} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1
+} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, CanBeVOPD3X = 0, FixedSize = 1
 
 let SubtargetPredicate = HasPkFmacF16Inst in {
 defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
@@ -1390,12 +1416,10 @@ def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
 }
 
 let SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 in {
-  let SchedRW = [WriteDoubleAdd], isCommutable = 1 in {
-    let FPDPRounding = 1 in {
-      defm V_ADD_F64_pseudo : VOP2Inst <"v_add_f64_pseudo", VOP_F64_F64_F64, any_fadd>;
-      defm V_MUL_F64_pseudo : VOP2Inst <"v_mul_f64_pseudo", VOP_F64_F64_F64, fmul>;
-    } // End FPDPRounding = 1
-  } // End SchedRW = [WriteDoubleAdd], isCommutable = 1
+  let SchedRW = [WriteDoubleAdd], isCommutable = 1, FPDPRounding = 1 in {
+    defm V_ADD_F64_pseudo : VOP2Inst_VOPD <"v_add_f64_pseudo", VOP_F64_F64_F64, 0x21, "v_add_f64", any_fadd>;
+    defm V_MUL_F64_pseudo : VOP2Inst_VOPD <"v_mul_f64_pseudo", VOP_F64_F64_F64, 0x22, "v_mul_f64", fmul>;
+  } // End SchedRW = [WriteDoubleAdd], isCommutable = 1, FPDPRounding = 1
   let SchedRW = [Write64Bit] in {
     defm V_LSHLREV_B64_pseudo : VOP2Inst <"v_lshlrev_b64_pseudo", VOP_I64_I32_I64, clshl_rev_64>;
   } // End SchedRW = [Write64Bit]
@@ -1403,8 +1427,8 @@ let SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 in {
 
 let SubtargetPredicate = HasIEEEMinimumMaximumInsts, isReMaterializable = 1,
     SchedRW = [WriteDoubleAdd], isCommutable = 1 in {
-  defm V_MIN_NUM_F64 : VOP2Inst <"v_min_num_f64", VOP_F64_F64_F64, fminnum_like>;
-  defm V_MAX_NUM_F64 : VOP2Inst <"v_max_num_f64", VOP_F64_F64_F64, fmaxnum_like>;
+  defm V_MIN_NUM_F64 : VOP2Inst_VOPD <"v_min_num_f64", VOP_F64_F64_F64, 0x24, "v_min_num_f64", fminnum_like>;
+  defm V_MAX_NUM_F64 : VOP2Inst_VOPD <"v_max_num_f64", VOP_F64_F64_F64, 0x23, "v_max_num_f64", fmaxnum_like>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index e7ebc109b5dd5..75c531913ded1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -51,6 +51,8 @@ class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
 
 def V_LSHL_ADD_U64_PROF : VOP3_Profile<VOP_I64_I64_I32_I64>;
 
+def VOP_F64_F64_F64_F64_DPP_PROF : VOP3_Profile<VOP_F64_F64_F64_F64>;
+
 def DIV_FIXUP_F32_PROF : VOP3_Profile<VOP_F32_F32_F32_F32> {
   let HasExtVOP3DPP = 0;
   let HasExtDPP = 0;
@@ -147,12 +149,12 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
 
 defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
 defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>;
+defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">;
 defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
 
 let SchedRW = [WriteDoubleAdd] in {
 let FPDPRounding = 1 in {
-defm V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, any_fma>;
+defm V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP_F64_F64_F64_F64_DPP_PROF, any_fma>, VOPD_Component<0x20, "v_fma_f64">;
 let SubtargetPredicate = isNotGFX12Plus in {
 defm V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fadd>;
 defm V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fmul>;
@@ -1033,7 +1035,11 @@ class VOP3_BITOP3_Profile<VOPProfile pfl, VOP3Features f> : VOP3_Profile<pfl, f>
   let HasClamp = 0;
   let HasOMod = 0;
   let HasModifiers = 0;
+  let HasVOPD3Src2 = 0;
   let HasBitOp3 = 1;
+
+  let InsVOPD3Y = (ins Src0VOPD3:$src0Y, Src1VOPD3:$vsrc1Y, bitop3_0:$bitop3);
+  let AsmVOPD3Y = getAsmVOPDPart<NumSrcArgs, "Y", HasVOPD3Src2, HasModifiers>.ret # "$bitop3";
 }
 
 class VOP3_CVT_SCALE_F1632_FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
@@ -1416,7 +1422,8 @@ let SubtargetPredicate = HasBitOp3Insts  in {
     defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16",
                                   VOP3_BITOP3_Profile<VOPProfile <[i16, i16, i16, i16, i32]>, VOP3_OPSEL>>;
     defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32",
-                                  VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i32]>, VOP3_REGULAR>>;
+                                  VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i32]>, VOP3_REGULAR>>,
+                        VOPD_Component<0x12, "v_bitop2_b32">;
   }
   def : GCNPat<
     (i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 9e84f6aed0176..2c1193509da9b 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -112,8 +112,8 @@ multiclass VOPC_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType
     let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
     let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
     let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
-    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
-    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 0/*IsFake16*/>.ret;
   }
   def _fake16: VOPC_Profile<sched, vt0, vt1> {
     let IsTrue16 = 1;
@@ -138,8 +138,8 @@ multiclass VOPC_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType
     let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
-    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
-    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 1/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 1/*IsFake16*/>.ret;
   }
 }
 
@@ -184,8 +184,8 @@ multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, Va
     let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
     let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
     let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
-    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
-    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 0/*IsFake16*/>.ret;
   }
   def _fake16 : VOPC_NoSdst_Profile<sched, vt0, vt1> {
     let IsTrue16 = 1;
@@ -208,8 +208,8 @@ multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, Va
     let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
-    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
-    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 1/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 1/*IsFake16*/>.ret;
   }
 }
 
@@ -929,8 +929,8 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
     let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
     let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
     let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
-    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
-    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 0/*IsFake16*/>.ret;
   }
   def _fake16 : VOPC_Class_Profile_Base<sched, f16, f16> {
     let IsTrue16 = 1;
@@ -955,8 +955,8 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
     let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
-    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
-    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 1/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 1/*IsFake16*/>.ret;
   }
 }
 
@@ -998,8 +998,8 @@ multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
     let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
     let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
     let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
-    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
-    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 0/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 0/*IsFake16*/>.ret;
   }
   def _fake16 : VOPC_Class_NoSdst_Profile<sched, f16, f16> {
     let IsTrue16 = 1;
@@ -1022,8 +1022,8 @@ multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
     let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
     let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
-    let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
-    let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
+    let Src1ModVOP3DPP = getSrcModVOP3VC<Src1VT, 1/*IsFake16*/>.ret;
+    let Src2ModVOP3DPP = getSrcModVOP3VC<Src2VT, 1/*IsFake16*/>.ret;
   }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
index 4054002c41478..c4282c21a93cd 100644
--- a/llvm/lib/Target/AMDGPU/VOPDInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
@@ -50,6 +50,47 @@ class VOPD_MADKe<bits<4> opX, bits<5> opY> : Enc96 {
   let Inst{95-64} = imm;
 }
 
+class VOPD3e<bits<6> opX, bits<6> opY, VOP_Pseudo VDX, VOP_Pseudo VDY> : Enc96 {
+  bits<9> src0X;
+  bits<8> vsrc1X;
+  bits<8> vsrc2X;
+  bits<8> vdstX;
+  bits<9> src0Y;
+  bits<8> vsrc1Y;
+  bits<8> vsrc2Y;
+  bits<8> vdstY;
+  // neg modifiers
+  bit src0X_modifiers;
+  bit src0Y_modifiers;
+  bit vsrc1X_modifiers;
+  bit vsrc1Y_modifiers;
+  bit vsrc2X_modifiers;
+  bit vsrc2Y_modifiers;
+  bits<8> bitop3;
+
+  let Inst{8-0} = src0X;
+  let Inst{17-12} = opY;
+  let Inst{23-18} = opX;
+  let Inst{31-24} = 0xcf; // encoding
+  let Inst{40-32} = src0Y;
+  let Inst{41} = !if(VDX.Pfl.HasModifiers, src0X_modifiers, 0);
+  let Inst{42} = !if(!and(VDX.Pfl.HasSrc1, VDX.Pfl.HasModifiers), vsrc1X_modifiers, 0);
+  let Inst{43} = !if(!and(VDX.Pfl.HasVOPD3Src2, VDX.Pfl.HasModifiers), vsrc2X_modifiers, 0);
+  let Inst{44} = !if(VDY.Pfl.HasModifiers, src0Y_modifiers, 0);
+  let Inst{45} = !if(!and(VDY.Pfl.HasSrc1, VDY.Pfl.HasModifiers), vsrc1Y_modifiers, 0);
+  let Inst{46} = !if(!and(VDY.Pfl.HasVOPD3Src2, VDY.Pfl.HasModifiers), vsrc2Y_modifiers, 0);
+  let Inst{55-48} = !if(!eq(!find(VDX.Pfl.AsmVOPD3X, "$vsrc1X"), -1), 0, vsrc1X);
+
+  // SP3 allows using an SGPR in place of the VSRC2 operand for
+  // V_DUAL_CNDMASK_B32, even though the operand does not accept SGPRs.
+  let Inst{63-56} = !if(!eq(!find(VDX.Pfl.AsmVOPD3X, "$vsrc2X"), -1), 0, vsrc2X);
+  let Inst{71-64} = vdstX;
+  let Inst{79-72} = !if(!eq(!find(VDY.Pfl.AsmVOPD3Y, "$vsrc1Y"), -1), 0, vsrc1Y);
+  let Inst{87-80} = !if(!ne(!find(VDY.Pfl.AsmVOPD3Y, "bitop"), -1), bitop3,
+                        !if(!eq(!find(VDY.Pfl.AsmVOPD3Y, "$vsrc2Y"), -1), 0, vsrc2Y));
+  let Inst{95-88} = vdstY;
+}
+
 //===----------------------------------------------------------------------===//
 // VOPD classes
 //===----------------------------------------------------------------------===//
@@ -71,8 +112,8 @@ class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
       SIMCInstr<NAME, Gen.Subtarget> {
   // Fields for table indexing
   Instruction Opcode = !cast<Instruction>(NAME);
-  bits<5> OpX = XasVC.VOPDOp;
-  bits<5> OpY = YasVC.VOPDOp;
+  bits<6> OpX = XasVC.VOPDOp;
+  bits<6> OpY = YasVC.VOPDOp;
   bits<4> SubTgt = Gen.Subtarget;
 
   let VALU = 1;
@@ -110,7 +151,7 @@ class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
 class VOPD<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
            VOPD_Component XasVC, VOPD_Component YasVC, GFXGenD Gen>
     : VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC, Gen>,
-      VOPDe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
+      VOPDe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp{4-0}> {
   let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
   let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
 }
@@ -118,29 +159,48 @@ class VOPD<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
 class VOPD_MADK<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
                 VOPD_Component XasVC, VOPD_Component YasVC, GFXGenD Gen>
     : VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC, Gen>,
-      VOPD_MADKe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
+      VOPD_MADKe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp{4-0}> {
   let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
   let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
   let Size = 12;
   let FixedSize = 1;
 }
 
+class VOPD3<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
+            VOPD_Component XasVC, VOPD_Component YasVC, GFXGenD Gen>
+    : VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC, Gen>,
+      VOPD3e<XasVC.VOPDOp, YasVC.VOPDOp, VDX, VDY> {
+  let VOPD3 = 1;
+  let Size = 12;
+  // VOPD3 uses the promoted form of VOP2 instructions, so V_CNDMASK_B32 is not
+  // limited to VCC as src2 and a real SGPR operand is used instead. The
+  // implicit VCC use is therefore dropped from the Uses list below.
+  defvar UsesX = !if(!eq(VDX, V_CNDMASK_B32_e32), !filter(x, VDX.Uses, !ne(x, VCC)), VDX.Uses);
+  defvar UsesY = !if(!eq(VDY, V_CNDMASK_B32_e32), !filter(x, VDY.Uses, !ne(x, VCC)), VDY.Uses);
+  let Uses = RegListUnion<UsesX, UsesY>.ret;
+}
+
 defvar VOPDPseudosCommon = [
   "V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32",
   "V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32",
   "V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32",
   "V_DOT2C_F32_F16_e32", "V_DOT2C_F32_BF16_e32"
 ];
-defvar VOPDYOnlyPseudosCommon = ["V_ADD_U32_e32", "V_LSHLREV_B32_e32",
-                                 "V_AND_B32_e32"];
+defvar VOPDYOnlyPseudosCommon = ["V_ADD_U32_e32", "V_LSHLREV_B32_e32"];
+defvar VOPDYOnlyPseudosGFX11_12 = ["V_AND_B32_e32"];
+defvar VOPDYOnlyPseudosGFX1250 = ["V_MAX_I32_e32", "V_MIN_I32_e32",
+                                  "V_SUB_U32_e32", "V_LSHRREV_B32_e32",
+                                  "V_ASHRREV_I32_e32"];
 
 defvar VOPDXPseudosGFX11 = VOPDPseudosCommon;
 defvar VOPDXPseudosGFX12 = VOPDPseudosCommon;
-defvar VOPDYPseudosGFX11 = !listconcat(VOPDXPseudosGFX11, VOPDYOnlyPseudosCommon);
-defvar VOPDYPseudosGFX12 = !listconcat(VOPDXPseudosGFX12, VOPDYOnlyPseudosCommon);
+defvar VOPDYPseudosGFX11 = !listconcat(VOPDXPseudosGFX11, VOPDYOnlyPseudosCommon, VOPDYOnlyPseudosGFX11_12);
+defvar VOPDYPseudosGFX12 = !listconcat(VOPDXPseudosGFX12, VOPDYOnlyPseudosCommon, VOPDYOnlyPseudosGFX11_12);
+defvar VOPDYPseudosGFX1250 = !listconcat(VOPDXPseudosGFX12, VOPDYOnlyPseudosCommon, VOPDYOnlyPseudosGFX1250);
 
 def GFX11GenD : GFXGenD<GFX11Gen, VOPDXPseudosGFX11, VOPDYPseudosGFX11>;
-def GFX12GenD : GFXGenD<GFX12Gen, VOPDXPseudosGFX12, VOPDYPseudosGFX12>;
+def GFX12GenD : GFXGenD<GFX12Not12_50Gen, VOPDXPseudosGFX12, VOPDYPseudosGFX12>;
+def GFX1250GenD : GFXGenD<GFX1250Gen, VOPDXPseudosGFX12, VOPDYPseudosGFX1250>;
 
 
 def VOPDDstYOperand : RegisterOperand<VGPR_32, "printRegularOperand"> {
@@ -148,16 +208,13 @@ def VOPDDstYOperand : RegisterOperand<VGPR_32, "printRegularOperand"> {
 }
 
 class getRenamed<string VOPDName, GFXGen Gen> {
-  string ret = !if(!eq(Gen.Subtarget, GFX12Gen.Subtarget),
-                   !if(!eq(VOPDName, "v_dual_max_f32"),
-                       "v_dual_max_num_f32",
-                       !if(!eq(VOPDName, "v_dual_min_f32"),
-                           "v_dual_min_num_f32",
-                           VOPDName)),
-                   VOPDName);
+  string ret = !cond(!eq(Gen.Subtarget, GFX11Gen.Subtarget) : VOPDName,
+                     !eq(VOPDName, "v_dual_max_f32")        : "v_dual_max_num_f32",
+                     !eq(VOPDName, "v_dual_min_f32")        : "v_dual_min_num_f32",
+                     true                                   : VOPDName);
 }
 
-foreach Gen = [GFX11GenD, GFX12GenD] in {
+foreach Gen = [GFX11GenD, GFX12GenD, GFX1250GenD] in {
   foreach x = Gen.VOPDXPseudos in {
     foreach y = Gen.VOPDYPseudos in {
       defvar xInst = !cast<VOP_Pseudo>(x);
@@ -192,3 +249,41 @@ foreach Gen = [GFX11GenD, GFX12GenD] in {
   }
 }
 
+defvar VOPD3XPseudosExtra = ["V_ADD_U32_e32", "V_LSHLREV_B32_e32", "V_FMA_F32_e64", "V_SUB_U32_e32",
+                             "V_LSHRREV_B32_e32", "V_ASHRREV_I32_e32", "V_FMA_F64_e64", "V_ADD_F64_pseudo_e32",
+                             "V_MUL_F64_pseudo_e32", "V_MAX_NUM_F64_e32", "V_MIN_NUM_F64_e32"];
+defvar VOPD3XPseudosGFX1250 = !listconcat(
+                                !filter(x, VOPDXPseudosGFX12, !and(!eq(!find(x, "FMAAK"), -1),
+                                                                   !eq(!find(x, "FMAMK"), -1))),
+                                VOPD3XPseudosExtra);
+defvar VOPD3YPseudosExtra = ["V_BITOP3_B32_e64", "V_FMA_F32_e64"];
+defvar VOPD3YPseudosGFX1250 = !listconcat(
+                                !filter(x, VOPDYPseudosGFX1250, !and(!eq(!find(x, "FMAAK"), -1),
+                                                                     !eq(!find(x, "FMAMK"), -1))),
+                                VOPD3YPseudosExtra);
+
+def GFX1250GenD3 : GFXGenD<GFX1250Gen, VOPD3XPseudosGFX1250, VOPD3YPseudosGFX1250>;
+
+class getOpcMap<string OPName> {
+  defvar BaseName = !substr(OPName,2);
+  string ret = !cond(!eq(BaseName, "BITOP3_B32_e64")   : "BITOP2_B32_e64",
+                     1 : BaseName);
+}
+
+foreach Gen = [GFX1250GenD3] in {
+  foreach x = Gen.VOPDXPseudos in {
+    foreach y = Gen.VOPDYPseudos in {
+      defvar xInst = !cast<VOP_Pseudo>(x);
+      defvar yInst = !cast<VOP_Pseudo>(y);
+      defvar XasVC = !cast<VOPD_Component>(x);
+      defvar YasVC = !cast<VOPD_Component>(y);
+      defvar xAsmName = getRenamed<XasVC.VOPDName, Gen>.ret;
+      defvar yAsmName = getRenamed<YasVC.VOPDName, Gen>.ret;
+      defvar OpName = "V_DUAL_" # getOpcMap<x>.ret # "_X_" # getOpcMap<y>.ret # "_e96" # Gen.Suffix;
+      defvar asm = xAsmName # xInst.Pfl.AsmVOPD3X #" :: "# yAsmName #" "# yInst.Pfl.AsmVOPD3Y;
+      defvar ins = !con(xInst.Pfl.InsVOPD3X, yInst.Pfl.InsVOPD3Y);
+      defvar outs = (outs xInst.Pfl.DstRC:$vdstX, yInst.Pfl.DstRC:$vdstY);
+      def OpName : VOPD3<outs, ins, asm, xInst, yInst, XasVC, YasVC, Gen>;
+    }
+  }
+}
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 3e01f8cd044e2..49fde86ca6650 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -35,12 +35,17 @@ class VOP <string opName> {
 
 // First 13 insts from VOPDY are also VOPDX. DOT2ACC_F32_BF16 is omitted
 defvar VOPDX_Max_Index = 12;
+defvar VOPD3X_Max_Index = 36;
 
-class VOPD_Component<bits<5> OpIn, string vOPDName> {
+class VOPD_Component<bits<6> OpIn, string vOPDName> {
   Instruction BaseVOP = !cast<Instruction>(NAME);
   string VOPDName = "v_dual_" # !substr(vOPDName, 2);
-  bits<5> VOPDOp = OpIn;
+  bits<6> VOPDOp = OpIn;
   bit CanBeVOPDX = !le(VOPDOp, VOPDX_Max_Index);
+  bit CanBeVOPD3X = !and(!le(VOPDOp, VOPD3X_Max_Index),
+                         !and(!ne(vOPDName, "v_bitop2_b32"),
+                              !and(!ne(vOPDName, "v_max_i32"),
+                                   !ne(vOPDName, "v_min_i32"))));
 }
 
 class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
diff --git a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll
index 70ea0688c8a49..d8079651787ad 100644
--- a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll
@@ -23,9 +23,9 @@ define void @test_i8load_v4i8store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
 ; GCN-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GCN-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GCN-SDAG-NEXT:    v_or_b32_e32 v1, v3, v2
-; GCN-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GCN-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GCN-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GCN-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GCN-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GCN-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GCN-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GCN-SDAG-NEXT:    global_store_b32 v[8:9], v0, off
@@ -41,8 +41,7 @@ define void @test_i8load_v4i8store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
 ; GCN-GISEL-NEXT:    s_wait_loadcnt 0x1
 ; GCN-GISEL-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
 ; GCN-GISEL-NEXT:    s_wait_loadcnt 0x0
-; GCN-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
-; GCN-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
+; GCN-GISEL-NEXT:    v_dual_lshlrev_b32 v1, 16, v2 :: v_dual_lshlrev_b32 v2, 24, v2
 ; GCN-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GCN-GISEL-NEXT:    v_or3_b32 v0, v0, v1, v2
 ; GCN-GISEL-NEXT:    global_store_b32 v[8:9], v0, off
@@ -66,17 +65,15 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2
 ; GCN-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GCN-SDAG-NEXT:    global_load_b128 v[4:7], v[0:1], off
 ; GCN-SDAG-NEXT:    global_load_b128 v[0:3], v[2:3], off
-; GCN-SDAG-NEXT:    v_mov_b32_e32 v8, 0
-; GCN-SDAG-NEXT:    v_mov_b32_e32 v9, 0
 ; GCN-SDAG-NEXT:    s_wait_loadcnt 0x0
 ; GCN-SDAG-NEXT:    v_pk_add_u16 v10, v6, v2
 ; GCN-SDAG-NEXT:    v_pk_add_u16 v11, v7, v3
 ; GCN-SDAG-NEXT:    s_wait_xcnt 0x0
-; GCN-SDAG-NEXT:    v_mov_b32_e32 v2, 12
-; GCN-SDAG-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v6, 8
+; GCN-SDAG-NEXT:    v_dual_mov_b32 v2, 12 :: v_dual_mov_b32 v6, 8
+; GCN-SDAG-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v7, 0
+; GCN-SDAG-NEXT:    v_mov_b32_e32 v8, 0
 ; GCN-SDAG-NEXT:    v_pk_add_u16 v4, v4, v0
-; GCN-SDAG-NEXT:    v_lshrrev_b32_e32 v0, 16, v10
-; GCN-SDAG-NEXT:    v_mov_b32_e32 v7, 0
+; GCN-SDAG-NEXT:    v_dual_mov_b32 v9, 0 :: v_dual_lshrrev_b32 v0, 16, v10
 ; GCN-SDAG-NEXT:    v_pk_add_u16 v5, v5, v1
 ; GCN-SDAG-NEXT:    s_clause 0x2
 ; GCN-SDAG-NEXT:    global_store_b16 v[2:3], v11, off
@@ -90,18 +87,19 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2
 ; GCN-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GCN-GISEL-NEXT:    global_load_b128 v[4:7], v[0:1], off
 ; GCN-GISEL-NEXT:    global_load_b128 v[0:3], v[2:3], off
-; GCN-GISEL-NEXT:    v_mov_b32_e32 v8, 0
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 2
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v12, 4
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 6
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 8
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 10
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v20, 12 :: v_dual_mov_b32 v19, 0
-; GCN-GISEL-NEXT:    v_mov_b32_e32 v21, 0
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v10, 2
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v11, 0
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v12, 4 :: v_dual_mov_b32 v14, 6
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v15, 0
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v16, 8 :: v_dual_mov_b32 v18, 10
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v19, 0
 ; GCN-GISEL-NEXT:    s_wait_loadcnt 0x0
 ; GCN-GISEL-NEXT:    v_pk_add_u16 v2, v6, v2
 ; GCN-GISEL-NEXT:    v_pk_add_u16 v4, v4, v0
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v20, 12
 ; GCN-GISEL-NEXT:    v_pk_add_u16 v1, v5, v1
+; GCN-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v21, 0 :: v_dual_lshrrev_b32 v0, 16, v2
 ; GCN-GISEL-NEXT:    v_pk_add_u16 v3, v7, v3
 ; GCN-GISEL-NEXT:    s_clause 0x6
 ; GCN-GISEL-NEXT:    global_store_b16 v[8:9], v4, off
@@ -111,7 +109,6 @@ define i16 @test_v7i16_load_store(ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2
 ; GCN-GISEL-NEXT:    global_store_b16 v[16:17], v2, off
 ; GCN-GISEL-NEXT:    global_store_d16_hi_b16 v[18:19], v2, off
 ; GCN-GISEL-NEXT:    global_store_b16 v[20:21], v3, off
-; GCN-GISEL-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
 ; GCN-GISEL-NEXT:    s_set_pc_i64 s[30:31]
   %vec1 = load <7 x i16>, ptr addrspace(1) %ptr1
   %insert = insertelement <7 x i16> %vec1, i16 20, i32 4
@@ -319,14 +316,13 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
 ; GCN-SDAG-NEXT:    v_mov_b32_e32 v16, 0x70
 ; GCN-SDAG-NEXT:    v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v50, 0x60
 ; GCN-SDAG-NEXT:    v_dual_mov_b32 v51, 0 :: v_dual_mov_b32 v52, 48
-; GCN-SDAG-NEXT:    v_dual_mov_b32 v38, 0x50 :: v_dual_mov_b32 v53, 0
 ; GCN-SDAG-NEXT:    v_mov_b32_e32 v54, 32
-; GCN-SDAG-NEXT:    v_dual_mov_b32 v14, 0xc8 :: v_dual_mov_b32 v15, 0
-; GCN-SDAG-NEXT:    v_dual_mov_b32 v39, 0 :: v_dual_mov_b32 v48, 64
-; GCN-SDAG-NEXT:    v_dual_mov_b32 v55, 0 :: v_dual_mov_b32 v40, 16
-; GCN-SDAG-NEXT:    v_mov_b32_e32 v49, 0
-; GCN-SDAG-NEXT:    v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 0
-; GCN-SDAG-NEXT:    v_mov_b32_e32 v43, 0
+; GCN-SDAG-NEXT:    v_dual_mov_b32 v38, 0x50 :: v_dual_mov_b32 v53, 0
+; GCN-SDAG-NEXT:    v_dual_mov_b32 v55, 0 :: v_dual_mov_b32 v14, 0xc8
+; GCN-SDAG-NEXT:    v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v39, 0
+; GCN-SDAG-NEXT:    v_dual_mov_b32 v48, 64 :: v_dual_mov_b32 v40, 16
+; GCN-SDAG-NEXT:    v_dual_mov_b32 v42, 0 :: v_dual_mov_b32 v49, 0
+; GCN-SDAG-NEXT:    v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v43, 0
 ; GCN-SDAG-NEXT:    s_wait_loadcnt 0x7
 ; GCN-SDAG-NEXT:    global_store_b128 v[16:17], v[6:9], off
 ; GCN-SDAG-NEXT:    s_wait_loadcnt 0x6
@@ -408,15 +404,16 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
 ; GCN-GISEL-NEXT:    v_mov_b32_e32 v34, 0xc8
 ; GCN-GISEL-NEXT:    v_dual_mov_b32 v35, 0 :: v_dual_mov_b32 v38, 0
 ; GCN-GISEL-NEXT:    v_dual_mov_b32 v39, 0 :: v_dual_mov_b32 v48, 16
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v49, 0 :: v_dual_mov_b32 v50, 32
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v52, 48 :: v_dual_mov_b32 v51, 0
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v53, 0 :: v_dual_mov_b32 v54, 64
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v40, 0x50 :: v_dual_mov_b32 v55, 0
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 0x60
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v50, 32 :: v_dual_mov_b32 v49, 0
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v51, 0 :: v_dual_mov_b32 v52, 48
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v54, 64
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v40, 0x50 :: v_dual_mov_b32 v53, 0
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v55, 0 :: v_dual_mov_b32 v41, 0
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v42, 0x60
 ; GCN-GISEL-NEXT:    v_dual_mov_b32 v44, 0x70 :: v_dual_mov_b32 v43, 0
-; GCN-GISEL-NEXT:    v_mov_b32_e32 v45, 0
 ; GCN-GISEL-NEXT:    s_wait_loadcnt 0x7
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v37, v9 :: v_dual_mov_b32 v36, v8
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v45, 0 :: v_dual_mov_b32 v37, v9
+; GCN-GISEL-NEXT:    v_mov_b32_e32 v36, v8
 ; GCN-GISEL-NEXT:    v_lshl_add_u64 v[6:7], v[6:7], 0, 0xc8
 ; GCN-GISEL-NEXT:    v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9]
 ; GCN-GISEL-NEXT:    s_wait_loadcnt 0x6
@@ -491,12 +488,11 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1,
 ; GCN-SDAG:       ; %bb.0:
 ; GCN-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
 ; GCN-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GCN-SDAG-NEXT:    s_wait_xcnt 0x0
+; GCN-SDAG-NEXT:    s_load_b64 s[4:5], s[4:5], 0x10
 ; GCN-SDAG-NEXT:    v_mov_b32_e32 v8, 12
 ; GCN-SDAG-NEXT:    v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 8
-; GCN-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
 ; GCN-SDAG-NEXT:    v_dual_mov_b32 v11, 0 :: v_dual_lshlrev_b32 v4, 4, v0
-; GCN-SDAG-NEXT:    s_wait_xcnt 0x0
-; GCN-SDAG-NEXT:    s_load_b64 s[4:5], s[4:5], 0x10
 ; GCN-SDAG-NEXT:    v_mov_b32_e32 v12, 0
 ; GCN-SDAG-NEXT:    v_mov_b32_e32 v13, 0
 ; GCN-SDAG-NEXT:    s_wait_kmcnt 0x0
@@ -520,17 +516,16 @@ define amdgpu_kernel void @test_v7i16_load_store_kernel(ptr addrspace(1) %ptr1,
 ; GCN-GISEL:       ; %bb.0:
 ; GCN-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
 ; GCN-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GCN-GISEL-NEXT:    s_wait_xcnt 0x0
+; GCN-GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x10
 ; GCN-GISEL-NEXT:    v_mov_b32_e32 v8, 0
 ; GCN-GISEL-NEXT:    v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 2
-; GCN-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3)
 ; GCN-GISEL-NEXT:    v_dual_mov_b32 v11, 0 :: v_dual_lshlrev_b32 v4, 4, v0
-; GCN-GISEL-NEXT:    s_wait_xcnt 0x0
-; GCN-GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x10
 ; GCN-GISEL-NEXT:    v_mov_b32_e32 v12, 4
 ; GCN-GISEL-NEXT:    v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 6
 ; GCN-GISEL-NEXT:    v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 8
 ; GCN-GISEL-NEXT:    v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 10
-; GCN-GISEL-NEXT:    v_dual_mov_b32 v20, 12 :: v_dual_mov_b32 v19, 0
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v20, 12
 ; GCN-GISEL-NEXT:    v_mov_b32_e32 v21, 0
 ; GCN-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GCN-GISEL-NEXT:    s_clause 0x1
diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
new file mode 100644
index 0000000000000..586ddf627bd9e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
@@ -0,0 +1,3243 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=postmisched %s -o - | FileCheck -check-prefix=SCHED %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefix=PAIR %s
+
+---
+name:            vopd_combine_low_vgprs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_low_vgprs
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_low_vgprs
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_mov_max_i32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_mov_max_i32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_MAX_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_mov_max_i32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_MAX_I32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr1, implicit $exec, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr3 = V_MAX_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_mov_min_i32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_mov_min_i32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_MIN_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_mov_min_i32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_MIN_I32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr1, implicit $exec, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr3 = V_MIN_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_max_i32_max_i32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_max_i32_max_i32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_MAX_I32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_MAX_I32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_max_i32_max_i32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = V_MAX_I32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr3 = V_MAX_I32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = V_MAX_I32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    $vgpr3 = V_MAX_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_min_i32_min_i32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_min_i32_min_i32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_MIN_I32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_MIN_I32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_min_i32_min_i32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = V_MIN_I32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr3 = V_MIN_I32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = V_MIN_I32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    $vgpr3 = V_MIN_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_mov_sub_nc_i32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_mov_sub_nc_i32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_SUB_U32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_mov_sub_nc_i32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_SUB_U32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr1, implicit $exec, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr3 = V_SUB_U32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_mov_lshrrev_b32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_mov_lshrrev_b32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_LSHRREV_B32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_mov_lshrrev_b32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_LSHRREV_B32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr1, implicit $exec, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr3 = V_LSHRREV_B32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_mov_ashrrev_i32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_mov_ashrrev_i32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_ASHRREV_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_mov_ashrrev_i32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ASHRREV_I32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr1, implicit $exec, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr3 = V_ASHRREV_I32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_same_vgprs_banks
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_same_vgprs_banks
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_MUL_F32_e32 killed $vgpr0, killed $vgpr5, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_same_vgprs_banks
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = V_SUB_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr6 = V_MUL_F32_e32 killed $vgpr0, killed $vgpr5, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr3 = V_SUB_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 killed $vgpr0, $vgpr5, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_same_vgprs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_same_vgprs
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_same_vgprs
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 $vgpr0, $vgpr1, $vgpr0, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = V_SUB_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 killed $vgpr0, $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_same_dst_parity
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_same_dst_parity
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_same_dst_parity
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3, $vgpr5 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_e96_gfx1250 0, $vgpr1, 0, $vgpr1, 0, $vgpr0, 0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_x_fmaak
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_x_fmaak
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = V_FMAAK_F32 killed $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr0, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_x_fmaak
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1, $vgpr2 = V_DUAL_FMAAK_F32_X_MOV_B32_e32_gfx1250 killed $sgpr0, $vgpr0, 981467136, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_FMAAK_F32 $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+...
+
+---
+name:            vopd_combine_y_fmaak
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_y_fmaak
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr2 = V_FMAAK_F32 killed $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr0, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_y_fmaak
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1, $vgpr2 = V_DUAL_MOV_B32_e32_X_FMAAK_F32_gfx1250 $vgpr0, killed $sgpr0, $vgpr0, 981467136, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_FMAAK_F32 $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_x_fmaak_same_dst_parity
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_x_fmaak_same_dst_parity
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = V_FMAAK_F32 killed $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_x_fmaak_same_dst_parity
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = V_FMAAK_F32 killed $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; PAIR-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_FMAAK_F32 $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+...
+
+---
+name:            vopd_no_combine_y_fmaak_same_dst_parity
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_y_fmaak_same_dst_parity
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_FMAAK_F32 killed $sgpr0, killed $vgpr0, 981467136, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_y_fmaak_same_dst_parity
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; PAIR-NEXT: $vgpr3 = V_FMAAK_F32 killed $sgpr0, killed $vgpr0, 981467136, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr3 = V_FMAAK_F32 $sgpr0, $vgpr0, 981467136, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_literal_x
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_literal_x
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_literal_x
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 12345, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_literal_y
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_literal_y
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_literal_y
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3, $vgpr6 = V_DUAL_MUL_F32_e32_X_SUB_F32_e32_gfx1250 $vgpr0, $vgpr0, 12345, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec
+...
+
+# The two tests below cannot use VOPD because of the matching vdst parity, and
+# cannot use VOPD3 because of the literal operand.
+---
+name:            vopd_no_combine_literal_x
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_literal_x
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_literal_x
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_literal_y
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_literal_y
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_SUB_F32_e32 12345, killed $vgpr1, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_literal_y
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e32 killed $vgpr0, $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_SUB_F32_e32 12345, killed $vgpr1, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+    $vgpr4 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_SUB_F32_e32 12345, $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_add_u32_add_f32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_add_u32_add_f32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_add_u32_add_f32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4, $vgpr6 = V_DUAL_ADD_U32_e32_X_ADD_F32_e32_e96_gfx1250 $vgpr0, $vgpr1, 0, killed $vgpr2, 0, killed $vgpr3, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_add_f32_add_u32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_add_f32_add_u32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_add_f32_add_u32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr4 = V_DUAL_ADD_F32_e32_X_ADD_U32_e32_e96_gfx1250 0, killed $vgpr2, 0, killed $vgpr3, $vgpr0, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+    $vgpr7 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name:            vopd_combine_add_u32_add_f32_same_dst_parity
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_add_u32_add_f32_same_dst_parity
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = V_ADD_U32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_add_u32_add_f32_same_dst_parity
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5, $vgpr4 = V_DUAL_ADD_F32_e32_X_ADD_U32_e32_gfx1250 killed $vgpr2, killed $vgpr3, killed $vgpr0, killed $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_add_f32_add_u32_same_dst_parity
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_add_f32_add_u32_same_dst_parity
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_ADD_U32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_add_f32_add_u32_same_dst_parity
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5, $vgpr4 = V_DUAL_ADD_F32_e32_X_ADD_U32_e32_gfx1250 killed $vgpr2, killed $vgpr3, killed $vgpr0, killed $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr5 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+    $vgpr4 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name:            vopd_combine_lshl_lshl
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_lshl_lshl
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = V_LSHLREV_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_LSHLREV_B32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_lshl_lshl
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4, $vgpr6 = V_DUAL_LSHLREV_B32_e32_X_LSHLREV_B32_e32_e96_gfx1250 killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, implicit $exec, implicit $exec, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = V_LSHLREV_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_LSHLREV_B32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_ashr_ashr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_ashr_ashr
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = V_ASHRREV_I32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_ASHRREV_I32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_ashr_ashr
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4, $vgpr5 = V_DUAL_ASHRREV_I32_e32_X_ASHRREV_I32_e32_e96_gfx1250 killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, implicit $exec, implicit $exec, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = V_ASHRREV_I32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_ASHRREV_I32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_sub_u32_sub_u32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_sub_u32_sub_u32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = V_SUB_U32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_SUB_U32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_sub_u32_sub_u32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4, $vgpr5 = V_DUAL_SUB_U32_e32_X_SUB_U32_e32_e96_gfx1250 killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, implicit $exec, implicit $exec, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = V_SUB_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_SUB_U32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_sub_u32_sub_u32_lit
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_sub_u32_sub_u32_lit
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = V_SUB_U32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_SUB_U32_e32 300, killed $vgpr2, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_sub_u32_sub_u32_lit
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = V_SUB_U32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_SUB_U32_e32 300, killed $vgpr2, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr4 = V_SUB_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_SUB_U32_e32 300, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fmac_fmac
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fmac_fmac
+    ; SCHED: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr3, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fmac_fmac
+    ; PAIR: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2, $vgpr3 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32_gfx1250 $vgpr1, $vgpr1, killed $vgpr2, killed $vgpr1, $vgpr1, killed $vgpr3, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    $vgpr3 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr3, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fmac_fmac_same_dst_parity
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fmac_fmac_same_dst_parity
+    ; SCHED: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr4, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fmac_fmac_same_dst_parity
+    ; PAIR: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2, $vgpr4 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32_e96_gfx1250 0, $vgpr1, 0, $vgpr1, killed $vgpr2, 0, killed $vgpr1, 0, $vgpr1, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    $vgpr4 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr4, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_fmac_fmac_same_dst
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_fmac_fmac_same_dst
+    ; SCHED: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_fmac_fmac_same_dst
+    ; PAIR: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr2 = V_FMAC_F32_e32 killed $vgpr1, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    $vgpr2 = V_FMAC_F32_e32 $vgpr1, $vgpr1, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_add_f32_fadd_f32_same_dst
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_add_f32_fadd_f32_same_dst
+    ; SCHED: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_add_f32_fadd_f32_same_dst
+    ; PAIR: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_add_f64_add_f32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_add_f64_add_f32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_add_f64_add_f32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4_vgpr5, $vgpr6 = V_DUAL_ADD_F64_pseudo_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr8_vgpr9, 0, killed $vgpr2, 0, killed $vgpr3, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr8_vgpr9 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr8_vgpr9, implicit $mode, implicit $exec
+    $vgpr7 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_add_f32_add_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_add_f32_add_f64
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_add_f32_add_f64
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4_vgpr5, $vgpr6 = V_DUAL_ADD_F64_pseudo_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr8_vgpr9, 0, killed $vgpr2, 0, killed $vgpr3, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr8_vgpr9 = IMPLICIT_DEF
+    $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+    $vgpr7 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr8_vgpr9, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_add_f64_add_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_add_f64_add_f64
+    ; SCHED: $vgpr8_vgpr9 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr6_vgpr7 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_add_f64_add_f64
+    ; PAIR: $vgpr8_vgpr9 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr6_vgpr7 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr8_vgpr9 = IMPLICIT_DEF
+    $vgpr10_vgpr11 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr8_vgpr9, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6_vgpr7 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr10_vgpr11, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_add_f64_add_f32_overlapping_dst
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_dst
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_dst
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr8_vgpr9, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr3, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr8_vgpr9 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr8_vgpr9, implicit $mode, implicit $exec
+    $vgpr7 = V_BFM_B32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_add_f64_add_f32_overlapping_src_sub1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_src_sub1
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr5, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_src_sub1
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr5, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr10_vgpr11 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr10_vgpr11, implicit $mode, implicit $exec
+    $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr5, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_add_f64_add_f32_overlapping_src_sub0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_src_sub0
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr4, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_add_f64_add_f32_overlapping_src_sub0
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 killed $vgpr0_vgpr1, killed $vgpr10_vgpr11, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr2, killed $vgpr4, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr10_vgpr11 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr10_vgpr11, implicit $mode, implicit $exec
+    $vgpr6 = V_ADD_F32_e32 $vgpr2, $vgpr4, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fma_fma
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_fma
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_fma
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_fma_fma_bank_conflict_src2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_fma_fma_bank_conflict_src2
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr10 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr10, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_fma_fma_bank_conflict_src2
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr10 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr10, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr10 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr10, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fma_add_f32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_add_f32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_add_f32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_add_f32_fma
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_add_f32_fma
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_add_f32_fma
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr7, $vgpr6 = V_DUAL_ADD_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, killed $vgpr3, 0, killed $vgpr4, 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec
+    $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fma_add_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_add_f64
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 killed $vgpr2_vgpr3, killed $vgpr10_vgpr11, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_add_f64
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr6 = V_DUAL_ADD_F64_pseudo_e32_X_FMA_F32_e64_e96_gfx1250 0, killed $vgpr2_vgpr3, 0, killed $vgpr10_vgpr11, 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr10_vgpr11 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr2_vgpr3, $vgpr10_vgpr11, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_fma_src0_mod_fma
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_fma_src0_mod_fma
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 3, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_fma_src0_mod_fma
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 3, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 3, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_fma_fma_src1_mod
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_fma_fma_src1_mod
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 2, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_fma_fma_src1_mod
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 2, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 2, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_fma_fma_src2_mod
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_fma_fma_src2_mod
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 3, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_fma_fma_src2_mod
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 3, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 3, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_fma_clamp_fma
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_fma_clamp_fma
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 1, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_fma_clamp_fma
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 1, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_fma_fma_omod
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_fma_fma_omod
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_fma_fma_omod
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 1, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fma_fma_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_fma_neg
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 1, $vgpr0, 1, $vgpr1, 1, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 1, killed $vgpr3, 1, killed $vgpr4, 1, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_fma_neg
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 1, $vgpr0, 1, $vgpr1, 1, killed $vgpr2, 1, killed $vgpr3, 1, killed $vgpr4, 1, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 1, $vgpr0, 1, $vgpr1, 1, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 1, $vgpr3, 1, $vgpr4, 1, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fma_fma_src0_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_fma_src0_neg
+    ; SCHED: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 1, $sgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $sgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_fma_src0_neg
+    ; PAIR: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 1, $sgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $sgpr0, killed $vgpr1, implicit $exec
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 1, $sgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $sgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fma_fma_src1_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_fma_src1_neg
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 1, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_fma_src1_neg
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, killed $vgpr3, 1, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 1, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fma_fma_src2_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_fma_src2_neg
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 1, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_fma_src2_neg
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 1, killed $vgpr2, 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 1, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fma_f64_fma_f32_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_f64_fma_f32_neg
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr7 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr10_vgpr11 = V_FMA_F64_e64 1, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 1, killed $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr9 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr8, 0, killed $vgpr7, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_f64_fma_f32_neg
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr7 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr10_vgpr11, $vgpr9 = V_DUAL_FMA_F64_e64_X_FMA_F32_e64_e96_gfx1250 1, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 1, killed $vgpr4_vgpr5, 0, killed $vgpr6, 0, killed $vgpr8, 0, killed $vgpr7, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr7 = IMPLICIT_DEF
+    $vgpr8 = IMPLICIT_DEF
+    $vgpr10_vgpr11 = V_FMA_F64_e64 1, $vgpr0_vgpr1, 1, $vgpr2_vgpr3, 1, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr12 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr9 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr8, 0, $vgpr7, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_lshl_add_u64_fma
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_lshl_add_u64_fma
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec
+    ; SCHED-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr2, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_lshl_add_u64_fma
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec
+    ; PAIR-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr2, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec
+    $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr2, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fma_lshl_add_u64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_lshl_add_u64
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr2, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 killed $vgpr0_vgpr1, $vgpr1, killed $vgpr2_vgpr3, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_lshl_add_u64
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr2, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 killed $vgpr0_vgpr1, $vgpr1, killed $vgpr2_vgpr3, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr2, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec
+...
+
+---
+name:            vopd_no_combine_lshl_add_u64_fma_overlapping_src2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_lshl_add_u64_fma_overlapping_src2
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec
+    ; SCHED-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, $vgpr3, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_lshl_add_u64_fma_overlapping_src2
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec
+    ; PAIR-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, $vgpr3, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr1, $vgpr2_vgpr3, implicit $exec
+    $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr3, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_lshl_add_u64_fma_src0_conflict
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_lshl_add_u64_fma_src0_conflict
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, killed $vgpr5, $vgpr2_vgpr3, implicit $exec
+    ; SCHED-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr1, 0, killed $vgpr3, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_lshl_add_u64_fma_src0_conflict
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, killed $vgpr5, $vgpr2_vgpr3, implicit $exec
+    ; PAIR-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr1, 0, killed $vgpr3, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr5, $vgpr2_vgpr3, implicit $exec
+    $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr8 = V_FMA_F32_e64 0, $vgpr1, 0, $vgpr3, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_lshl_add_u64_fma_src1_conflict
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_lshl_add_u64_fma_src1_conflict
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr5, $vgpr2_vgpr3, implicit $exec
+    ; SCHED-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr5, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_lshl_add_u64_fma_src1_conflict
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr5, $vgpr2_vgpr3, implicit $exec
+    ; PAIR-NEXT: $vgpr9 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr5, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6_vgpr7 = V_LSHL_ADD_U64_e64 $vgpr0_vgpr1, $vgpr5, $vgpr2_vgpr3, implicit $exec
+    $vgpr9 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr8 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr5, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fma_f64_fma_f32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_f64_fma_f32
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr7 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr10_vgpr11 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr9 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr8, 0, killed $vgpr7, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_f64_fma_f32
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr7 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr10_vgpr11, $vgpr9 = V_DUAL_FMA_F64_e64_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr4_vgpr5, 0, killed $vgpr6, 0, killed $vgpr8, 0, killed $vgpr7, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr7 = IMPLICIT_DEF
+    $vgpr8 = IMPLICIT_DEF
+    $vgpr10_vgpr11 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr12 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr9 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr8, 0, $vgpr7, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_fma_f64_fma_f32_overlapping_src1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_fma_f64_fma_f32_overlapping_src1
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr7 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr10_vgpr11 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, killed $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr9 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr3, 0, killed $vgpr7, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_fma_f64_fma_f32_overlapping_src1
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4_vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr7 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr10_vgpr11 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, killed $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr12 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr9 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr3, 0, killed $vgpr7, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr7 = IMPLICIT_DEF
+    $vgpr10_vgpr11 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr12 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr9 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr3, 0, $vgpr7, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fma_f32_add_f64_e32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_f32_add_f64_e32
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_f32_add_f64_e32
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_ADD_F64_pseudo_e32_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr10 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit $mode, implicit $exec
+...
+
+# V_FMA_F32_e64 together with the e64 form of V_ADD_F64_pseudo, where every
+# operand that precedes a source is 0. The SCHED checks expect the two to be
+# placed back to back ahead of the unrelated V_BFM_B32; the PAIR checks expect
+# them fused into V_DUAL_ADD_F64_pseudo_e32_X_FMA_F32_e64_e96_gfx1250.
+---
+name:            vopd_combine_fma_f32_add_f64_e64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_f32_add_f64_e64
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e64 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_f32_add_f64_e64
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_ADD_F64_pseudo_e32_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr10 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr8_vgpr9 = V_ADD_F64_pseudo_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+...
+
+# V_FMA_F32_e64 together with V_ADD_F64_pseudo_e64 where the operand preceding
+# each source is 1 instead of 0 (presumably the neg source modifier, going by
+# the test name). The PAIR checks expect the pair to still be formed, with both
+# 1 values carried over into the V_DUAL instruction.
+---
+name:            vopd_combine_fma_f32_add_f64_e64_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_f32_add_f64_e64_neg
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e64 1, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_f32_add_f64_e64_neg
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_ADD_F64_pseudo_e32_X_FMA_F32_e64_e96_gfx1250 1, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 0, killed $vgpr6, 0, killed $vgpr4, 0, killed $vgpr5, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr6, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr10 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr8_vgpr9 = V_ADD_F64_pseudo_e64 1, $vgpr0_vgpr1, 1, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+...
+
+# V_FMA_F32_e64 together with a V_BITOP3_B32_e64 whose third source is the
+# immediate 0. The PAIR checks expect
+# V_DUAL_FMA_F32_e64_X_BITOP2_B32_e64_e96_gfx1250: the zero third source is
+# dropped and the trailing immediate (123) is kept.
+---
+name:            vopd_combine_fma_bitop
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_bitop
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_BITOP3_B32_e64 killed $vgpr3, killed $vgpr4, 0, 123, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_bitop
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5, $vgpr6 = V_DUAL_FMA_F32_e64_X_BITOP2_B32_e64_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, killed $vgpr3, killed $vgpr4, 123, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr7 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_BITOP3_B32_e64 $vgpr3, $vgpr4, 0, 123, implicit $exec
+...
+
+# Make sure the bitop3 modifier (presumably the trailing immediate, 123 here)
+# does not count against the constant bus limit: the FMA reads $sgpr0, the
+# BITOP3 reads $sgpr3, and the PAIR checks still expect the two to be fused.
+---
+name:            vopd_combine_fma_bitop_2_scalar_src
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fma_bitop_2_scalar_src
+    ; SCHED: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = V_FMA_F32_e64 0, $sgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_BITOP3_B32_e64 killed $sgpr3, killed $vgpr4, 0, 123, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_BFM_B32_e64 killed $sgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fma_bitop_2_scalar_src
+    ; PAIR: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5, $vgpr6 = V_DUAL_FMA_F32_e64_X_BITOP2_B32_e64_e96_gfx1250 0, $sgpr0, 0, $vgpr1, 0, killed $vgpr2, killed $sgpr3, killed $vgpr4, 123, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_BFM_B32_e64 killed $sgpr0, killed $vgpr1, implicit $exec
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $sgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = V_FMA_F32_e64 0, $sgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr7 = V_BFM_B32_e64 $sgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_BITOP3_B32_e64 $sgpr3, $vgpr4, 0, 123, implicit $exec
+...
+
+# V_BITOP3_B32_e64 (third source 0, trailing immediate 20) together with
+# V_MOV_B32_e32. The PAIR checks expect
+# V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250, with the V_MOV_B32 result
+# ($vgpr5) listed first even though the BITOP3 comes first in the input.
+---
+name:            vopd_combine_bitop_mov_b32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_bitop_mov_b32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_BITOP3_B32_e64 $vgpr0, $vgpr1, 0, 20, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_bitop_mov_b32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5, $vgpr3 = V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250 killed $vgpr2, $vgpr0, $vgpr1, 20, implicit $exec, implicit $exec, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = V_BITOP3_B32_e64 $vgpr0, $vgpr1, 0, 20, implicit $exec
+    $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 $vgpr2, implicit $exec
+...
+
+# Negative test: the third source of V_BITOP3_B32_e64 is a register ($vgpr2)
+# rather than an immediate. Neither the SCHED nor the PAIR checks contain a
+# V_DUAL instruction; all three instructions are expected to stay in input order.
+---
+name:            vopd_no_combine_mov_b32_bitop_non_imm_src2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_mov_b32_bitop_non_imm_src2
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr2, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_BITOP3_B32_e64 killed $vgpr0, killed $vgpr1, killed $vgpr2, 20, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_mov_b32_bitop_non_imm_src2
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr2, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr3 = V_BITOP3_B32_e64 killed $vgpr0, killed $vgpr1, killed $vgpr2, 20, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr5 = V_MOV_B32_e32 $vgpr2, implicit $exec
+    $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr3 = V_BITOP3_B32_e64 $vgpr0, $vgpr1, $vgpr2, 20, implicit $exec
+...
+
+# Negative test: the third source of V_BITOP3_B32_e64 is the immediate 1 rather
+# than 0. Neither the SCHED nor the PAIR checks contain a V_DUAL instruction;
+# all three instructions are expected to stay in input order.
+---
+name:            vopd_no_combine_mov_b32_bitop_non_zero_src2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_mov_b32_bitop_non_zero_src2
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr3 = V_BITOP3_B32_e64 killed $vgpr0, killed $vgpr1, 1, 20, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_mov_b32_bitop_non_zero_src2
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr3 = V_BITOP3_B32_e64 killed $vgpr0, killed $vgpr1, 1, 20, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr5 = V_MOV_B32_e32 $vgpr2, implicit $exec
+    $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr3 = V_BITOP3_B32_e64 $vgpr0, $vgpr1, 1, 20, implicit $exec
+...
+
+# Negative test: V_BITOP3_B32_e64 takes its third source from $vgpr1, which is
+# written by the preceding V_MOV_B32_dpp. The checks expect the three
+# instructions to stay separate and in input order, i.e. the BITOP3 is not
+# fused with the trailing V_MOV_B32_e32.
+---
+name: vopd_no_combine_bitop3_mov_dpp_vgpr_src2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; SCHED-LABEL: name: vopd_no_combine_bitop3_mov_dpp_vgpr_src2
+    ; SCHED: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: renamable $vgpr1 = V_MOV_B32_dpp killed $vgpr1, $vgpr3, 258, 15, 15, 0, implicit $exec
+    ; SCHED-NEXT: renamable $vgpr1 = V_BITOP3_B32_e64 killed $vgpr3, killed $vgpr4, killed $vgpr1, 128, implicit $exec
+    ; SCHED-NEXT: renamable $vgpr3 = V_MOV_B32_e32 -1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_bitop3_mov_dpp_vgpr_src2
+    ; PAIR: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: renamable $vgpr1 = V_MOV_B32_dpp killed $vgpr1, $vgpr3, 258, 15, 15, 0, implicit $exec
+    ; PAIR-NEXT: renamable $vgpr1 = V_BITOP3_B32_e64 killed $vgpr3, killed $vgpr4, killed $vgpr1, 128, implicit $exec
+    ; PAIR-NEXT: renamable $vgpr3 = V_MOV_B32_e32 -1, implicit $exec
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+
+    renamable $vgpr1 = V_MOV_B32_dpp killed $vgpr1, $vgpr3, 258, 15, 15, 0, implicit $exec
+    renamable $vgpr1 = V_BITOP3_B32_e64 killed $vgpr3, $vgpr4, killed $vgpr1, 128, implicit $exec
+    renamable $vgpr3 = V_MOV_B32_e32 -1, implicit $exec
+...
+
+# V_MOV_B32_e32 together with V_OR_B32_e32. The PAIR checks expect the OR to
+# become the BITOP2 half of V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250,
+# with 84 as the trailing immediate.
+---
+name:            vopd_combine_mov_or
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_mov_or
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_OR_B32_e32 $vgpr1, killed $vgpr2, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_mov_or
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3, $vgpr5 = V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250 $vgpr0, $vgpr1, killed $vgpr2, 84, implicit $exec, implicit $exec, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_OR_B32_e32 $vgpr1, $vgpr2, implicit $exec
+...
+
+# V_MOV_B32_e32 together with V_AND_B32_e32. The PAIR checks expect the AND to
+# become the BITOP2 half of V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250,
+# with 64 as the trailing immediate.
+---
+name:            vopd_combine_mov_and
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_mov_and
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_AND_B32_e32 $vgpr1, killed $vgpr2, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_mov_and
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3, $vgpr5 = V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250 $vgpr0, $vgpr1, killed $vgpr2, 64, implicit $exec, implicit $exec, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_AND_B32_e32 $vgpr1, $vgpr2, implicit $exec
+...
+
+# V_MOV_B32_e32 together with V_XOR_B32_e32. The PAIR checks expect the XOR to
+# become the BITOP2 half of V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250,
+# with 20 as the trailing immediate.
+---
+name:            vopd_combine_mov_xor
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_mov_xor
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_XOR_B32_e32 $vgpr1, killed $vgpr2, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_mov_xor
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3, $vgpr5 = V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250 $vgpr0, $vgpr1, killed $vgpr2, 20, implicit $exec, implicit $exec, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_XOR_B32_e32 $vgpr1, $vgpr2, implicit $exec
+...
+
+# V_MOV_B32_e32 together with V_XNOR_B32_e32. The PAIR checks expect the XNOR
+# to become the BITOP2 half of V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250,
+# with 65 as the trailing immediate.
+---
+name:            vopd_combine_mov_xnor
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_mov_xnor
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_XNOR_B32_e32 $vgpr1, killed $vgpr2, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_mov_xnor
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3, $vgpr5 = V_DUAL_MOV_B32_e32_X_BITOP2_B32_e64_e96_gfx1250 $vgpr0, $vgpr1, killed $vgpr2, 65, implicit $exec, implicit $exec, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_XNOR_B32_e32 $vgpr1, $vgpr2, implicit $exec
+...
+
+# V_NOT_B32 could also be combined as BITOP2, but that requires coming up with a
+# fake src1 that satisfies all register constraints and does not break liveness,
+# which is far from trivial. For now the checks in this test expect V_MOV_B32
+# and V_NOT_B32 to stay separate (no V_DUAL instruction).
+---
+name:            vopd_combine_mov_not
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_mov_not
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_NOT_B32_e32 killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_mov_not
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_NOT_B32_e32 killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_NOT_B32_e32 $vgpr1, implicit $exec
+...
+
+# V_ADD_F32_e32 together with V_NOT_B32_e32. Despite the "combine" in the name,
+# neither the SCHED nor the PAIR checks contain a V_DUAL instruction: the three
+# instructions are expected to stay separate and in input order.
+---
+name:            vopd_combine_fadd_not
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fadd_not
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_NOT_B32_e32 killed $vgpr2, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fadd_not
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr4 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_NOT_B32_e32 killed $vgpr2, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    $vgpr4 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr5 = V_NOT_B32_e32 $vgpr2, implicit $exec
+...
+
+# V_ADD_F64_pseudo_e32 together with V_NOT_B32_e32. Despite the "combine" in the
+# name, neither the SCHED nor the PAIR checks contain a V_DUAL instruction: the
+# three instructions are expected to stay separate and in input order.
+---
+name:            vopd_combine_fadd_f64_not
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fadd_f64_not
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr11 = V_NOT_B32_e32 killed $vgpr6, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fadd_f64_not
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr10 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr11 = V_NOT_B32_e32 killed $vgpr6, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr8_vgpr9 = V_ADD_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit $mode, implicit $exec
+    $vgpr10 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr11 = V_NOT_B32_e32 $vgpr6, implicit $exec
+...
+
+# Negative test: the first V_FMA_F32_e64 has the immediate 1 as its second
+# source. Neither the SCHED nor the PAIR checks contain a V_DUAL instruction;
+# both FMAs are expected to stay separate, in input order.
+---
+name:            vopd_no_combine_src1_imm
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_src1_imm
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, 1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_src1_imm
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, 1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, 1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+# Negative test: the second V_FMA_F32_e64 has the immediate 1 as its third
+# source. Neither the SCHED nor the PAIR checks contain a V_DUAL instruction;
+# both FMAs are expected to stay separate, in input order.
+---
+name:            vopd_no_combine_src2_imm
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_src2_imm
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, 1, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_src2_imm
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, 1, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, 1, 0, 0, implicit $mode, implicit $exec
+...
+
+# Negative test: the first V_FMA_F32_e64 reads $sgpr1 as its second source.
+# Neither the SCHED nor the PAIR checks contain a V_DUAL instruction; both FMAs
+# are expected to stay separate, in input order.
+---
+name:            vopd_no_combine_src1_sgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_src1_sgpr
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, killed $sgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_src1_sgpr
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, killed $sgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr1 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $sgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+# Negative test: the second V_FMA_F32_e64 reads $sgpr1 as its third source.
+# Neither the SCHED nor the PAIR checks contain a V_DUAL instruction; both FMAs
+# are expected to stay separate, in input order.
+---
+name:            vopd_no_combine_src2_sgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_src2_sgpr
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $sgpr1, 0, 0, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_src2_sgpr
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $sgpr1, 0, 0, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr1 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $sgpr1, 0, 0, implicit $mode, implicit $exec
+...
+
+# V_CNDMASK_B32_e32 (condition read implicitly from VCC) together with
+# V_ADD_F32_e32. The PAIR checks expect
+# V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx1250 with $vcc_lo kept as an implicit
+# use. Note that the input spells the implicit use as $vcc, while liveins and
+# the checks use $vcc_lo.
+---
+name:            vopd_combine_cndmask_fadd
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vcc_lo
+
+    ; SCHED-LABEL: name: vopd_combine_cndmask_fadd
+    ; SCHED: liveins: $vcc_lo
+    ; SCHED-NEXT: {{  $}}
+    ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $exec, implicit killed $vcc_lo
+    ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_cndmask_fadd
+    ; PAIR: liveins: $vcc_lo
+    ; PAIR-NEXT: {{  $}}
+    ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx1250 $vgpr0, $vgpr1, killed $vgpr3, killed $vgpr4, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $exec, implicit killed $vcc_lo, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $exec, implicit $vcc
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec
+...
+
+# V_CNDMASK_B32_e32 (condition read implicitly from VCC) together with
+# V_FMA_F32_e64. The PAIR checks expect
+# V_DUAL_CNDMASK_B32_e32_X_FMA_F32_e64_e96_gfx1250, where $vcc_lo appears as an
+# explicit operand after the CNDMASK sources and is also still listed as an
+# implicit use.
+---
+name:            vopd_combine_cndmask_fma
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vcc_lo
+
+    ; SCHED-LABEL: name: vopd_combine_cndmask_fma
+    ; SCHED: liveins: $vcc_lo
+    ; SCHED-NEXT: {{  $}}
+    ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $exec, implicit killed $vcc_lo
+    ; SCHED-NEXT: $vgpr7 = V_FMA_F32_e64 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_cndmask_fma
+    ; PAIR: liveins: $vcc_lo
+    ; PAIR-NEXT: {{  $}}
+    ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr5 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_FMA_F32_e64_e96_gfx1250 0, $vgpr0, 0, $vgpr1, $vcc_lo, 0, killed $vgpr3, 0, killed $vgpr4, 0, killed $vgpr5, implicit $exec, implicit $mode, implicit $exec, implicit killed $vcc_lo, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr5 = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $exec, implicit $vcc
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_FMA_F32_e64 0, $vgpr3, 0, $vgpr4, 0, $vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+
+# V_CNDMASK_B32_e64 with $vcc_lo as its explicit condition operand, together
+# with V_ADD_F32_e32. The PAIR checks expect
+# V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_e96_gfx1250 with $vcc_lo passed as an
+# explicit operand.
+---
+name:            vopd_combine_cndmask_e64_vcc_fadd
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_cndmask_e64_vcc_fadd
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vcc = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, killed $vcc_lo, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_cndmask_e64_vcc_fadd
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vcc = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, killed $vcc_lo, 0, killed $vgpr3, 0, killed $vgpr4, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vcc = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, $vcc_lo, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec
+...
+
+# V_CNDMASK_B32_e64 with $sgpr0 (rather than VCC) as its explicit condition
+# operand, together with V_ADD_F32_e32. The PAIR checks expect
+# V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_e96_gfx1250 with $sgpr0 passed as an
+# explicit operand.
+---
+name:            vopd_combine_cndmask_e64_sgpr_fadd
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_cndmask_e64_sgpr_fadd
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, killed $sgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_cndmask_e64_sgpr_fadd
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, killed $sgpr0, 0, killed $vgpr3, 0, killed $vgpr4, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, $sgpr0, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_cndmask_e64_neg_vcc_fadd
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_cndmask_e64_neg_vcc_fadd
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vcc = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 1, $vgpr1, killed $vcc_lo, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_cndmask_e64_neg_vcc_fadd
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vcc = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 1, $vgpr1, killed $vcc_lo, 0, killed $vgpr3, 0, killed $vgpr4, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vcc = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 1, $vgpr1, $vcc_lo, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_cndmask_e64_vcc_fadd_constant_bus_limit
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_cndmask_e64_vcc_fadd_constant_bus_limit
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vcc = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, killed $sgpr0, 0, $vgpr1, killed $vcc_lo, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $sgpr3, killed $vgpr4, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_cndmask_e64_vcc_fadd_constant_bus_limit
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vcc = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, killed $sgpr0, 0, $vgpr1, killed $vcc_lo, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_ADD_F32_e32 killed $sgpr3, killed $vgpr4, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $sgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    $vcc = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e64 0, $sgpr0, 0, $vgpr1, $vcc_lo, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_ADD_F32_e32 $sgpr3, $vgpr4, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_cndmask_e64_vcc_fadd_sgpr_src1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_cndmask_e64_vcc_fadd_sgpr_src1
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vcc = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, killed $sgpr0, killed $vcc_lo, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $sgpr3, killed $vgpr4, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_cndmask_e64_vcc_fadd_sgpr_src1
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vcc = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, killed $sgpr0, killed $vcc_lo, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_ADD_F32_e32 killed $sgpr3, killed $vgpr4, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $sgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    $vcc = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $sgpr0, $vcc_lo, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_ADD_F32_e32 $sgpr3, $vgpr4, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_cndmask_e64_cndmask_e32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vcc_lo
+
+    ; SCHED-LABEL: name: vopd_combine_cndmask_e64_cndmask_e32
+    ; SCHED: liveins: $vcc_lo
+    ; SCHED-NEXT: {{  $}}
+    ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, killed $sgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr3, killed $vgpr4, implicit killed $vcc_lo, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_cndmask_e64_cndmask_e32
+    ; PAIR: liveins: $vcc_lo
+    ; PAIR-NEXT: {{  $}}
+    ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_CNDMASK_B32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, killed $sgpr0, 0, killed $vgpr3, 0, killed $vgpr4, $vcc_lo, implicit $exec, implicit $exec, implicit killed $vcc_lo, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, $sgpr0, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_CNDMASK_B32_e32 $vgpr3, $vgpr4, implicit $vcc, implicit $exec
+...
+
+---
+name:            vopd_combine_cndmask_e32_cndmask_e64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vcc_lo
+
+    ; SCHED-LABEL: name: vopd_combine_cndmask_e32_cndmask_e64
+    ; SCHED: liveins: $vcc_lo
+    ; SCHED-NEXT: {{  $}}
+    ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit killed $vcc_lo, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e64 0, killed $vgpr3, 0, killed $vgpr4, killed $sgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_cndmask_e32_cndmask_e64
+    ; PAIR: liveins: $vcc_lo
+    ; PAIR-NEXT: {{  $}}
+    ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_CNDMASK_B32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, $vcc_lo, 0, killed $vgpr3, 0, killed $vgpr4, killed $sgpr0, implicit $exec, implicit killed $vcc_lo, implicit $exec, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $vcc, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr0, implicit $exec
+...
+
+---
+name:            vopd_combine_cndmask_e32_cndmask_e32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vcc_lo
+
+    ; SCHED-LABEL: name: vopd_combine_cndmask_e32_cndmask_e32
+    ; SCHED: liveins: $vcc_lo
+    ; SCHED-NEXT: {{  $}}
+    ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $vcc_lo, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr3, killed $vgpr4, implicit killed $vcc_lo, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_cndmask_e32_cndmask_e32
+    ; PAIR: liveins: $vcc_lo
+    ; PAIR-NEXT: {{  $}}
+    ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_CNDMASK_B32_e32_gfx1250 $vgpr0, $vgpr1, killed $vgpr3, killed $vgpr4, implicit $vcc_lo, implicit $exec, implicit $vcc_lo, implicit $exec, implicit killed $vcc_lo, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e32 $vgpr0, $vgpr1, implicit $vcc, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_CNDMASK_B32_e32 $vgpr3, $vgpr4, implicit $vcc, implicit $exec
+...
+
+---
+name:            vopd_combine_cndmask_e64_cndmask_e64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_cndmask_e64_cndmask_e64
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $sgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, killed $sgpr0, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e64 0, killed $vgpr3, 0, killed $vgpr4, killed $sgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_cndmask_e64_cndmask_e64
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $sgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_CNDMASK_B32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, killed $sgpr0, 0, killed $vgpr3, 0, killed $vgpr4, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    $sgpr1 = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr1, $sgpr0, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr1, implicit $exec
+...
+
+---
+name:            vopd_combine_fadd_e64_fadd_e64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fadd_e64_fadd_e64
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e64 0, killed $vgpr3, 0, killed $vgpr2, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fadd_e64_fadd_e64
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_ADD_F32_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 0, $vgpr1, 0, killed $vgpr3, 0, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_ADD_F32_e64 0, $vgpr3, 0, $vgpr2, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_fadd_e64_neg_fadd_e32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_fadd_e64_neg_fadd_e32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 1, $vgpr1, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr2, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_fadd_e64_neg_fadd_e32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_ADD_F32_e32_X_ADD_F32_e32_e96_gfx1250 0, $vgpr0, 1, $vgpr1, 0, killed $vgpr3, 0, killed $vgpr2, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 1, $vgpr1, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_no_combine_fadd_e64_abs_neg_fadd_e32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_no_combine_fadd_e64_abs_neg_fadd_e32
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 3, $vgpr1, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr2, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_fadd_e64_abs_neg_fadd_e32
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 3, $vgpr1, 0, 0, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr8 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ; PAIR-NEXT: $vgpr7 = V_ADD_F32_e32 killed $vgpr3, killed $vgpr2, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr6 = V_ADD_F32_e64 0, $vgpr0, 3, $vgpr1, 0, 0, implicit $mode, implicit $exec
+    $vgpr8 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr2, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_mul_f64_e64_sub_f32_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_mul_f64_e64_sub_f32_neg
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8_vgpr9 = V_MUL_F64_pseudo_e64 1, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_SUB_F32_e64 0, killed $vgpr6, 1, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_mul_f64_e64_sub_f32_neg
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MUL_F64_pseudo_e32_X_SUB_F32_e32_e96_gfx1250 1, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr6, 1, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr8_vgpr9 = V_MUL_F64_pseudo_e64 1, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+    $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_SUB_F32_e64 0, $vgpr6, 1, $vgpr4, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_mul_f64_e32_subrev_f32_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_mul_f64_e32_subrev_f32_neg
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8_vgpr9 = V_MUL_F64_pseudo_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_SUBREV_F32_e64 1, killed $vgpr6, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_mul_f64_e32_subrev_f32_neg
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MUL_F64_pseudo_e32_X_SUBREV_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 1, killed $vgpr6, 0, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr8_vgpr9 = V_MUL_F64_pseudo_e32 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit $mode, implicit $exec
+    $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_SUBREV_F32_e64 1, $vgpr6, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_min_num_f64_e64_mul_f32_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_min_num_f64_e64_mul_f32_neg
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8_vgpr9 = V_MIN_NUM_F64_e64 1, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_MUL_F32_e64 0, killed $vgpr6, 1, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_min_num_f64_e64_mul_f32_neg
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MIN_NUM_F64_e32_X_MUL_F32_e32_e96_gfx1250 1, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr6, 1, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr8_vgpr9 = V_MIN_NUM_F64_e64 1, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+    $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_MUL_F32_e64 0, $vgpr6, 1, $vgpr4, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_min_num_f64_e32_mul_legacy_f32_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_min_num_f64_e32_mul_legacy_f32_neg
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8_vgpr9 = V_MIN_NUM_F64_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_MUL_LEGACY_F32_e64 1, killed $vgpr6, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_min_num_f64_e32_mul_legacy_f32_neg
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MIN_NUM_F64_e32_X_MUL_LEGACY_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 1, killed $vgpr6, 0, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr8_vgpr9 = V_MIN_NUM_F64_e32 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit $mode, implicit $exec
+    $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_MUL_LEGACY_F32_e64 1, $vgpr6, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_max_num_f64_e64_min_f32_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_max_num_f64_e64_min_f32_neg
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8_vgpr9 = V_MAX_NUM_F64_e64 0, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_MIN_F32_e64 1, killed $vgpr6, 0, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_max_num_f64_e64_min_f32_neg
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MAX_NUM_F64_e32_X_MIN_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 1, killed $vgpr2_vgpr3, 1, killed $vgpr6, 0, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr8_vgpr9 = V_MAX_NUM_F64_e64 0, $vgpr0_vgpr1, 1, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+    $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_MIN_F32_e64 1, $vgpr6, 0, $vgpr4, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_max_num_f64_e32_max_f32_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_max_num_f64_e32_max_f32_neg
+    ; SCHED: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr8_vgpr9 = V_MAX_NUM_F64_e32 $vgpr0_vgpr1, killed $vgpr2_vgpr3, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_MAX_F32_e64 0, killed $vgpr6, 1, killed $vgpr4, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_max_num_f64_e32_max_f32_neg
+    ; PAIR: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr8_vgpr9, $vgpr7 = V_DUAL_MAX_NUM_F64_e32_X_MAX_F32_e32_e96_gfx1250 0, $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, killed $vgpr6, 1, killed $vgpr4, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr8_vgpr9 = V_MAX_NUM_F64_e32 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit $mode, implicit $exec
+    $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr7 = V_MAX_F32_e64 0, $vgpr6, 1, $vgpr4, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_add_f64_fmac_f32_e64_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_add_f64_fmac_f32_e64_neg
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr2_vgpr3 = V_ADD_F64_pseudo_e32 10, killed $vgpr10_vgpr11, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr6 = V_FMAC_F32_e64 0, $vgpr0, 1, $vgpr1, 0, killed $vgpr6, 0, 0, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 10, implicit $exec
+    ; SCHED-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_add_f64_fmac_f32_e64_neg
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr10_vgpr11 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr2_vgpr3, $vgpr6 = V_DUAL_ADD_F64_pseudo_e32_X_FMAC_F32_e32_e96_gfx1250 0, 10, 0, killed $vgpr10_vgpr11, 0, $vgpr0, 1, $vgpr1, killed $vgpr6, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 10, implicit $exec
+    ; PAIR-NEXT: $vgpr5 = V_BFM_B32_e64 killed $vgpr0, killed $vgpr1, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr6 = IMPLICIT_DEF
+    $vgpr10_vgpr11 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = V_ADD_F64_pseudo_e32 10, $vgpr10_vgpr11, implicit $mode, implicit $exec
+    $vgpr2_vgpr3 = V_MOV_B64_e32 10, implicit $exec
+    $vgpr5 = V_BFM_B32_e64 $vgpr0, $vgpr1, implicit $exec
+    $vgpr6 = V_FMAC_F32_e64 0, $vgpr0, 1, $vgpr1, 0, $vgpr6, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name:            vopd_combine_cndmask_e64_neg_cndmask_e64_neg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; SCHED-LABEL: name: vopd_combine_cndmask_e64_neg_cndmask_e64_neg
+    ; SCHED: $vgpr0 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; SCHED-NEXT: $vcc = IMPLICIT_DEF
+    ; SCHED-NEXT: $vgpr6 = V_CNDMASK_B32_e64 1, killed $vgpr0, 0, killed $vgpr1, $vcc_lo, implicit $exec
+    ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e64 1, killed $vgpr3, 0, killed $vgpr4, killed $vcc_lo, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_combine_cndmask_e64_neg_cndmask_e64_neg
+    ; PAIR: $vgpr0 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; PAIR-NEXT: $vcc = IMPLICIT_DEF
+    ; PAIR-NEXT: $vgpr6, $vgpr7 = V_DUAL_CNDMASK_B32_e32_X_CNDMASK_B32_e32_e96_gfx1250 1, killed $vgpr0, 0, killed $vgpr1, $vcc_lo, 1, killed $vgpr3, 0, killed $vgpr4, killed $vcc_lo, implicit $exec, implicit $exec, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vcc = IMPLICIT_DEF
+    $vgpr6 = V_CNDMASK_B32_e64 1, $vgpr0, 0, $vgpr1, $vcc_lo, implicit $exec
+    $vgpr7 = V_CNDMASK_B32_e64 1, $vgpr3, 0, $vgpr4, $vcc_lo, implicit $exec
+...
+
+---
+name: vopd_no_combine_dpp
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    ; SCHED-LABEL: name: vopd_no_combine_dpp
+    ; SCHED: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; SCHED-NEXT: {{  $}}
+    ; SCHED-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+    ; SCHED-NEXT: $vgpr0 = V_ADD_F32_e64_dpp killed $vgpr0, 0, killed $vgpr2, 0, killed $vgpr1, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_dpp
+    ; PAIR: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; PAIR-NEXT: {{  $}}
+    ; PAIR-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+    ; PAIR-NEXT: $vgpr0 = V_ADD_F32_e64_dpp killed $vgpr0, 0, killed $vgpr2, 0, killed $vgpr1, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+      $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+      $vgpr0 = V_ADD_F32_e64_dpp $vgpr0, 0, $vgpr2, 0, $vgpr1, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+...
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_errs.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_errs.s
new file mode 100644
index 0000000000000..81b79cb8c28da
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_errs.s
@@ -0,0 +1,326 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 %s 2>&1 | FileCheck %s -check-prefix=GFX12 --implicit-check-not=error: --strict-whitespace
+
+//===----------------------------------------------------------------------===//
+// A VOPD instruction can use only one literal.
+//===----------------------------------------------------------------------===//
+
+v_dual_mul_f32      v11, 0x24681357, v2          ::  v_dual_mul_f32      v10, 0xbabe, v5
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
+// GFX12-NEXT:{{^}}v_dual_mul_f32      v11, 0x24681357, v2          ::  v_dual_mul_f32      v10, 0xbabe, v5
+// GFX12-NEXT:{{^}}                                                                              ^
+
+//===----------------------------------------------------------------------===//
+// When 2 different literals are specified, show the location
+// of the last literal which is not a KImm, if any.
+//===----------------------------------------------------------------------===//
+
+v_dual_fmamk_f32    v122, v74, 0xa0172923, v161  ::  v_dual_lshlrev_b32  v247, 0xbabe, v99
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
+// GFX12-NEXT:{{^}}v_dual_fmamk_f32    v122, v74, 0xa0172923, v161  ::  v_dual_lshlrev_b32  v247, 0xbabe, v99
+// GFX12-NEXT:{{^}}                                                                               ^
+
+v_dual_add_f32      v5, 0xaf123456, v2           ::  v_dual_fmaak_f32     v6, v3, v1, 0xbabe
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
+// GFX12-NEXT:{{^}}v_dual_add_f32      v5, 0xaf123456, v2           ::  v_dual_fmaak_f32     v6, v3, v1, 0xbabe
+// GFX12-NEXT:{{^}}                        ^
+
+v_dual_add_f32      v5, 0xaf123456, v2           ::  v_dual_fmaak_f32     v6, 0xbabe, v1, 0xbabe
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
+// GFX12-NEXT:{{^}}v_dual_add_f32      v5, 0xaf123456, v2           ::  v_dual_fmaak_f32     v6, 0xbabe, v1, 0xbabe
+// GFX12-NEXT:{{^}}                                                                              ^
+
+v_dual_fmamk_f32    v122, 0xdeadbeef, 0xdeadbeef, v161 ::  v_dual_fmamk_f32  v123, 0xdeadbeef, 0x1234, v162
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
+// GFX12-NEXT:{{^}}v_dual_fmamk_f32    v122, 0xdeadbeef, 0xdeadbeef, v161 ::  v_dual_fmamk_f32  v123, 0xdeadbeef, 0x1234, v162
+// GFX12-NEXT:{{^}}                                                                                   ^
+
+v_dual_fmamk_f32    v122, 0xdeadbeef, 0xdeadbeef, v161 ::  v_dual_fmamk_f32  v123, s0, 0x1234, v162
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
+// GFX12-NEXT:{{^}}v_dual_fmamk_f32    v122, 0xdeadbeef, 0xdeadbeef, v161 ::  v_dual_fmamk_f32  v123, s0, 0x1234, v162
+// GFX12-NEXT:{{^}}                          ^
+
+//===----------------------------------------------------------------------===//
+// Check that assembler detects a different literal regardless of its location.
+//===----------------------------------------------------------------------===//
+
+v_dual_fmamk_f32    v122, 0xdeadbeef, 0xdeadbeef, v161 ::  v_dual_fmamk_f32  v123, 0xdeadbeef, 0x1234, v162
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
+// GFX12-NEXT:{{^}}v_dual_fmamk_f32    v122, 0xdeadbeef, 0xdeadbeef, v161 ::  v_dual_fmamk_f32  v123, 0xdeadbeef, 0x1234, v162
+// GFX12-NEXT:{{^}}                                                                                   ^
+
+v_dual_fmamk_f32    v122, 0xdeadbeef, 0xdeadbeef, v161 ::  v_dual_fmamk_f32  v123, 0x1234, 0xdeadbeef, v162
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
+// GFX12-NEXT:{{^}}v_dual_fmamk_f32    v122, 0xdeadbeef, 0xdeadbeef, v161 ::  v_dual_fmamk_f32  v123, 0x1234, 0xdeadbeef, v162
+// GFX12-NEXT:{{^}}                                                                                   ^
+
+v_dual_fmamk_f32    v122, 0xdeadbeef, 0x1234, v161     ::  v_dual_fmamk_f32  v123, 0xdeadbeef, 0xdeadbeef, v162
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
+// GFX12-NEXT:{{^}}v_dual_fmamk_f32    v122, 0xdeadbeef, 0x1234, v161     ::  v_dual_fmamk_f32  v123, 0xdeadbeef, 0xdeadbeef, v162
+// GFX12-NEXT:{{^}}                                                                                   ^
+
+v_dual_fmamk_f32    v122, 0x1234, 0xdeadbeef, v161     ::  v_dual_fmamk_f32  v123, 0xdeadbeef, 0xdeadbeef, v162
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
+// GFX12-NEXT:{{^}}v_dual_fmamk_f32    v122, 0x1234, 0xdeadbeef, v161     ::  v_dual_fmamk_f32  v123, 0xdeadbeef, 0xdeadbeef, v162
+// GFX12-NEXT:{{^}}                                                                                   ^
+
+//===----------------------------------------------------------------------===//
+// When 2 different literals are specified and all literals are KImm,
+// show the location of the last KImm literal.
+//===----------------------------------------------------------------------===//
+
+v_dual_fmamk_f32    v122, s0, 0xdeadbeef, v161   ::  v_dual_fmamk_f32  v123, s0, 0x1234, v162
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
+// GFX12-NEXT:{{^}}v_dual_fmamk_f32    v122, s0, 0xdeadbeef, v161   ::  v_dual_fmamk_f32  v123, s0, 0x1234, v162
+// GFX12-NEXT:{{^}}                                                                                 ^
+
+//===----------------------------------------------------------------------===//
+// A VOPD instruction cannot use more than 2 scalar operands
+//===----------------------------------------------------------------------===//
+
+// 2 different SGPRs + LITERAL
+
+v_dual_fmaak_f32    v122, s74, v161, 2.741       ::  v_dual_max_i32       v247, s75, v98
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_fmaak_f32    v122, s74, v161, 2.741       ::  v_dual_max_i32       v247, s75, v98
+// GFX12-NEXT:{{^}}                                                                                ^
+
+v_dual_mov_b32      v247, s73                    ::  v_dual_fmaak_f32     v122, s74, v161, 2.741
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_mov_b32      v247, s73                    ::  v_dual_fmaak_f32     v122, s74, v161, 2.741
+// GFX12-NEXT:{{^}}                                                                                ^
+
+v_dual_fmamk_f32    v122, s0, 0xbabe, v161       ::  v_dual_fmamk_f32     v123, s1, 0xbabe, v162
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_fmamk_f32    v122, s0, 0xbabe, v161       ::  v_dual_fmamk_f32     v123, s1, 0xbabe, v162
+// GFX12-NEXT:{{^}}                                                                                ^
+
+// 2 different SGPRs + VCC
+
+v_dual_add_f32      v255, s1, v2                 ::  v_dual_cndmask_b32   v6, s2, v3
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_add_f32      v255, s1, v2                 ::  v_dual_cndmask_b32   v6, s2, v3
+// GFX12-NEXT:{{^}}                                                                              ^
+
+v_dual_cndmask_b32   v6, s1, v3                  ::  v_dual_add_f32       v255, s2, v2
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_cndmask_b32   v6, s1, v3                  ::  v_dual_add_f32       v255, s2, v2
+// GFX12-NEXT:{{^}}                                                                                ^
+
+v_dual_cndmask_b32  v255, s1, v2                 ::  v_dual_cndmask_b32   v6, s2, v3
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_cndmask_b32  v255, s1, v2                 ::  v_dual_cndmask_b32   v6, s2, v3
+// GFX12-NEXT:{{^}}                                                                              ^
+
+v_dual_cndmask_b32 v1, s2, v3, vcc_lo :: v_dual_cndmask_b32 v2, s3, v4, vcc_lo
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v1, s2, v3, vcc_lo :: v_dual_cndmask_b32 v2, s3, v4, vcc_lo
+// GFX12-NEXT:{{^}}                                                                        ^
+
+// SGPR + LITERAL + VCC
+
+v_dual_cndmask_b32  v255, s1, v2                 ::  v_dual_mov_b32       v254, 0xbabe
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_cndmask_b32  v255, s1, v2                 ::  v_dual_mov_b32       v254, 0xbabe
+// GFX12-NEXT:{{^}}                                                                                ^
+
+v_dual_cndmask_b32  v255, 0xbabe, v2             ::  v_dual_mov_b32       v254, s1
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_cndmask_b32  v255, 0xbabe, v2             ::  v_dual_mov_b32       v254, s1
+// GFX12-NEXT:{{^}}                                                                                ^
+
+v_dual_cndmask_b32  v255, s3, v2                 ::  v_dual_fmamk_f32     v254, v1, 0xbabe, v162
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_cndmask_b32  v255, s3, v2                 ::  v_dual_fmamk_f32     v254, v1, 0xbabe, v162
+// GFX12-NEXT:{{^}}                          ^
+
+v_dual_cndmask_b32  v255, v1, v2                 ::  v_dual_fmamk_f32     v254, s3, 0xbabe, v162
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_cndmask_b32  v255, v1, v2                 ::  v_dual_fmamk_f32     v254, s3, 0xbabe, v162
+// GFX12-NEXT:{{^}}                                                                                ^
+
+// SGPR + VCC + VCC_LO
+// This is a special case because implicit VCC operand has 64 bit size.
+// SP3 does not accept this instruction either.
+
+v_dual_add_f32      v255, vcc_lo, v2             ::  v_dual_cndmask_b32   v6, s1, v3
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
+// GFX12-NEXT:{{^}}v_dual_add_f32      v255, vcc_lo, v2             ::  v_dual_cndmask_b32   v6, s1, v3
+// GFX12-NEXT:{{^}}                                                                              ^
+
+// FIXME: Error should be 'unsupported instruction'
+v_dual_add_f32 v255, v4, v2 :: v_dual_and_b32 v6, v1, v3
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12-NEXT:{{^}}v_dual_add_f32 v255, v4, v2 :: v_dual_and_b32 v6, v1, v3
+// GFX12-NEXT:{{^}}^
+
+v_dual_cndmask_b32 v255, v4, v2 :: v_dual_fmaak_f32 v7, v101, v3, 0xaf123456
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: one dst register must be even and the other odd
+// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v255, v4, v2 :: v_dual_fmaak_f32 v7, v101, v3, 0xaf123456
+// GFX12-NEXT:{{^}}                                                    ^
+
+v_dual_add_f32 v2, v2, v5 :: v_dual_mul_f32 v4, 130, v6
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: one dst register must be even and the other odd
+// GFX12-NEXT:{{^}}v_dual_add_f32 v2, v2, v5 :: v_dual_mul_f32 v4, 130, v6
+// GFX12-NEXT:{{^}}                                            ^
+
+// Even though it could be represented as VOPD3, fmac reads its dst and bank constraints still apply to src2.
+v_dual_fmac_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: one dst register must be even and the other odd
+// GFX12-NEXT:{{^}}v_dual_fmac_f32 v255, s105, v2 :: v_dual_fmac_f32 v7, s1, v3
+// GFX12-NEXT:{{^}}                                                  ^
+
+// Destination should be distinct even if not checked for parity in VOPD3
+v_dual_fmac_f32 v7, v4, v2 :: v_dual_fmac_f32 v7, v1, v3
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: dst registers must be distinct
+// GFX12-NEXT:{{^}}v_dual_fmac_f32 v7, v4, v2 :: v_dual_fmac_f32 v7, v1, v3
+// GFX12-NEXT:{{^}}                                              ^
+
+v_dual_add_f32 v7, v4, v2 :: v_dual_add_f32 v7, v5, v3
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: dst registers must be distinct
+// GFX12-NEXT:{{^}}v_dual_add_f32 v7, v4, v2 :: v_dual_add_f32 v7, v5, v3
+// GFX12-NEXT:{{^}}                                            ^
+
+//===----------------------------------------------------------------------===//
+// A 64-bit operand shall not have bank conflicts with both subregs.
+// There is also NO exception that a 64 bit operand can start with the same
+// register as 32 bit.
+//===----------------------------------------------------------------------===//
+v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v8, v6
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src0 operands must use different VGPR banks
+// GFX12-NEXT:{{^}}v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v8, v6
+// GFX12-NEXT:{{^}}                                                                ^
+
+v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v9, v6
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src0 operands must use different VGPR banks
+// GFX12-NEXT:{{^}}v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v9, v6
+// GFX12-NEXT:{{^}}                                                                ^
+
+v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v4, v6
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src0 operands must use different VGPR banks
+// GFX12-NEXT:{{^}}v_dual_add_f64 v[2:3], v[4:5], v[8:9] :: v_dual_ashrrev_i32 v5, v4, v6
+// GFX12-NEXT:{{^}}                                                                ^
+
+v_dual_add_f64 v[2:3], 1, v[8:9] :: v_dual_ashrrev_i32 v3, v7, v6
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: dst registers must be distinct
+// GFX12-NEXT:{{^}}v_dual_add_f64 v[2:3], 1, v[8:9] :: v_dual_ashrrev_i32 v3, v7, v6
+// GFX12-NEXT:{{^}}                                                       ^
+
+//===----------------------------------------------------------------------===//
+// Literals are not supported by VOPD3. Inline literals can only be encoded for
+// src0, but not for vsrc1 or vsrc2.
+//===----------------------------------------------------------------------===//
+v_dual_add_f64 v[2:3], 100.0, v[8:9] :: v_dual_ashrrev_i32 v4, v7, v6
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX12-NEXT:{{^}}v_dual_add_f64 v[2:3], 100.0, v[8:9] :: v_dual_ashrrev_i32 v4, v7, v6
+// GFX12-NEXT:{{^}}                       ^
+
+v_dual_fma_f32 v255, s105, v2, v255 :: v_dual_fma_f32 v7, 1, 0, v8
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, v2, v255 :: v_dual_fma_f32 v7, 1, 0, v8
+// GFX12-NEXT:{{^}}                                                             ^
+
+v_dual_fma_f32 v255, s105, v2, v255 :: v_dual_fma_f32 v7, 1, v0, 0
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, v2, v255 :: v_dual_fma_f32 v7, 1, v0, 0
+// GFX12-NEXT:{{^}}                                                                 ^
+
+//===----------------------------------------------------------------------===//
+// Check that we properly detect bank conflicts if instruction is derived from
+// VOP3.
+//===----------------------------------------------------------------------===//
+v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v8, v7, v6
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src0 operands must use different VGPR banks
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v8, v7, v6
+// GFX12-NEXT:{{^}}                                                    ^
+
+v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v5, v6, v8
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src1 operands must use different VGPR banks
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v5, v6, v8
+// GFX12-NEXT:{{^}}                                                        ^
+
+v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v5, v8, v7
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src2 operands must use different VGPR banks
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fma_f32 v3, v5, v8, v7
+// GFX12-NEXT:{{^}}                                                            ^
+
+v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fmac_f32 v7, v5, v8
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src2 operands must use different VGPR banks
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v1, v4, v2, v3 :: v_dual_fmac_f32 v7, v5, v8
+// GFX12-NEXT:{{^}}                           ^
+
+v_dual_fmac_f32 v7, v5, v8 :: v_dual_fma_f32 v1, v4, v2, v3
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: src2 operands must use different VGPR banks
+// GFX12-NEXT:{{^}}v_dual_fmac_f32 v7, v5, v8 :: v_dual_fma_f32 v1, v4, v2, v3
+// GFX12-NEXT:{{^}}                                                         ^
+
+//===----------------------------------------------------------------------===//
+// ABS modifiers are not supported
+//===----------------------------------------------------------------------===//
+v_dual_fma_f32 v255, |s105|, v0, v1 :: v_dual_add_nc_u32 v7, s1, v0
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, |s105|, v0, v1 :: v_dual_add_nc_u32 v7, s1, v0
+// GFX12-NEXT:{{^}}                      ^
+
+v_dual_fma_f32 v255, s105, abs(v0), v1 :: v_dual_fma_f32 v7, s1, v0, v8
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, abs(v0), v1 :: v_dual_fma_f32 v7, s1, v0, v8
+// GFX12-NEXT:{{^}}                               ^
+
+v_dual_fma_f32 v255, s105, v0, |v1| :: v_dual_fma_f32 v7, s1, v0, v8
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, v0, |v1| :: v_dual_fma_f32 v7, s1, v0, v8
+// GFX12-NEXT:{{^}}                                ^
+
+v_dual_add_nc_u32 v255, s105, v0 :: v_dual_fma_f32 v7, |1|, v0, v8
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions
+// GFX12-NEXT:{{^}}v_dual_add_nc_u32 v255, s105, v0 :: v_dual_fma_f32 v7, |1|, v0, v8
+// GFX12-NEXT:{{^}}                                                        ^
+
+v_dual_fma_f32 v255, s105, v0, v1 :: v_dual_fma_f32 v7, s1, -|v0|, v8
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, v0, v1 :: v_dual_fma_f32 v7, s1, -|v0|, v8
+// GFX12-NEXT:{{^}}                                                              ^
+
+v_dual_fma_f32 v255, s105, v0, v1 :: v_dual_fma_f32 v7, s1, v0, -abs(v8)
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, s105, v0, v1 :: v_dual_fma_f32 v7, s1, v0, -abs(v8)
+// GFX12-NEXT:{{^}}                                                                     ^
+
+v_dual_mul_f64 v[6:7], -|v[2:3]|, v[4:5] :: v_dual_fma_f32 v255, -s105, v2, v1
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: ABS not allowed in VOPD3 instructions
+// GFX12-NEXT:{{^}}v_dual_mul_f64 v[6:7], -|v[2:3]|, v[4:5] :: v_dual_fma_f32 v255, -s105, v2, v1
+// GFX12-NEXT:{{^}}                         ^
+
+//===----------------------------------------------------------------------===//
+// No modifiers on non-fp part of an instruction
+//===----------------------------------------------------------------------===//
+v_dual_fma_f32 v255, -s105, v0, v1 :: v_dual_lshrrev_b32 v7, -s1, v0
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, -s105, v0, v1 :: v_dual_lshrrev_b32 v7, -s1, v0
+// GFX12-NEXT:{{^}}                                                              ^
+
+v_dual_fma_f32 v255, -s105, v0, v1 :: v_dual_max_i32 v7, s1, -v0
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX12-NEXT:{{^}}v_dual_fma_f32 v255, -s105, v0, v1 :: v_dual_max_i32 v7, s1, -v0
+// GFX12-NEXT:{{^}}                                                              ^
+
+v_dual_add_nc_u32 v7, -s1, v0 :: v_dual_fma_f32 v255, -s105, v0, v1
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand
+// GFX12-NEXT:{{^}}v_dual_add_nc_u32 v7, -s1, v0 :: v_dual_fma_f32 v255, -s105, v0, v1
+// GFX12-NEXT:{{^}}                      ^
+
+v_dual_sub_nc_u32 v7, s1, -v0 :: v_dual_fma_f32 v255, -s105, v0, v1
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand
+// GFX12-NEXT:{{^}}v_dual_sub_nc_u32 v7, s1, -v0 :: v_dual_fma_f32 v255, -s105, v0, v1
+// GFX12-NEXT:{{^}}                          ^
+
+v_dual_cndmask_b32 v28, sext(v15), v15, s46 :: v_dual_cndmask_b32 v29, v13, -v13, s46
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand
+// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v28, sext(v15), v15, s46 :: v_dual_cndmask_b32 v29, v13, -v13, s46
+// GFX12-NEXT:{{^}}                        ^
+
+
+v_dual_cndmask_b32 v28, -v15, v15, s46 :: v_dual_cndmask_b32 v29, sext(v13), -v13, s46
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand
+// GFX12-NEXT:{{^}}v_dual_cndmask_b32 v28, -v15, v15, s46 :: v_dual_cndmask_b32 v29, sext(v13), -v13, s46
+// GFX12-NEXT:{{^}}                                                                  ^
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_features.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_features.s
new file mode 100644
index 0000000000000..cdd9f301e2506
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vopd_features.s
@@ -0,0 +1,109 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck -check-prefix=GFX12 %s
+
+//===----------------------------------------------------------------------===//
+// A VOPD instruction can use one or more literals,
+// provided that they are identical.
+//===----------------------------------------------------------------------===//
+
+// LITERAL
+
+v_dual_mul_f32      v11, v1, v2                  ::  v_dual_mul_f32      v10, 0x24681357, v5
+// GFX12: encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
+
+// LITERAL*2
+
+v_dual_mul_f32      v11, 0x24681357, v2          ::  v_dual_mul_f32      v10, 0x24681357, v5
+// GFX12: encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
+
+// LITERAL + KIMM
+
+v_dual_add_f32      v5, 0xaf123456, v2           ::  v_dual_fmaak_f32     v6, v3, v1, 0xaf123456 ;
+// GFX12: encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
+
+// KIMM + LITERAL
+
+v_dual_fmamk_f32    v122, v74, 0xa0172923, v161  ::  v_dual_lshlrev_b32   v247, 0xa0172923, v99
+// GFX12: encoding: [0x4a,0x43,0xa3,0xc8,0xff,0xc6,0xf6,0x7a,0x23,0x29,0x17,0xa0]
+
+// KIMM*2
+
+v_dual_fmamk_f32    v122, 0xdeadbeef, 0xdeadbeef, v161 ::  v_dual_fmamk_f32  v123, 0xdeadbeef, 0xdeadbeef, v162
+// GFX12: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
+
+//===----------------------------------------------------------------------===//
+// A VOPD instruction can use 2 scalar operands,
+// but implicit VCC must be counted in.
+//===----------------------------------------------------------------------===//
+
+// 2 different SGPRs
+
+v_dual_mul_f32      v0, s1, v2                   ::  v_dual_mul_f32       v3, s4, v5
+// GFX12: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
+
+// SGPR + LITERAL
+
+v_dual_fmaak_f32    v122, s74, v161, 2.741       ::  v_dual_max_i32       v247, v160, v98
+// GFX12: encoding: [0x4a,0x42,0x6f,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
+
+v_dual_mov_b32      v247, v160                   ::  v_dual_fmaak_f32     v122, s74, v161, 2.741
+// GFX12: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
+
+// SGPR*2 + LITERAL
+
+v_dual_fmaak_f32    v122, s74, v161, 2.741       ::  v_dual_max_i32       v247, s74, v98
+// GFX12: encoding: [0x4a,0x42,0x6f,0xc8,0x4a,0xc4,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
+
+// SGPR + LITERAL*2
+
+v_dual_fmaak_f32    v122, s74, v161, 2.741       ::  v_dual_fmamk_f32     v3, v6, 2.741, v1
+// GFX12: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
+
+// SGPR*2 + LITERAL*2
+
+v_dual_fmaak_f32    v122, s74, v161, 2.741       ::  v_dual_fmamk_f32     v3, s74, 2.741, v1
+// GFX12: encoding: [0x4a,0x42,0x45,0xc8,0x4a,0x02,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
+
+// LITERAL + VCC
+
+v_dual_fmaak_f32    v122, v0, v161, 2.741       ::  v_dual_cndmask_b32   v1, v2, v3
+// GFX12: encoding: [0x00,0x43,0x53,0xc8,0x02,0x07,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
+
+// LITERAL*2 + VCC
+
+v_dual_fmaak_f32    v122, v0, v161, 2.741       ::  v_dual_cndmask_b32   v1, 2.741, v3
+// GFX12: encoding: [0x00,0x43,0x53,0xc8,0xff,0x06,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
+
+// LITERAL*2 + VCC*2
+
+v_dual_cndmask_b32  v255, 0xbabe, v2             ::  v_dual_cndmask_b32   v6, 0xbabe, v3
+// GFX12: encoding: [0xff,0x04,0x52,0xca,0xff,0x06,0x06,0xff,0xbe,0xba,0x00,0x00]
+
+// SGPR*2 + VCC
+
+v_dual_add_f32      v255, s105, v2               ::  v_dual_cndmask_b32   v6, s105, v3
+// GFX12: encoding: [0x69,0x04,0x12,0xc9,0x69,0x06,0x06,0xff]
+
+// SGPR*2 + VCC*2
+
+v_dual_cndmask_b32  v255, s1, v2                 ::  v_dual_cndmask_b32   v6, s1, v3
+// GFX12: encoding: [0x01,0x04,0x52,0xca,0x01,0x06,0x06,0xff]
+
+// VCC*2
+
+v_dual_add_f32      v255, vcc_lo, v2             ::  v_dual_cndmask_b32   v6, v1, v3
+// GFX12: encoding: [0x6a,0x04,0x12,0xc9,0x01,0x07,0x06,0xff]
+
+//===----------------------------------------------------------------------===//
+// A VOPD OpY mov_b32 instruction uses SRC2 source-cache if OpX is also mov_b32
+//===----------------------------------------------------------------------===//
+
+v_dual_mov_b32      v2, v5                       ::  v_dual_mov_b32       v3, v1
+// GFX12: encoding: [0x05,0x01,0x10,0xca,0x01,0x01,0x02,0x02]
+
+//===----------------------------------------------------------------------===//
+// SRCX0 and SRCY0 may use the same bank if they are using the same VGPR; same for
+// VSRCX1 and VSRCY1.
+//===----------------------------------------------------------------------===//
+
+v_dual_add_f32 v2, v2, v5 :: v_dual_mul_f32 v3, v2, v5
+// GFX12: encoding: [0x02,0x0b,0x06,0xc9,0x02,0x0b,0x02,0x02]



More information about the llvm-commits mailing list