[llvm] 0483c91 - [AMDGPU] gfx11 CodeGen for new DPP instructions

Joe Nash via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 5 07:47:52 PDT 2022


Author: Joe Nash
Date: 2022-07-05T10:17:59-04:00
New Revision: 0483c91eee9e60c82ff09ec7fd84ea0519122b79

URL: https://github.com/llvm/llvm-project/commit/0483c91eee9e60c82ff09ec7fd84ea0519122b79
DIFF: https://github.com/llvm/llvm-project/commit/0483c91eee9e60c82ff09ec7fd84ea0519122b79.diff

LOG: [AMDGPU] gfx11 CodeGen for new DPP instructions

Modifies the GCNDPPCombine pass to enable DPP formation for the new DPP
instruction in gfx11, namely VOP3 encoded instructions with DPP and VOPC
with DPP.

Depends on D128656

Reviewed By: #amdgpu, rampitec

Differential Revision: https://reviews.llvm.org/D128682

Added: 
    llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
    llvm/test/CodeGen/AMDGPU/vopc_dpp.mir

Modified: 
    llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/lib/Target/AMDGPU/VOPInstructions.td
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
    llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
    llvm/test/CodeGen/AMDGPU/dpp_combine.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 1cd880eaa48e..5d254518c67a 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -143,13 +143,20 @@ bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
 }
 
 int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const {
-  auto DPP32 = AMDGPU::getDPPOp32(Op);
+  int DPP32 = AMDGPU::getDPPOp32(Op);
   if (IsShrinkable) {
     assert(DPP32 == -1);
-    auto E32 = AMDGPU::getVOPe32(Op);
+    int E32 = AMDGPU::getVOPe32(Op);
     DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
   }
-  return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
+  if (DPP32 != -1 && TII->pseudoToMCOpcode(DPP32) != -1)
+    return DPP32;
+  int DPP64 = -1;
+  if (ST->hasVOP3DPP())
+    DPP64 = AMDGPU::getDPPOp64(Op);
+  if (DPP64 != -1 && TII->pseudoToMCOpcode(DPP64) != -1)
+    return DPP64;
+  return -1;
 }
 
 // tracks the register operand definition and returns:
@@ -188,6 +195,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
          MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
          MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
 
+  bool HasVOP3DPP = ST->hasVOP3DPP();
   auto OrigOp = OrigMI.getOpcode();
   auto DPPOp = getDPPOp(OrigOp, IsShrinkable);
   if (DPPOp == -1) {
@@ -201,10 +209,18 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
 
   bool Fail = false;
   do {
-    auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
-    assert(Dst);
-    DPPInst.add(*Dst);
-    int NumOperands = 1;
+    int NumOperands = 0;
+    if (auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) {
+      DPPInst.add(*Dst);
+      ++NumOperands;
+    }
+    if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
+      if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
+        DPPInst.add(*SDst);
+        ++NumOperands;
+      }
+      // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst
+    }
 
     const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
     if (OldIdx != -1) {
@@ -230,7 +246,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           AMDGPU::OpName::src0_modifiers)) {
       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
                                           AMDGPU::OpName::src0_modifiers));
-      assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+      assert(HasVOP3DPP ||
+             (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
       DPPInst.addImm(Mod0->getImm());
       ++NumOperands;
     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
@@ -253,7 +270,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           AMDGPU::OpName::src1_modifiers)) {
       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
                                           AMDGPU::OpName::src1_modifiers));
-      assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+      assert(HasVOP3DPP ||
+             (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
       DPPInst.addImm(Mod1->getImm());
       ++NumOperands;
     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
@@ -261,7 +279,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
       DPPInst.addImm(0);
       ++NumOperands;
     }
-    if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
+    if (Src1) {
       if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
         LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
         Fail = true;
@@ -270,8 +289,17 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
       DPPInst.add(*Src1);
       ++NumOperands;
     }
-
-    if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
+    if (auto *Mod2 =
+            TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers)) {
+      assert(NumOperands ==
+             AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src2_modifiers));
+      assert(HasVOP3DPP ||
+             (0LL == (Mod2->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
+      DPPInst.addImm(Mod2->getImm());
+      ++NumOperands;
+    }
+    auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
+    if (Src2) {
       if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
           !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
         LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
@@ -279,8 +307,62 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
         break;
       }
       DPPInst.add(*Src2);
+      ++NumOperands;
+    }
+    if (HasVOP3DPP) {
+      auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);
+      if (ClampOpr &&
+          AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::clamp) != -1) {
+        DPPInst.addImm(ClampOpr->getImm());
+      }
+      auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in);
+      if (VdstInOpr &&
+          AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::vdst_in) != -1) {
+        DPPInst.add(*VdstInOpr);
+      }
+      auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod);
+      if (OmodOpr &&
+          AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::omod) != -1) {
+        DPPInst.addImm(OmodOpr->getImm());
+      }
+      // Validate OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to
+      // all 1.
+      if (auto *OpSelOpr =
+              TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) {
+        auto OpSel = OpSelOpr->getImm();
+        if (OpSel != 0) {
+          LLVM_DEBUG(dbgs() << "  failed: op_sel must be zero\n");
+          Fail = true;
+          break;
+        }
+        if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel) != -1)
+          DPPInst.addImm(OpSel);
+      }
+      if (auto *OpSelHiOpr =
+              TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) {
+        auto OpSelHi = OpSelHiOpr->getImm();
+        // Only vop3p has op_sel_hi, and all vop3p have 3 operands, so check
+        // the bitmask for 3 op_sel_hi bits set
+        assert(Src2 && "Expected vop3p with 3 operands");
+        if (OpSelHi != 7) {
+          LLVM_DEBUG(dbgs() << "  failed: op_sel_hi must be all set to one\n");
+          Fail = true;
+          break;
+        }
+        if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel_hi) != -1)
+          DPPInst.addImm(OpSelHi);
+      }
+      auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo);
+      if (NegOpr &&
+          AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_lo) != -1) {
+        DPPInst.addImm(NegOpr->getImm());
+      }
+      auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi);
+      if (NegHiOpr &&
+          AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_hi) != -1) {
+        DPPInst.addImm(NegHiOpr->getImm());
+      }
     }
-
     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
@@ -531,8 +613,16 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
     }
 
     bool IsShrinkable = isShrinkable(OrigMI);
-    if (!(IsShrinkable || TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
-      LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
+    if (!(IsShrinkable ||
+          ((TII->isVOP3P(OrigOp) || TII->isVOPC(OrigOp) ||
+            TII->isVOP3(OrigOp)) &&
+           ST->hasVOP3DPP()) ||
+          TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
+      LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3/3P/C\n");
+      break;
+    }
+    if (OrigMI.modifiesRegister(AMDGPU::EXEC, ST->getRegisterInfo())) {
+      LLVM_DEBUG(dbgs() << "  failed: can't combine v_cmpx\n");
       break;
     }
 
@@ -543,9 +633,12 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
       break;
     }
 
+    auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
     assert(Src0 && "Src1 without Src0?");
-    if (Src1 && Src1->isIdenticalTo(*Src0)) {
-      assert(Src1->isReg());
+    if ((Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) ||
+                         (Src2 && Src2->isIdenticalTo(*Src0)))) ||
+        (Use == Src1 && (Src1->isIdenticalTo(*Src0) ||
+                         (Src2 && Src2->isIdenticalTo(*Src1))))) {
       LLVM_DEBUG(
           dbgs()
           << "  " << OrigMI

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 311f9f68e675..1b411eb83eb3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1241,6 +1241,9 @@ namespace AMDGPU {
   LLVM_READONLY
   int getDPPOp32(uint16_t Opcode);
 
+  LLVM_READONLY
+  int getDPPOp64(uint16_t Opcode);
+
   LLVM_READONLY
   int getBasicFromSDWAOp(uint16_t Opcode);
 

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 78adb0af91ce..c54c70b82e7e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2194,21 +2194,21 @@ class getAsmVOP3DPPBase <int NumSrcArgs, bit HasDst, bit HasClamp,
                        "$sdst",
                        "$vdst"),
                     ""); // use $sdst for VOPC
-  string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
-  string isrc1 = !if(!eq(NumSrcArgs, 1), "",
-                     !if(!eq(NumSrcArgs, 2), " $src1",
-                                             " $src1,"));
-  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
-
-  string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
-  string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
-                     !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
-                                             " $src1_modifiers,"));
-  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
-
-  string src0 = !if(Src0HasMods, fsrc0, isrc0);
-  string src1 = !if(Src1HasMods, fsrc1, isrc1);
-  string src2 = !if(Src2HasMods, fsrc2, isrc2);
+  string src0nomods = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+  string src1nomods = !if(!eq(NumSrcArgs, 1), "",
+                    !if(!eq(NumSrcArgs, 2), " $src1",
+                                            " $src1,"));
+  string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", "");
+
+  string src0mods = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+  string src1mods = !if(!eq(NumSrcArgs, 1), "",
+                    !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+                                            " $src1_modifiers,"));
+  string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
+
+  string src0 = !if(Src0HasMods, src0mods, src0nomods);
+  string src1 = !if(Src1HasMods, src1mods, src1nomods);
+  string src2 = !if(Src2HasMods, src2mods, src2nomods);
   string opsel = !if(HasOpSel, "$op_sel", "");
   string 3PMods = !if(IsVOP3P,
                       !if(HasOpSel, "$op_sel_hi", "")
@@ -2559,8 +2559,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
   // the asm operand name via this HasModifiers flag
   field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
   field string AsmVOP3DPPBase = getAsmVOP3DPPBase<NumSrcArgs, HasDst, HasClamp,
-   HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasSrc0FloatMods, HasSrc1FloatMods,
-   HasSrc2FloatMods, DstVT >.ret;
+   HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
+   HasModifiers, DstVT>.ret;
   field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3DPPBase>.ret;
   field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3DPPBase>.ret;
   field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret;
@@ -2800,6 +2800,14 @@ def getDPPOp32 : InstrMapping {
   let ValueCols = [["DPP"]];
 }
 
+def getDPPOp64 : InstrMapping {
+  let FilterClass = "VOP";
+  let RowFields = ["OpName"];
+  let ColFields = ["AsmVariantName"];
+  let KeyCol = ["VOP3"];
+  let ValueCols = [["VOP3_DPP"]];
+}
+
 // Maps an commuted opcode to its original version
 def getCommuteOrig : InstrMapping {
   let FilterClass = "Commutable_REV";

diff  --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 8cd3d2fe2c47..2376347bafcc 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1215,7 +1215,9 @@ class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VO
   let IsMAI    = !if(Features.IsMAI,    1, P.IsMAI);
   let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
 
-  let HasModifiers = !if(Features.IsMAI, 0, !or(Features.IsPacked, P.HasModifiers));
+  let HasModifiers =
+      !if (Features.IsMAI, 0,
+           !or(Features.IsPacked, Features.HasOpSel, P.HasModifiers));
 }
 
 class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_Base<P, Features> {

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
index 09a79eda47c6..178b60463f1d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX11 %s
 
 ; FIXME: Merge with DAG test
 
@@ -29,6 +30,18 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
 ; GFX10-NEXT:    v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
 ; GFX10-NEXT:    global_store_dword v1, v0, s[2:3] ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x00,0x02,0x00]
 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX11-LABEL: dpp_test:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x2c ; encoding: [0x80,0x00,0x00,0xf4,0x2c,0x00,0x00,0xf8]
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24 ; encoding: [0x00,0x00,0x04,0xf4,0x24,0x00,0x00,0xf8]
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
+; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 ; encoding: [0x80,0x00,0x10,0xca,0x02,0x00,0x00,0x01]
+; GFX11-NEXT:    v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
+; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1] ; encoding: [0x00,0x00,0x6a,0xdc,0x01,0x00,0x00,0x00]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
+; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
   %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 true) #0
   store i32 %tmp0, i32 addrspace(1)* %out
   ret void
@@ -58,6 +71,18 @@ define amdgpu_kernel void @mov_dpp64_test(i64 addrspace(1)* %out, i64 %in1) {
 ; GFX10-NEXT:    v_mov_b32_dpp v1, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x02,0x7e,0x01,0x01,0x00,0x11]
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] ; encoding: [0x00,0x80,0x74,0xdc,0x02,0x00,0x00,0x00]
 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX11-LABEL: mov_dpp64_test:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24 ; encoding: [0x00,0x00,0x08,0xf4,0x24,0x00,0x00,0xf8]
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0 ; encoding: [0x80,0x02,0x04,0x7e]
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
+; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; encoding: [0x02,0x00,0x10,0xca,0x03,0x00,0x00,0x00]
+; GFX11-NEXT:    v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x00,0x11]
+; GFX11-NEXT:    v_mov_b32_dpp v1, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; encoding: [0xfa,0x02,0x02,0x7e,0x01,0x01,0x00,0x11]
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1] ; encoding: [0x00,0x00,0x6e,0xdc,0x02,0x00,0x00,0x00]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
+; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
   %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 false) #0
   store i64 %tmp0, i64 addrspace(1)* %out
   ret void

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
index 846e469e816b..a9b6a70f416f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
 
 define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
 ; GFX8-LABEL: dpp_test:
@@ -29,6 +30,19 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2)
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[4:5]
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: dpp_test:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b64 s[2:3], s[0:1], 0x2c
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX11-NEXT:    v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
   %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false)
   store i32 %tmp0, i32 addrspace(1)* %out
   ret void
@@ -65,6 +79,20 @@ define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i
 ; GFX10-NEXT:    v_mov_b32_dpp v3, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
 ; GFX10-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1]
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: update_dpp64_test:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GFX11-NEXT:    global_load_b64 v[0:1], v4, s[0:1]
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX11-NEXT:    v_mov_b32_dpp v3, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+; GFX11-NEXT:    global_store_b64 v4, v[2:3], s[0:1]
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %id
   %load = load i64, i64 addrspace(1)* %gep

diff  --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
index e55c80436f8f..04c889add9f8 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
@@ -1,9 +1,10 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,GFX90A
 ; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS
 
 ; GCN-LABEL: {{^}}dpp64_ceil:
-; GCN:           global_load_dwordx2 [[V:v\[[0-9:]+\]]],
+; GCN:           global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
 ; DPP64:         v_ceil_f64_dpp [[V]], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
 ; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
 define amdgpu_kernel void @dpp64_ceil(i64 addrspace(1)* %arg, i64 %in1) {
@@ -19,7 +20,7 @@ define amdgpu_kernel void @dpp64_ceil(i64 addrspace(1)* %arg, i64 %in1) {
 }
 
 ; GCN-LABEL: {{^}}dpp64_rcp:
-; GCN:           global_load_dwordx2 [[V:v\[[0-9:]+\]]],
+; GCN:           global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
 ; DPP64:         v_rcp_f64_dpp [[V]], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
 ; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
 define amdgpu_kernel void @dpp64_rcp(i64 addrspace(1)* %arg, i64 %in1) {
@@ -50,12 +51,12 @@ define amdgpu_kernel void @dpp64_rcp_unsupported_ctl(i64 addrspace(1)* %arg, i64
 }
 
 ; GCN-LABEL: {{^}}dpp64_div:
-; GCN:            global_load_dwordx2 [[V:v\[[0-9:]+\]]],
-; DPPMOV64:       v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
-; GFX90A-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
-; GFX10-COUNT-2:  v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
-; GCN:            v_div_scale_f64
-; GCN:            v_rcp_f64_e32
+; GCN:               global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
+; DPPMOV64:          v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
+; GFX90A-COUNT-2:    v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
+; GFX10PLUS-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
+; GCN:               v_div_scale_f64
+; GCN:               v_rcp_f64_e32
 define amdgpu_kernel void @dpp64_div(i64 addrspace(1)* %arg, i64 %in1) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %id

diff  --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
index d84821e72644..d127f7342421 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
@@ -1,8 +1,9 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
 
 ; GCN-LABEL: {{^}}dpp_add:
-; GCN: global_load_dword [[V:v[0-9]+]],
+; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]],
 ; GCN: v_add_{{(nc_)?}}u32_dpp [[V]], [[V]], [[V]] quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
 define amdgpu_kernel void @dpp_add(i32 addrspace(1)* %arg) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -15,7 +16,7 @@ define amdgpu_kernel void @dpp_add(i32 addrspace(1)* %arg) {
 }
 
 ; GCN-LABEL: {{^}}dpp_ceil:
-; GCN: global_load_dword [[V:v[0-9]+]],
+; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]],
 ; GCN: v_ceil_f32_dpp [[V]], [[V]] quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
 define amdgpu_kernel void @dpp_ceil(i32 addrspace(1)* %arg) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -30,7 +31,7 @@ define amdgpu_kernel void @dpp_ceil(i32 addrspace(1)* %arg) {
 }
 
 ; GCN-LABEL: {{^}}dpp_fadd:
-; GCN: global_load_dword [[V:v[0-9]+]],
+; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]],
 ; GCN: v_add_f32_dpp [[V]], [[V]], [[V]] quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
 define amdgpu_kernel void @dpp_fadd(i32 addrspace(1)* %arg) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()

diff  --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
new file mode 100644
index 000000000000..8fef788d26b4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -0,0 +1,810 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+
+---
+
+# GCN-label: name: vop3
+# GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec
+# GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
+# GCN: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
+name: vop3
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = COPY $vgpr2
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+
+    %5:sreg_32_xm0_xexec = IMPLICIT_DEF
+    %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64 %4, %1, %5, 1, implicit $exec
+
+    %8:vgpr_32 = V_CVT_PK_U8_F32_e64 4, %4, 2, %2, 2, %1, 1, implicit $mode, implicit $exec
+
+    ; should not be combined because src2 literal is illegal
+    %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
+
+    ; should not be combined because src1 imm is illegal
+    %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
+...
+
+# Regression test for src_modifiers on base u16 opcode
+# GCN-label: name: vop3_u16
+# GCN: %5:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
+# GCN: %7:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 4, %5, 8, %5, 0, 0, 1, 15, 15, 1, implicit $exec
+name: vop3_u16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = COPY $vgpr2
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
+    %7:vgpr_32 = V_ADD_NC_U16_e64 4, %6, 8, %5, 0, 0, implicit $exec
+...
+
+name: vop3p
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; GCN-LABEL: name: vop3p
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+    ; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+    ; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
+    ; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 13, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
+    ; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
+    ; GCN: [[V_FMA_MIXLO_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXLO_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 0, [[COPY2]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
+    ; GCN: [[V_FMA_MIXHI_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXHI_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, [[COPY]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = COPY $vgpr2
+    %3:vgpr_32 = IMPLICIT_DEF
+
+    ; this should not be combined because op_sel is not zero
+    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_DOT2_F32_F16 0, %4, 0, %0, 0, %2, 0, 5, 0, 0, 0, implicit $mode, implicit $exec
+
+    ; this should not be combined because op_sel_hi is not all set
+    %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec
+
+    %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 13, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
+
+    %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec
+
+    %12:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %13:vgpr_32 = V_FMA_MIXLO_F16 8, %12, 8, %0, 8, %2, 0, %2, 0, 7, implicit $mode, implicit $exec
+
+    %14:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %15:vgpr_32 = V_FMA_MIXHI_F16 8, %14, 8, %0, 8, %2, 1, %0, 0, 7, implicit $mode, implicit $exec
+
+...
+
+# when the DPP source isn't a src0 operand the operation should be commuted if possible
+# GCN-LABEL: name: dpp_commute_shrink
+# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
+# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
+# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
+# GCN: %16:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
+name: dpp_commute_shrink
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+
+    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 0, implicit $exec
+
+    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+    %7:vgpr_32 = V_AND_B32_e64 %1, %6, implicit $exec
+
+    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+    %10:vgpr_32 = V_MAX_I32_e64 %1, %9, implicit $exec
+
+    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+    %13:vgpr_32 = V_MIN_I32_e64 %1, %12, implicit $exec
+
+    %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
+    %16:vgpr_32 = V_SUB_U32_e64 %1, %15, 0, implicit $exec
+
+...
+
+# do not combine, dpp arg used twice
+# GCN-label: name: dpp_arg_twice
+# GCN: %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
+# GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
+# GCN: %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
+name: dpp_arg_twice
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = IMPLICIT_DEF
+
+    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
+
+    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
+
+    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
+
+...
+
+# when the dpp source isn't a src0 operand the operation should be commuted if possible
+# GCN-label: name: dpp_commute_e64
+# GCN: %4:vgpr_32  = V_MUL_U32_U24_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_FMA_F32_e64_dpp %5, 2, %0, 1, %1, 2, %1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %10:vgpr_32 = V_SUBREV_U32_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec
+# GCN: %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64_dpp %1, %0, %1, 0, 1, 14, 15, 0, implicit $exec
+# GCN: %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
+name: dpp_commute_e64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+
+    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 1, implicit $exec
+
+    %5:vgpr_32 = IMPLICIT_DEF
+    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 1, implicit $exec
+    %7:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %6, 2, %1, 1, 2, implicit $mode, implicit $exec
+
+    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+    %10:vgpr_32 = V_SUB_U32_e64 %1, %9, 1, implicit $exec
+
+    %11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 14, 15, 0, implicit $exec
+    %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1, %12, 0, implicit $exec
+
+    ; this cannot be combined because immediate as src0 isn't commutable
+    %15:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %16:vgpr_32 = V_MOV_B32_dpp %15, %0, 1, 14, 15, 0, implicit $exec
+    %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
+...
+
+---
+
+# check for floating point modifiers
+# GCN-LABEL: name: add_f32_e64
+# GCN:   %4:vgpr_32 = V_ADD_F32_e64_dpp %2, 0, %1, 0, %0, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN:   %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN:   %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN:   %10:vgpr_32 = V_ADD_F32_e64_dpp %2, 4, %1, 8, %0, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
+
+name: add_f32_e64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = IMPLICIT_DEF
+
+    ; this should be combined as e64
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
+
+    ; this should be combined and shrunk as all modifiers are default
+    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+    ; this should be combined and shrunk as modifiers other than abs|neg are default
+    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
+
+    ; this should be combined as e64
+    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
+...
+
+# check for e64 modifiers
+# GCN-LABEL: name: add_u32_e64
+# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
+# GCN: %6:vgpr_32 = V_ADD_U32_e64_dpp %2, %0, %1, 1, 1, 15, 15, 1, implicit $exec
+
+name: add_u32_e64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = IMPLICIT_DEF
+
+    ; this should be combined  and shrunk as all modifiers are default
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
+
+    ; this should be combined as _e64
+    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
+...
+
+# tests on sequences of dpp consumers
+# GCN-LABEL: name: dpp_seq
+# GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0,  implicit $exec
+# GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0,  implicit $exec
+# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
+# broken sequence:
+# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+
+name: dpp_seq
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %4:vgpr_32 = V_ADD_U32_e32 %3, %1,  implicit $exec
+    %5:vgpr_32 = V_SUB_U32_e32 %1, %3,  implicit $exec
+    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
+
+    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %8:vgpr_32 = V_ADD_U32_e32 %7, %1,  implicit $exec
+    ; this breaks the sequence
+    %9:vgpr_32 = V_SUB_U32_e32 5, %7,  implicit $exec
+...
+
+# tests on sequences of dpp consumers followed by control flow
+# GCN-LABEL: name: dpp_seq_cf
+# GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
+# GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
+# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
+
+name: dpp_seq_cf
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $vgpr1
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
+    %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
+    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
+
+    %7:sreg_32 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec
+    %8:sreg_32 = SI_IF %7, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    SI_END_CF %8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+...
+
+# GCN-LABEL: name: old_in_
diff _bb
+# GCN: %4:vgpr_32 = V_ADD_U32_dpp %0, %1, %0, 1, 1, 1, 0, implicit $exec
+
+name: old_in_
diff _bb
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
+    %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
+...
+
+# old reg def is in 
diff  BB but bound_ctrl:1 - can combine
+# GCN-LABEL: name: old_in_
diff _bb_bctrl_zero
+# GCN: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]}}, %0, %1, 1, 15, 15, 1, implicit $exec
+
+name: old_in_
diff _bb_bctrl_zero
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
+...
+
+# EXEC mask changed between def and use - cannot combine
+# GCN-LABEL: name: exec_changed
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+
+name: exec_changed
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
+    %5:sreg_64 = COPY $exec, implicit-def $exec
+    %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
+...
+
+# test if $old definition is correctly tracked through subreg manipulation pseudos
+
+# GCN-LABEL: name: mul_old_subreg
+# GCN: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec
+
+name:            mul_old_subreg
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vreg_64 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
+    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
+    %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
+...
+
+# GCN-LABEL: name: add_old_subreg
+# GCN: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec
+
+name:            add_old_subreg
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vreg_64 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
+    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
+    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
+...
+
+# GCN-LABEL: name: add_old_subreg_undef
+# GCN: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec
+
+name:            add_old_subreg_undef
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vreg_64 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
+    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
+...
+
+# Test instruction which does not have modifiers in VOP1 form but does in DPP form.
+# GCN-LABEL: name: dpp_vop1
+# GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
+name: dpp_vop1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
+...
+
+# Test instruction which does not have modifiers in VOP2 form but does in DPP form.
+# GCN-LABEL: name: dpp_min
+# GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
+name: dpp_min
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
+...
+
+# Test an undef old operand
+# GCN-LABEL: name: dpp_undef_old
+# GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
+name: dpp_undef_old
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
+...
+
+# Do not combine a dpp mov which writes a physreg.
+# GCN-LABEL: name: phys_dpp_mov_dst
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
+name: phys_dpp_mov_dst
+tracksRegLiveness: true
+body: |
+  bb.0:
+    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
+...
+
+# Do not combine a dpp mov which reads a physreg.
+# GCN-LABEL: name: phys_dpp_mov_old_src
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
+name: phys_dpp_mov_old_src
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
+...
+
+# Do not combine a dpp mov which reads a physreg.
+# GCN-LABEL: name: phys_dpp_mov_src
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
+name: phys_dpp_mov_src
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_both_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %9:vgpr_32 = IMPLICIT_DEF
+# GCN: %8:vgpr_32 = IMPLICIT_DEF
+# GCN: %6:vgpr_32 = V_ADD_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_both_combined
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_first_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %8:vgpr_32 = IMPLICIT_DEF
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
+# GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_first_combined
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_second_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %8:vgpr_32 = IMPLICIT_DEF
+# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
+# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_second_combined
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_none_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_none_combined
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_exec_changed
+# GCN:   %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN:   %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN:   %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN:   %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN:   %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN:   %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+# GCN:   S_BRANCH %bb.1
+# GCN: bb.1:
+# GCN:   %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
+# GCN:   %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_exec_changed
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    S_BRANCH %bb.1
+
+  bb.1:
+    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_subreg
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
+# GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec
+# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_subreg
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
+    %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %8, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_src2_reject
+#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+#GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
+#GCN: %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
+name: dpp_reg_sequence_src2_reject
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    ; use of dpp arg as src2, reject
+    %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
+    ; cannot commute src0 and src2, and %4.sub0 already rejected, reject
+    %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_src2
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %4:vreg_64 = REG_SEQUENCE undef %2:vgpr_32, %subreg.sub0, %3, %subreg.sub1
+#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+#GCN: %6:vgpr_32 = V_FMA_F32_e64_dpp %8, 2, %1.sub0, 2, %5, 2, %4.sub1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
+name: dpp_reg_sequence_src2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
+...
+
+# GCN-LABEL: name: dpp64_add64_impdef
+# GCN: %3:vgpr_32 = V_ADD_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp64_add64_impdef
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %0:vreg_64 = IMPLICIT_DEF
+    %1:vreg_64 = IMPLICIT_DEF
+    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec
+    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name:  dpp64_add64_undef
+# GCN: %3:vgpr_32 = V_ADD_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp64_add64_undef
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec
+    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+
+# GCN-LABEL: name: cndmask_with_src2
+# GCN: %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
+# GCN: %8:vgpr_32 = V_CNDMASK_B32_e64_dpp %2, 4, %1, 0, %1, %7, 1, 15, 15, 1, implicit $exec
+name: cndmask_with_src2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = IMPLICIT_DEF
+
+    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %4:sreg_32_xm0_xexec = IMPLICIT_DEF
+    %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
+
+    ; src2 is legal for _e64
+    %6:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 15, 1, implicit $exec
+    %7:sreg_32_xm0_xexec = IMPLICIT_DEF
+    %8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
+...
+
+---
+
+# Make sure flags aren't dropped
+# GCN-LABEL: name: flags_add_f32_e64
+# GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+name: flags_add_f32_e64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = IMPLICIT_DEF
+
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
+    S_ENDPGM 0, implicit %4
+
+...
+
+# GCN-LABEL: name: dont_combine_more_than_one_operand
+# GCN: %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
+name: dont_combine_more_than_one_operand
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
+...
+
+# GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence
+# GCN: %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
+# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dont_combine_more_than_one_operand_dpp_reg_sequence
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
+    %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
index 89d526472b56..481161e68a66 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=VI,VI-OPT,PREGFX10,PREGFX10-OPT %s
 ; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=VI,VI-NOOPT,PREGFX10,PREGFX10-NOOPT %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=VI,VI-OPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -amdgpu-enable-vopd=0 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=VI,VI-OPT %s
 
 ; FIXME: The register allocator / scheduler should be able to avoid these hazards.
 

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
index b5890ff08587..4d20e8c5e1b6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX8-OPT,GCN-OPT %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -O0 -mattr=-flat-for-global -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX8-NOOPT %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10,GCN-OPT %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -amdgpu-enable-vopd=0 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11,GCN-OPT %s
 
 ; GCN-LABEL: {{^}}dpp_test:
 ; GCN:  v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
@@ -26,7 +27,7 @@ define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in
 
 
 ; GCN-LABEL: {{^}}dpp_test1:
-; GFX10: v_add_nc_u32_e32 [[REG:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GFX10,GFX11: v_add_nc_u32_e32 [[REG:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX8-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX8-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX8-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0
@@ -51,7 +52,7 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}update_dpp64_test:
-; GCN:     load_dwordx2 v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
+; GCN:     load_{{dwordx2|b64}} v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
 ; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
 ; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
 define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i64 %in2) {
@@ -65,12 +66,13 @@ define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i
 
 ; GCN-LABEL: {{^}}update_dpp64_imm_old_test:
 ; GCN-OPT-DAG: v_mov_b32_e32 v[[OLD_LO:[0-9]+]], 0x3afaedd9
-; GCN-OPT-DAG: v_mov_b32_e32 v[[OLD_HI:[0-9]+]], 0x7047
+; GFX8-OPT-DAG,GFX10-DAG: v_mov_b32_e32 v[[OLD_HI:[0-9]+]], 0x7047
+; GFX11-DAG: v_mov_b32_e32 v[[OLD_HI:[0-9]+]], 0x7047
 ; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_LO:[0-9]+]], 0x3afaedd9
 ; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_HI:[0-9]+]], 0x7047
-; GCN-DAG: load_dwordx2 v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
+; GCN-DAG: load_{{dwordx2|b64}} v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
 ; GCN-OPT-DAG: v_mov_b32_dpp v[[OLD_LO]], v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
-; GCN-OPT-DAG: v_mov_b32_dpp v[[OLD_HI]], v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
+; GFX8-OPT-DAG,GFX10-DAG,GFX11-DAG: v_mov_b32_dpp v[[OLD_HI]], v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
 ; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
 ; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
 define amdgpu_kernel void @update_dpp64_imm_old_test(i64 addrspace(1)* %arg, i64 %in2) {

diff  --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
new file mode 100644
index 000000000000..e880530eb546
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -0,0 +1,68 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+
+---
+
+name: vopc
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; GCN-LABEL: name: vopc
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN: V_CMP_LT_F32_e32_dpp [[DEF]], 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
+    ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+    ; GCN: V_CMPX_EQ_I16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
+    ; GCN: V_CMP_CLASS_F16_e32_dpp [[DEF]], 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
+    ; GCN: [[V_CMP_GE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64_dpp [[DEF]], 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+    ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+    ; GCN: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec
+    ; GCN: V_CMP_CLASS_F32_e32_dpp [[DEF]], 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
+    ; GCN: V_CMP_NGE_F16_e32_dpp [[DEF]], 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
+    ; GCN: [[V_CMP_NGE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_e64_dpp [[DEF]], 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
+    ; GCN: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F16_e64_dpp]], 10101, implicit-def $scc
+    ; GCN: V_CMP_GT_I32_e32_dpp [[DEF]], [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = COPY $vgpr2
+    %3:vgpr_32 = IMPLICIT_DEF
+
+    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    V_CMP_LT_F32_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
+
+    ; unsafe to combine cmpx
+    %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    V_CMPX_EQ_I16_e32 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
+
+    %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    V_CMP_CLASS_F16_e32 %6, %0, implicit-def $vcc, implicit $mode, implicit $exec
+
+    %7:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %8:sgpr_32 = V_CMP_GE_F16_e64 1, %7, 0, %0, 1, implicit $mode, implicit $exec
+
+    ; unsafe to combine cmpx
+    %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    V_CMPX_GT_U32_nosdst_e64 %9, %0, implicit-def $exec, implicit $mode, implicit $exec
+
+    %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec
+
+    ; shrink
+    %13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %14:sgpr_32 = V_CMP_NGE_F16_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec
+
+    ; do not shrink, sdst used
+    %15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %16:sgpr_32 = V_CMP_NGE_F16_e64 0, %15, 0, %0, 0, implicit $mode, implicit $exec
+    %17:sgpr_32 = S_AND_B32 %16, 10101, implicit-def $scc
+
+    ; commute
+    %18:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    V_CMP_LT_I32_e32 %0, %18, implicit-def $vcc, implicit $exec
+
+...


        


More information about the llvm-commits mailing list