[llvm] [AMDGPU][MC] Support src modifiers for v_mov_b32 and v_movrel* instructions (PR #76498)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 12 06:39:25 PST 2024


https://github.com/ankurepa updated https://github.com/llvm/llvm-project/pull/76498

>From 685cd9b0bc8bbb388f1319124a59538eafed0586 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 28 Dec 2023 09:06:10 +0100
Subject: [PATCH 1/8] [AMDGPU][MC] Support src modifiers for v_mov_b32 and
 v_movrel* instructions

Resolve #54795
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |   2 +
 llvm/lib/Target/AMDGPU/VOP1Instructions.td    |  71 +++++--
 .../global-atomic-fadd.f32-no-rtn.ll          |  12 +-
 .../GlobalISel/global-atomic-fadd.f32-rtn.ll  |  12 +-
 llvm/test/CodeGen/AMDGPU/dpp64_combine.mir    |   4 +-
 llvm/test/CodeGen/AMDGPU/dpp_combine.mir      | 180 ++++++++---------
 .../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 182 +++++++++---------
 .../AMDGPU/global-atomic-fadd.f32-no-rtn.ll   |  32 +--
 .../AMDGPU/global-atomic-fadd.f32-rtn.ll      |  12 +-
 .../CodeGen/AMDGPU/inserted-wait-states.mir   |   4 +-
 llvm/test/CodeGen/AMDGPU/remat-vop.mir        |  24 +--
 .../AMDGPU/schedule-ilp-liveness-tracking.mir |   8 +-
 .../AMDGPU/sdwa-peephole-instr-gfx10.mir      |   2 +-
 .../CodeGen/AMDGPU/sdwa-peephole-instr.mir    |   2 +-
 llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir  |   3 +-
 llvm/test/CodeGen/AMDGPU/vopc_dpp.mir         |  34 ++--
 llvm/test/CodeGen/AMDGPU/wqm.mir              |   4 +-
 llvm/test/MC/AMDGPU/gfx10_asm_vop1.s          |  36 ++++
 .../AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s   |  60 ++++++
 .../MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s |  30 +++
 .../test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s |  36 ++++
 .../test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s |  36 ++++
 .../AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s   |  25 +++
 .../MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s |  12 ++
 .../MC/Disassembler/AMDGPU/gfx10_vop3.txt     |  33 ++++
 .../AMDGPU/gfx11_dasm_vop1_dpp16.txt          |  60 ++++++
 .../AMDGPU/gfx11_dasm_vop1_dpp8.txt           |  30 +++
 .../AMDGPU/gfx11_dasm_vop3_from_vop1.txt      |  36 ++++
 .../AMDGPU/gfx12_dasm_vop3_from_vop1.txt      |  36 ++++
 .../gfx12_dasm_vop3_from_vop1_dpp16.txt       |  24 +++
 .../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt |  12 ++
 31 files changed, 777 insertions(+), 277 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ebe23a5eac57b5..e315fca0f4bf97 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2600,6 +2600,8 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
 
     for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
       const MachineOperand &SrcOp = MI.getOperand(I);
+      if(I == 2)
+        MovDPP.addImm(0); // add src modifier
       assert(!SrcOp.isFPImm());
       if (SrcOp.isImm()) {
         APInt Imm(64, SrcOp.getImm());
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 27a7c29cb1ac97..4bb1972cb0ea38 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -222,13 +222,20 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
   let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
   let InsVOPDY = (ins Src0RC32:$src0Y);
   let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
+
+  let HasModifiers = 1;
+  let HasClamp = 1;
+
+  let Src0Mod = FP32InputMods;
+  let Src0ModVOP3DPP = FPVRegInputMods;
+  let Src0ModDPP = FPVRegInputMods;
 }
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
+  defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
 
-let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
-defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
+  let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+    defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
 } // End isMoveImm = 1
 
 // FIXME: Specify SchedRW for READFIRSTLANE_B32
@@ -369,9 +376,21 @@ defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
 }
 
 // Restrict src0 to be VGPR
+def VOP_PERMLANE : VOPProfile<[i32, i32, untyped, untyped]> {
+  let Src0RC32 = VRegSrc_32;
+  let Src0RC64 = VRegSrc_32;
+}
+
 def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
   let Src0RC32 = VRegSrc_32;
   let Src0RC64 = VRegSrc_32;
+
+  let HasModifiers = 1;
+  let HasClamp = 1;
+
+  let Src0Mod = FPVRegInputMods;
+  let Src0ModVOP3DPP = FPVRegInputMods;
+  let Src0ModDPP = FPVRegInputMods;
 }
 
 // Special case because there are no true output operands.  Hack vdst
@@ -384,7 +403,9 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
 
   let Outs = (outs);
   let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
-  let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
+
+  let Ins64 = !con((ins Src0RC64:$vdst), !if(!eq(Src1RC, VSrc_b32), (ins FP32InputMods:$src0_modifiers), (ins FPVRegInputMods:$src0_modifiers)), (ins Src1RC:$src0, clampmod0:$clamp));
+
   let Asm32 = getAsm32<1, 1>.ret;
 
   let OutsSDWA = (outs Src0RC32:$vdst);
@@ -394,13 +415,20 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
   let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
 
   let OutsDPP = (outs Src0RC32:$vdst);
-  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
+  let InsDPP = (ins Src0RC32:$old,
+                    FPVRegInputMods:$src0_modifiers, Src0RC32:$src0,
+                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+  let InsDPP16 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers, Src0RC32:$src0,
                       dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                       bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
   let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
-  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
+  let InsDPP8 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers,Src0RC32:$src0,dpp8:$dpp8, FI:$fi);
   let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;
 
+  let InsVOP3Base = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, clampmod0:$clamp);
+
+
   let OutsVOP3DPP = (outs Src0RC64:$vdst);
   let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
   let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
@@ -413,6 +441,11 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
 
   let HasDst = 0;
   let EmitDst = 1; // force vdst emission
+  let HasModifiers = 1;
+  let HasClamp = 1;
+
+  let Src0Mod = !if(!eq(Src1RC, VSrc_b32), FP32InputMods, FPVRegInputMods);
+  let Src0ModVOP3DPP = FPVRegInputMods;
 }
 
 def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
@@ -658,9 +691,9 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1
 
 let SubtargetPredicate = isGFX11Plus in {
   // Restrict src0 to be VGPR
-  def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
+  def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_PERMLANE,
                                       getVOP1Pat64<int_amdgcn_permlane64,
-                                                   VOP_MOVRELS>.ret,
+                                                   VOP_PERMLANE>.ret,
                                       /*VOP1Only=*/ 1>;
   defm V_MOV_B16_t16    : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
   defm V_NOT_B16        : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
@@ -1252,18 +1285,18 @@ def V_MOV_B32_indirect_read : VPseudoInstSI<
 let OtherPredicates = [isGFX8Plus] in {
 
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+  (i32 (int_amdgcn_mov_dpp (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp_ctrl, timm:$row_mask,
                            timm:$bank_mask, timm:$bound_ctrl)),
-  (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+  (V_MOV_B32_dpp VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                        (as_i32timm $row_mask), (as_i32timm $bank_mask),
                        (as_i1timm $bound_ctrl))
 >;
 
 class UpdateDPPPat<ValueType vt> : GCNPat <
-  (vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
+  (vt (int_amdgcn_update_dpp vt:$old,(vt(VOP3Mods vt:$src, i32:$src0_modifiers)), timm:$dpp_ctrl,
                               timm:$row_mask, timm:$bank_mask,
                               timm:$bound_ctrl)),
-  (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+  (V_MOV_B32_dpp VGPR_32:$old,i32:$src0_modifiers, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                  (as_i32timm $row_mask), (as_i32timm $bank_mask),
                  (as_i1timm $bound_ctrl))
 >;
@@ -1357,20 +1390,18 @@ defm V_CVT_PK_F32_BF8    : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
 
 let OtherPredicates = [isGFX10Only] in {
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
-  (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
+  (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+  (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
                         (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
 >;
 } // End OtherPredicates = [isGFX10Only]
-
 //===----------------------------------------------------------------------===//
 // GFX11
 //===----------------------------------------------------------------------===//
-
 let OtherPredicates = [isGFX11Only] in {
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
-  (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src,
+  (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+  (V_MOV_B32_dpp8_gfx11  VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
                         (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
 >;
 } // End OtherPredicates = [isGFX11Only]
@@ -1381,8 +1412,8 @@ def : GCNPat <
 
 let OtherPredicates = [isGFX12Only] in {
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
-  (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, VGPR_32:$src,
+  (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+  (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
                         (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
 >;
 } // End OtherPredicates = [isGFX12Only]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
index bfb2ecde783a63..c6b6572082d05a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
@@ -174,22 +174,22 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
   ; GFX90A_GFX940-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY11]], implicit-def dead $scc, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
   ; GFX90A_GFX940-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 63
   ; GFX90A_GFX940-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_4]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
index d2c42292a03642..b74ac41ed813ba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
@@ -176,24 +176,24 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
   ; GFX11-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY6]], implicit-def dead $scc, implicit $exec
   ; GFX11-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
   ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
   ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
   ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY9]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY9]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
   ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
   ; GFX11-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
   ; GFX11-NEXT:   [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_3]], 0, [[S_MOV_B32_3]], [[V_ADD_F32_e64_3]], 0, implicit $exec
   ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], 0, [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
   ; GFX11-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], 0, [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 15
   ; GFX11-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_4]]
   ; GFX11-NEXT:   [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 16
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index dfaa7b4efac39c..b9446a09b03028 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -39,8 +39,8 @@ body: |
 
 # DPP64 does not support all control values and must be split to become legal
 # GCN-LABEL: name: dpp64_illegal_ctrl
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
-# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, 0, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, 0, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
 # GCN: %0:vreg_64_align2 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
 # GCN: %3:vreg_64_align2 = V_CEIL_F64_e32 %0, implicit $mode, implicit $exec
 name: dpp64_illegal_ctrl
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index becc2bb095cc4b..265762f9c83b8a 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -25,29 +25,29 @@ body: |
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; VOP2
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
 
-    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
     %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
 
-    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
     %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
 
-    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
 
     ; VOP1
-    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
 
-    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+    %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
     %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
 
-    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+    %15:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
     %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
 
-    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %17:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
 ...
 
@@ -109,29 +109,29 @@ body: |
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 
     ; VOP2
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
 
-    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
     %6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
 
-    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
     %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
 
-    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
 
     ; VOP1
-    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
 
-    %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+    %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
     %14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
 
-    %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+    %15:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
     %16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
 
-    %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %17:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
 ...
 
@@ -158,19 +158,19 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
 
     %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
 
     %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 15, 0, implicit $exec
     %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 15, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 15, 15, 0, implicit $exec
     %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 15, 0, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 15, 0, implicit $exec
     %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
 ...
 
@@ -196,19 +196,19 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
 
     %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
 
     %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
     %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
     %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
 ...
 
@@ -234,19 +234,19 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
 
     %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
 
     %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 1, implicit $exec
     %7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 1, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 1, implicit $exec
     %10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 1, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 1, implicit $exec
     %13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
 ...
 
@@ -268,28 +268,28 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
 
     %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e32 %1, %3, implicit $exec
 
     %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
     %7:vgpr_32 = V_AND_B32_e32 %1, %6, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_MAX_I32_e32 %1, %9, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
     %13:vgpr_32 = V_MIN_I32_e32 %1, %12, implicit $exec
 
     %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
+    %15:vgpr_32 = V_MOV_B32_dpp %14, 0, %0, 1, 14, 15, 0, implicit $exec
     %16:vgpr_32 = V_SUB_CO_U32_e32 %1, %15, implicit-def $vcc, implicit $exec
 
     ; this cannot be combined because immediate as src0 isn't commutable
     %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec
+    %18:vgpr_32 = V_MOV_B32_dpp %17, 0, %0, 1, 14, 15, 0, implicit $exec
     %19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
 ...
 
@@ -297,7 +297,7 @@ body:             |
 
 # check for floating point modifiers
 # GCN-LABEL: name: add_f32_e64
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
 # GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
 # GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
 # GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -314,19 +314,19 @@ body:             |
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; this shouldn't be combined as omod is set
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
 
     ; this should be combined as all modifiers are default
-    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
 
     ; this should be combined as modifiers other than abs|neg are default
-    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
 
     ; this shouldn't be combined as modifiers aren't abs|neg
-    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
 ...
 
@@ -346,11 +346,11 @@ body:             |
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; this should be combined as all modifiers are default
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0,%0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
 
     ; this shouldn't be combined as clamp is set
-    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
 ...
 
@@ -368,7 +368,7 @@ body:             |
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; this shouldn't be combined as the carry-out is used
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec
 
     S_NOP 0, implicit %5
@@ -380,7 +380,7 @@ body:             |
 # GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
 # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
 # broken sequence:
-# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
 
 name: dpp_seq
 tracksRegLiveness: true
@@ -391,12 +391,12 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
     %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
     %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
 
-    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %8:vgpr_32 = V_ADD_CO_U32_e32 %7, %1, implicit-def $vcc, implicit $exec
     ; this breaks the sequence
     %9:vgpr_32 = V_SUB_CO_U32_e32 5, %7, implicit-def $vcc, implicit $exec
@@ -418,7 +418,7 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
     %5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
     %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
@@ -450,7 +450,7 @@ body: |
     S_BRANCH %bb.1
 
   bb.1:
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 1, 1, 0, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
 ...
 
@@ -471,13 +471,13 @@ body: |
     S_BRANCH %bb.1
 
   bb.1:
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
 ...
 
 # EXEC mask changed between def and use - cannot combine
 # GCN-LABEL: name: exec_changed
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
 
 name: exec_changed
 tracksRegLiveness: true
@@ -488,7 +488,7 @@ body: |
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
     %5:sreg_64 = COPY $exec, implicit-def $exec
     %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
@@ -511,7 +511,7 @@ body: |
     %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
-    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, 0, %1, 1, 1, 1, 0, implicit $exec
     %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
 ...
 
@@ -528,7 +528,7 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
-    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 1, 1, 0, implicit $exec
     %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
 ...
 
@@ -545,7 +545,7 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
-    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 15, 15, 1, implicit $exec
     %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
 ...
 
@@ -557,7 +557,7 @@ tracksRegLiveness: true
 body: |
   bb.0:
     %1:vgpr_32 = IMPLICIT_DEF
-    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
 ...
 
@@ -569,7 +569,7 @@ tracksRegLiveness: true
 body: |
   bb.0:
     %1:vgpr_32 = IMPLICIT_DEF
-    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
 ...
 
@@ -580,43 +580,43 @@ name: dpp_undef_old
 tracksRegLiveness: true
 body: |
   bb.0:
-    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which writes a physreg.
 # GCN-LABEL: name: phys_dpp_mov_dst
-# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
 name: phys_dpp_mov_dst
 tracksRegLiveness: true
 body: |
   bb.0:
-    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which reads a physreg.
 # GCN-LABEL: name: phys_dpp_mov_old_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
 name: phys_dpp_mov_old_src
 tracksRegLiveness: true
 body: |
   bb.0:
-    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which reads a physreg.
 # GCN-LABEL: name: phys_dpp_mov_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
 name: phys_dpp_mov_src
 tracksRegLiveness: true
 body: |
   bb.0:
-    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
 ...
 
@@ -637,8 +637,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -649,7 +649,7 @@ body: |
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
 # GCN: %8:vgpr_32 = IMPLICIT_DEF
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
 # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -662,8 +662,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -673,7 +673,7 @@ body: |
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
 # GCN: %8:vgpr_32 = IMPLICIT_DEF
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
@@ -687,8 +687,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -698,8 +698,8 @@ body: |
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
 # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -712,8 +712,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -723,8 +723,8 @@ body: |
 # GCN:   %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN:   %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN:   %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN:   %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN:   %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN:   %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN:   %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 # GCN:   %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN:   S_BRANCH %bb.1
 # GCN: bb.1:
@@ -739,8 +739,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     S_BRANCH %bb.1
 
@@ -753,8 +753,8 @@ body: |
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
 # GCN: %7:vgpr_32 = V_ADD_CO_U32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec
@@ -768,8 +768,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
     %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec
@@ -803,7 +803,7 @@ body: |
 ...
 
 # GCN-LABEL: name: dpp64_add64_first_combined
-# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
+# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, 0, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
 # GCN: %0:vreg_64 = REG_SEQUENCE undef %7:vgpr_32, %subreg.sub0, %8, %subreg.sub1
 # GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
 # GCN: %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %0.sub1, undef $vcc, 0, implicit $exec
@@ -827,7 +827,7 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = IMPLICIT_DEF
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:sreg_64_xexec = IMPLICIT_DEF
     %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
 ...
@@ -847,7 +847,7 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = IMPLICIT_DEF
 
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
     S_ENDPGM 0, implicit %4
 
@@ -862,7 +862,7 @@ body: |
     liveins: $vgpr0, $vgpr1
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
-    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
 ...
 
@@ -876,8 +876,8 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
     %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -903,7 +903,7 @@ body:             |
   ; GCN-NEXT:   successors: %bb.2(0x80000000)
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %5, %bb.2
-  ; GCN-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[PHI]], 323, 15, 15, 0, implicit $exec
+  ; GCN-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[PHI]], 323, 15, 15, 0, implicit $exec
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.2:
   ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
@@ -925,7 +925,7 @@ body:             |
 
   bb.1:
     %4:vgpr_32 = PHI %1, %bb.0, %5, %bb.2
-    %5:vgpr_32 = V_MOV_B32_dpp %1, %4, 323, 15, 15, 0, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp %1, 0, %4, 323, 15, 15, 0, implicit $exec
 
   bb.2:
     %6:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %3, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index fe1345e29f133d..fc87170a1c8689 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -19,7 +19,7 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = COPY $vgpr2
     %3:vgpr_32 = IMPLICIT_DEF
-    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %0, 1, 15, 15, 1, implicit $exec
 
     %5:sreg_32_xm0_xexec = IMPLICIT_DEF
     %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64 %4, %1, %5, 1, implicit $exec
@@ -27,11 +27,11 @@ body:             |
     %8:vgpr_32 = V_CVT_PK_U8_F32_e64 4, %4, 2, %2, 2, %1, 1, implicit $mode, implicit $exec
 
     ; should not be combined because src2 literal is illegal
-    %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
 
     ; should not be combined on subtargets where src1 imm is illegal
-    %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %11:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
 ...
 ---
@@ -58,23 +58,23 @@ body:             |
     %4:vgpr_32 = IMPLICIT_DEF
 
     ; should be combined because src2 allows sgpr
-    %5:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_MED3_F32_e64 0, %5, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
 
     ; should be combined only on subtargets that allow sgpr for src1
-    %7:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
     %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
 
     ; should be combined only on subtargets that allow sgpr for src1
-    %9:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
     %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
 
     ; should be combined only on subtargets that allow inlinable constants for src1
-    %11:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+    %11:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
     %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
 
     ; should not be combined when literal constants are used
-    %13:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+    %13:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
     %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
 ...
 ---
@@ -93,9 +93,9 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = COPY $vgpr2
     %3:vgpr_32 = IMPLICIT_DEF
-    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %5, 1, 15, 15, 1, implicit $exec
     %7:vgpr_32 = V_ADD_NC_U16_e64 4, %6, 8, %5, 0, 0, implicit $exec
 ...
 
@@ -111,9 +111,9 @@ body:             |
     ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+    ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
     ; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
-    ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+    ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
     ; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
     ; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 13, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
     ; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -125,23 +125,23 @@ body:             |
     %3:vgpr_32 = IMPLICIT_DEF
 
     ; this should not be combined because op_sel is not zero
-    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %5:vgpr_32 = V_DOT2_F32_F16 0, %4, 0, %0, 0, %2, 0, 5, 0, 0, 0, implicit $mode, implicit $exec
 
     ; this should not be combined because op_sel_hi is not all set
-    %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec
 
-    %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %8:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 13, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
 
-    %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %10:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec
 
-    %12:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %13:vgpr_32 = V_FMA_MIXLO_F16 8, %12, 8, %0, 8, %2, 0, %2, 0, 7, implicit $mode, implicit $exec
 
-    %14:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %14:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %15:vgpr_32 = V_FMA_MIXHI_F16 8, %14, 8, %0, 8, %2, 1, %0, 0, 7, implicit $mode, implicit $exec
 
 ...
@@ -158,7 +158,7 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = COPY $vgpr2
     %3:vgpr_32 = IMPLICIT_DEF
-    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %0, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_FMAC_F32_e64 2, %4, 2, %1, 2, %2, 1, 2, implicit $mode, implicit $exec
 ...
 
@@ -179,23 +179,23 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
 
     %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 0, implicit $exec
 
     %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
     %7:vgpr_32 = V_AND_B32_e64 %1, %6, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_MAX_I32_e64 %1, %9, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
     %13:vgpr_32 = V_MIN_I32_e64 %1, %12, implicit $exec
 
     %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
+    %15:vgpr_32 = V_MOV_B32_dpp %14, 0, %0, 1, 14, 15, 0, implicit $exec
     %16:vgpr_32 = V_SUB_U32_e64 %1, %15, 0, implicit $exec
 
 ...
@@ -215,13 +215,13 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = IMPLICIT_DEF
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
 
-    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
 
-    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
 
 ...
@@ -243,24 +243,24 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
 
     %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 1, implicit $exec
 
     %5:vgpr_32 = IMPLICIT_DEF
-    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 15, 1, implicit $exec
     %7:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %6, 2, %1, 1, 2, implicit $mode, implicit $exec
 
     %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
     %10:vgpr_32 = V_SUB_U32_e64 %1, %9, 1, implicit $exec
 
     %11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 14, 15, 0, implicit $exec
+    %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 14, 15, 0, implicit $exec
     %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1, %12, 0, implicit $exec
 
     ; this cannot be combined because immediate as src0 isn't commutable
     %15:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %16:vgpr_32 = V_MOV_B32_dpp %15, %0, 1, 14, 15, 0, implicit $exec
+    %16:vgpr_32 = V_MOV_B32_dpp %15, 0, %0, 1, 14, 15, 0, implicit $exec
     %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
 ...
 
@@ -284,19 +284,19 @@ body:             |
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; this should be combined as e64
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
 
     ; this should be combined and shrunk as all modifiers are default
-    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
 
     ; this should be combined and shrunk as modifiers other than abs|neg are default
-    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
 
     ; this should be combined as e64
-    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
 ...
 
@@ -316,11 +316,11 @@ body:             |
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; this should be combined  and shrunk as all modifiers are default
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
 
     ; this should be combined as _e64
-    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
 ...
 
@@ -330,7 +330,7 @@ body:             |
 # GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0,  implicit $exec
 # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
 # broken sequence:
-# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
 
 name: dpp_seq
 tracksRegLiveness: true
@@ -341,12 +341,12 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1,  implicit $exec
     %5:vgpr_32 = V_SUB_U32_e32 %1, %3,  implicit $exec
     %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
 
-    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %8:vgpr_32 = V_ADD_U32_e32 %7, %1,  implicit $exec
     ; this breaks the sequence
     %9:vgpr_32 = V_SUB_U32_e32 5, %7,  implicit $exec
@@ -368,7 +368,7 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
     %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
     %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
@@ -400,7 +400,7 @@ body: |
     S_BRANCH %bb.1
 
   bb.1:
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 1, 1, 0, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
 ...
 
@@ -421,13 +421,13 @@ body: |
     S_BRANCH %bb.1
 
   bb.1:
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
 ...
 
 # EXEC mask changed between def and use - cannot combine
 # GCN-LABEL: name: exec_changed
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
 
 name: exec_changed
 tracksRegLiveness: true
@@ -438,7 +438,7 @@ body: |
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
     %5:sreg_64 = COPY $exec, implicit-def $exec
     %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
@@ -461,7 +461,7 @@ body: |
     %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
-    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, 0, %1, 1, 1, 1, 0, implicit $exec
     %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
 ...
 
@@ -478,7 +478,7 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
-    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 1, 1, 0, implicit $exec
     %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
 ...
 
@@ -495,7 +495,7 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
-    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 15, 15, 1, implicit $exec
     %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
 ...
 
@@ -507,7 +507,7 @@ tracksRegLiveness: true
 body: |
   bb.0:
     %1:vgpr_32 = IMPLICIT_DEF
-    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
 ...
 
@@ -519,7 +519,7 @@ tracksRegLiveness: true
 body: |
   bb.0:
     %1:vgpr_32 = IMPLICIT_DEF
-    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
 ...
 
@@ -530,43 +530,43 @@ name: dpp_undef_old
 tracksRegLiveness: true
 body: |
   bb.0:
-    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which writes a physreg.
 # GCN-LABEL: name: phys_dpp_mov_dst
-# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
 name: phys_dpp_mov_dst
 tracksRegLiveness: true
 body: |
   bb.0:
-    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which reads a physreg.
 # GCN-LABEL: name: phys_dpp_mov_old_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
 name: phys_dpp_mov_old_src
 tracksRegLiveness: true
 body: |
   bb.0:
-    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
 ...
 
 # Do not combine a dpp mov which reads a physreg.
 # GCN-LABEL: name: phys_dpp_mov_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
 name: phys_dpp_mov_src
 tracksRegLiveness: true
 body: |
   bb.0:
-    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
 ...
 
@@ -587,8 +587,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -599,7 +599,7 @@ body: |
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
 # GCN: %8:vgpr_32 = IMPLICIT_DEF
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
 # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -612,8 +612,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -623,7 +623,7 @@ body: |
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
 # GCN: %8:vgpr_32 = IMPLICIT_DEF
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
@@ -637,8 +637,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -648,8 +648,8 @@ body: |
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
 # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -662,8 +662,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -673,8 +673,8 @@ body: |
 # GCN:   %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN:   %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN:   %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN:   %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN:   %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN:   %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN:   %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 # GCN:   %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN:   S_BRANCH %bb.1
 # GCN: bb.1:
@@ -689,8 +689,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     S_BRANCH %bb.1
 
@@ -703,8 +703,8 @@ body: |
 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
 # GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec
@@ -718,8 +718,8 @@ body: |
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
     %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %8, implicit $exec
@@ -727,8 +727,8 @@ body: |
 ...
 
 # GCN-LABEL: name: dpp_reg_sequence_src2_reject
-#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 #GCN: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
 #GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
 #GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
@@ -741,8 +741,8 @@ body: |
 
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
     ; use of dpp arg as src2, reject
@@ -752,7 +752,7 @@ body: |
 ...
 
 # GCN-LABEL: name: dpp_reg_sequence_src2
-#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
 #GCN: %4:vreg_64 = REG_SEQUENCE undef %2:vgpr_32, %subreg.sub0, %3, %subreg.sub1
 #GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
 #GCN: %6:vgpr_32 = V_FMA_F32_e64_dpp %8, 2, %1.sub0, 2, %5, 2, %4.sub1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -764,8 +764,8 @@ body: |
 
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
     %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
@@ -810,12 +810,12 @@ body: |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = IMPLICIT_DEF
 
-    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:sreg_32_xm0_xexec = IMPLICIT_DEF
     %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
 
     ; src2 is legal for _e64
-    %6:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %7:sreg_32_xm0_xexec = IMPLICIT_DEF
     %8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
 ...
@@ -835,7 +835,7 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = IMPLICIT_DEF
 
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
     S_ENDPGM 0, implicit %4
 
@@ -850,7 +850,7 @@ body: |
     liveins: $vgpr0, $vgpr1
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
-    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
 ...
 
@@ -864,8 +864,8 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
     %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
index 8cd25298d74739..9f23ad94597399 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
@@ -173,17 +173,17 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
   ; GFX908-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
   ; GFX908-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
   ; GFX908-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-  ; GFX908-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+  ; GFX908-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
   ; GFX908-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
-  ; GFX908-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+  ; GFX908-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
   ; GFX908-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
-  ; GFX908-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+  ; GFX908-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
   ; GFX908-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
-  ; GFX908-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+  ; GFX908-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
   ; GFX908-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
-  ; GFX908-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+  ; GFX908-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
   ; GFX908-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
-  ; GFX908-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+  ; GFX908-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
   ; GFX908-NEXT:   [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
   ; GFX908-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
   ; GFX908-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]]
@@ -235,17 +235,17 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
   ; GFX90A_GFX940-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
   ; GFX90A_GFX940-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
-  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+  ; GFX90A_GFX940-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
   ; GFX90A_GFX940-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
   ; GFX90A_GFX940-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]]
@@ -293,13 +293,13 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
   ; GFX11_GFX12-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
   ; GFX11_GFX12-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
   ; GFX11_GFX12-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-  ; GFX11_GFX12-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 353, 15, 15, 0, implicit $exec
+  ; GFX11_GFX12-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 353, 15, 15, 0, implicit $exec
   ; GFX11_GFX12-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
-  ; GFX11_GFX12-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 354, 15, 15, 0, implicit $exec
+  ; GFX11_GFX12-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 354, 15, 15, 0, implicit $exec
   ; GFX11_GFX12-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
-  ; GFX11_GFX12-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 356, 15, 15, 0, implicit $exec
+  ; GFX11_GFX12-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 356, 15, 15, 0, implicit $exec
   ; GFX11_GFX12-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
-  ; GFX11_GFX12-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 360, 15, 15, 0, implicit $exec
+  ; GFX11_GFX12-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 360, 15, 15, 0, implicit $exec
   ; GFX11_GFX12-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
   ; GFX11_GFX12-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
   ; GFX11_GFX12-NEXT:   [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
index 77be7f506aaf2a..699c5e61b30bbf 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
@@ -179,19 +179,19 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
   ; GFX11-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
   ; GFX11-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
   ; GFX11-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
   ; GFX11-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
   ; GFX11-NEXT:   [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], killed [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, killed [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
-  ; GFX11-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
+  ; GFX11-NEXT:   [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
   ; GFX11-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 15
   ; GFX11-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_3]]
   ; GFX11-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 16
diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
index cd80098b012094..09703117f41a46 100644
--- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -419,11 +419,11 @@ name: dpp
 body: |
   bb.0:
     $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-    $vgpr1 = V_MOV_B32_dpp $vgpr1, $vgpr0, 0, 15, 15, 0, implicit $exec
+    $vgpr1 = V_MOV_B32_dpp $vgpr1, 0, $vgpr0, 0, 15, 15, 0, implicit $exec
     S_BRANCH %bb.1
 
   bb.1:
     implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
-    $vgpr3 = V_MOV_B32_dpp $vgpr3, $vgpr0, 0, 15, 15, 0, implicit $exec
+    $vgpr3 = V_MOV_B32_dpp $vgpr3, 0, $vgpr0, 0, 15, 15, 0, implicit $exec
     S_ENDPGM 0
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index 248a9e2ddb6360..720736b51c8394 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -82,16 +82,16 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b32_e64
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e64 1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 2, implicit $exec
+    ; GCN: renamable $vgpr0 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 0, 2, 0, implicit $exec
     ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
     ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 0, 3, 0, implicit $exec
     ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
     ; GCN-NEXT: S_ENDPGM 0
-    %0:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
-    %1:vgpr_32 = V_MOV_B32_e64 2, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_e64 3, implicit $exec
+    %0:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_e64 0, 2, 0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e64 0, 3, 0, implicit $exec
     S_NOP 0, implicit %0
     S_NOP 0, implicit %1
     S_NOP 0, implicit %2
@@ -105,10 +105,10 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_mov_b32_dpp
-    ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
     ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
-    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
     ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
@@ -116,9 +116,9 @@ body:             |
     ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
     ; GCN-NEXT: S_ENDPGM 0
-    %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
-    %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
-    %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     S_NOP 0, implicit %1
     S_NOP 0, implicit %2
     S_NOP 0, implicit %3
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir b/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
index 4b6e204ecf9570..5c6783b376a1a6 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
@@ -9,8 +9,8 @@ body:             |
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   %src0:vgpr_32 = V_MOV_B32_e64 0, implicit $exec
-  ; CHECK-NEXT:   %src1:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
+  ; CHECK-NEXT:   %src0:vgpr_32 = V_MOV_B32_e64 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   %src1:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
   ; CHECK-NEXT:   %live0:vgpr_32 = V_ADD_U32_e32 %src0, %src1, implicit $exec
   ; CHECK-NEXT:   %live1:vgpr_32 = V_ADD_U32_e32 %live0, %src1, implicit $exec
   ; CHECK-NEXT: {{  $}}
@@ -20,8 +20,8 @@ body:             |
   ; CHECK-NEXT:   S_ENDPGM 0
   bb.0:
     successors: %bb.1
-    %src0:vgpr_32 = V_MOV_B32_e64 0, implicit $exec
-    %src1:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
+    %src0:vgpr_32 = V_MOV_B32_e64 0, 0, 0, implicit $exec
+    %src1:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
     %live0:vgpr_32 = V_ADD_U32_e32 %src0:vgpr_32, %src1:vgpr_32, implicit $exec
     %live1:vgpr_32 = V_ADD_U32_e32 %live0:vgpr_32, %src1:vgpr_32, implicit $exec
 
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
index dcb51fcb766533..ec9e1db6e76499 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
@@ -100,7 +100,7 @@ body:             |
     %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
 
     %25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
-    %26 = V_MOV_B32_e64 %25, implicit $exec
+    %26 = V_MOV_B32_e64 0, %25, 0, implicit $exec
     %26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
     %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
     %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
index a2cad1398bcd1b..4108102e865d68 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
@@ -109,7 +109,7 @@ body:             |
     %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
 
     %25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
-    %26 = V_MOV_B32_e64 %25, implicit $exec
+    %26 = V_MOV_B32_e64 0, %25, 0, implicit $exec
     %26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
     %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
     %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
index 876fa6f5f27446..7647cf9f634715 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -54,8 +54,9 @@ body:             |
 
     %18 = V_LSHRREV_B32_e64 16, %17, implicit $exec
     %19 = V_READLANE_B32 killed %18, 0, implicit-def $vcc, implicit $exec
-    %20 = V_MOV_B32_e64 %19, implicit $exec
+    %20 = V_MOV_B32_e64 0, %19, 0, implicit $exec
 
     FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
     $sgpr30_sgpr31 = COPY %2
     S_SETPC_B64_return $sgpr30_sgpr31
+
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index b8c935927ff707..c29bccf2fa6a5d 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -17,15 +17,15 @@ body:             |
     ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
-    ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
     ; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
     ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec
     ; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
-    ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
     ; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec
     ; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
     ; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
-    ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
     ; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
     ; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
     ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc
@@ -35,41 +35,41 @@ body:             |
     %2:vgpr_32 = COPY $vgpr2
     %3:vgpr_32 = IMPLICIT_DEF
 
-    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     V_CMP_LT_F32_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
 
     ; unsafe to combine cmpx
-    %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     V_CMPX_EQ_I16_t16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
 
-    %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec
 
-    %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %8:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
 
     ; unsafe to combine cmpx
-    %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %10:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     V_CMPX_GT_U32_nosdst_e64 %10, %0, implicit-def $exec, implicit $mode, implicit $exec
 
-    %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %11:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec
 
     ; shrink
-    %13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %13:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %14:sgpr_32 = V_CMP_NGE_F32_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec
 
     ; do not shrink True16 instructions
-    %15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %15:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
 
     ; do not shrink, sdst used
-    %17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %17:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     %18:sgpr_32 = V_CMP_NGE_F32_e64 0, %17, 0, %0, 0, implicit $mode, implicit $exec
     %19:sgpr_32 = S_AND_B32 %18, 10101, implicit-def $scc
 
     ; commute
-    %20:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+    %20:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
     V_CMP_LT_I32_e32 %0, %20, implicit-def $vcc, implicit $exec
 
 ...
@@ -88,9 +88,9 @@ body:             |
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 14, 1, implicit $exec
     ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
-    ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[COPY1]], 1, 13, 15, 1, implicit $exec
     ; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
@@ -99,10 +99,10 @@ body:             |
 
     ; Do not combine VOPC when row_mask or bank_mask is not 0xf
     ; All cases are covered by generic rules for creating DPP instructions
-    %4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
+    %4:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 15, 14, 1, implicit $exec
     %99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
 
-    %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
+    %5:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 13, 15, 1, implicit $exec
     %6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
 
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 20476b6afd674c..27373dee64f338 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -178,7 +178,7 @@ body:             |
     %11:vgpr_32 = COPY %16
     %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
     %14:vgpr_32 = COPY %7
-    %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
+    %13:vgpr_32 = V_MOV_B32_dpp %14, 0, killed %10, 323, 12, 15, 0, implicit $exec
     early-clobber %15:vgpr_32 = STRICT_WWM killed %13, implicit $exec
     BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, implicit $exec
     S_ENDPGM 0
@@ -210,7 +210,7 @@ body:             |
     %8:sreg_64 = COPY $exec
     %9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     %10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
-    %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
+    %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, 0, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
     %12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
     early-clobber %13:sreg_32 = STRICT_WWM %9:vgpr_32, implicit $exec
 
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
index 1cfafebe2c3cd4..e84f559c854a49 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
@@ -106,6 +106,15 @@ v_mov_b32_e64 v5, 0.5
 v_mov_b32_e64 v5, -4.0
 // GFX10: encoding: [0x05,0x00,0x81,0xd5,0xf7,0x00,0x00,0x00]
 
+v_mov_b32_e64 v5, |v1|
+// GFX10: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, -m0
+// GFX10: encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20]
+
 v_mov_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
 // GFX10: encoding: [0xf9,0x02,0x0a,0x7e,0x01,0x06,0x06,0x00]
 
@@ -11005,6 +11014,15 @@ v_movreld_b32_e64 v5, 0.5
 v_movreld_b32_e64 v5, -4.0
 // GFX10: encoding: [0x05,0x00,0xc2,0xd5,0xf7,0x00,0x00,0x00]
 
+v_movreld_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+
+v_movreld_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+
+v_movreld_b32_e64 v5, -s1
+// GFX10: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+
 v_movreld_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
 // GFX10: encoding: [0xf9,0x84,0x00,0x7e,0x02,0x06,0x06,0x00]
 
@@ -11035,6 +11053,12 @@ v_movrels_b32_e64 v255, v1
 v_movrels_b32_e64 v5, v255
 // GFX10: encoding: [0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
 
+v_movrels_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrels_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+
 v_movrels_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
 // GFX10: encoding: [0xf9,0x86,0x00,0x7e,0x02,0x06,0x06,0x00]
 
@@ -11077,6 +11101,12 @@ v_movrelsd_b32_e64 v255, v1
 v_movrelsd_b32_e64 v5, v255
 // GFX10: encoding: [0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
 
+v_movrelsd_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+
 v_movrelsd_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
 // GFX10: encoding: [0xf9,0x88,0x00,0x7e,0x02,0x06,0x06,0x00]
 
@@ -11122,6 +11152,12 @@ v_movrelsd_2_b32_e64 v255, v1
 v_movrelsd_2_b32_e64 v5, v255
 // GFX10: encoding: [0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
 
+v_movrelsd_2_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_2_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+
 v_movrelsd_2_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
 // GFX10: encoding: [0xf9,0x90,0x00,0x7e,0x02,0x06,0x06,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
index 9a65c6687f3f84..74f6e9391c989a 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
@@ -1974,6 +1974,18 @@ v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:
 v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
 
+v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
 v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
 // GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 
@@ -2016,6 +2028,18 @@ v_movreld_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
 v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
 
+v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
 v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
 // GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 
@@ -2058,6 +2082,18 @@ v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
 v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
 
+v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
 v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
 // GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 
@@ -2100,6 +2136,18 @@ v_movrelsd_2_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
 v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
 
+v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
 v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
 // GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 
@@ -2142,6 +2190,18 @@ v_movrelsd_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:
 v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX11: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
 
+v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
 v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
 // GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
index 3897b82785f65b..6db9923c41f5f2 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
@@ -495,6 +495,12 @@ v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
 // GFX11: [0x05,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 
+v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
 v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 
@@ -507,6 +513,12 @@ v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: [0xff,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 
+v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
 v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 
@@ -516,6 +528,12 @@ v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: [0xff,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 
+v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
 v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 
@@ -525,6 +543,12 @@ v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: [0xff,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 
+v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
 v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 
@@ -534,6 +558,12 @@ v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX11: [0xff,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 
+v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
 v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
index fb4e9108fe1d1a..c1790c953d906d 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
@@ -2538,6 +2538,15 @@ v_mov_b32_e64 v5, src_scc
 v_mov_b32_e64 v255, 0xaf123456
 // GFX11: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
 
+v_mov_b32_e64 v5, |v1|
+// GFX11: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -m0
+// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+
+v_mov_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+
 v_movreld_b32_e64 v5, v1
 // GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
 
@@ -2583,24 +2592,51 @@ v_movreld_b32_e64 v5, src_scc
 v_movreld_b32_e64 v255, 0xaf123456
 // GFX11: encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
 
+v_movreld_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+
+v_movreld_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+
+v_movreld_b32_e64 v5, -s1
+// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+
 v_movrels_b32_e64 v5, v1
 // GFX11: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
 
 v_movrels_b32_e64 v255, v255
 // GFX11: encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
 
+v_movrels_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrels_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+
 v_movrelsd_2_b32_e64 v5, v1
 // GFX11: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
 
 v_movrelsd_2_b32_e64 v255, v255
 // GFX11: encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
 
+v_movrelsd_2_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_2_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+
 v_movrelsd_b32_e64 v5, v1
 // GFX11: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
 
 v_movrelsd_b32_e64 v255, v255
 // GFX11: encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
 
+v_movrelsd_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+
 v_nop_e64
 // GFX11: encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
index e35bb632906722..ebda8b65f8505e 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
@@ -2538,6 +2538,15 @@ v_mov_b32_e64 v5, src_scc
 v_mov_b32_e64 v255, 0xaf123456
 // GFX12: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
 
+v_mov_b32_e64 v5, |v1|
+// GFX12: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -m0
+// GFX12: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+
+v_mov_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+
 v_movreld_b32_e64 v5, v1
 // GFX12: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
 
@@ -2583,24 +2592,51 @@ v_movreld_b32_e64 v5, src_scc
 v_movreld_b32_e64 v255, 0xaf123456
 // GFX12: encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
 
+v_movreld_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+
+v_movreld_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+
+v_movreld_b32_e64 v5, -s1
+// GFX12: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+
 v_movrels_b32_e64 v5, v1
 // GFX12: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
 
 v_movrels_b32_e64 v255, v255
 // GFX12: encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
 
+v_movrels_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrels_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+
 v_movrelsd_2_b32_e64 v5, v1
 // GFX12: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
 
 v_movrelsd_2_b32_e64 v255, v255
 // GFX12: encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
 
+v_movrelsd_2_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_2_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+
 v_movrelsd_b32_e64 v5, v1
 // GFX12: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
 
 v_movrelsd_b32_e64 v255, v255
 // GFX12: encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
 
+v_movrelsd_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+
 v_nop_e64
 // GFX12: encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
index 6b915bd14683a2..def9e8212f1556 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
@@ -1974,6 +1974,18 @@ v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:
 v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
 
+v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX12: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
 v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
 // GFX12: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 
@@ -2016,6 +2028,19 @@ v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
 v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
 // GFX12: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
 
+v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX12: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
+
 v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
 // GFX12: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
index 61266f3776c284..4d21a54b354744 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
@@ -498,6 +498,12 @@ v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 
+v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
 v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 
@@ -507,6 +513,12 @@ v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
 v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX12: [0xff,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 
+v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
 v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
index 4c0170ca4e4747..d9661b1e7a4332 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
@@ -14004,6 +14004,12 @@
 # GFX10: v_mov_b32_e64 v5, vcc_lo                ; encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00]
 0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00
 
+# GFX10: v_mov_b32_e64 v5, |v1|                  ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
+
+# GFX10: v_mov_b32_e64 v5, -m0                   ; encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20
+
 # GFX10: v_movreld_b32_e64 v255, v1              ; encoding: [0xff,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
 0xff,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
 
@@ -14028,6 +14034,15 @@
 # GFX10: v_movreld_b32_e64 v5, v255              ; encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00]
 0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00
 
+# GFX10: v_movreld_b32_e64 v5, -v1               ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movreld_b32_e64 v5, |v2|              ; encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00
+
+# GFX10: v_movreld_b32_e64 v5, -s1               ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20
+
 # GFX10: v_movrels_b32_e64 v255, v1              ; encoding: [0xff,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
 0xff,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00
 
@@ -14037,6 +14052,12 @@
 # GFX10: v_movrels_b32_e64 v5, v255              ; encoding: [0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
 0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00
 
+# GFX10: v_movrels_b32_e64 v5, -v1               ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movrels_b32_e64 v5, |v2|              ; encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00
+
 # GFX10: v_movrelsd_2_b32_e64 v255, v1           ; encoding: [0xff,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
 0xff,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00
 
@@ -14046,6 +14067,12 @@
 # GFX10: v_movrelsd_2_b32_e64 v5, v255           ; encoding: [0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
 0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00
 
+# GFX10: v_movrelsd_2_b32_e64 v5, -v1            ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movrelsd_2_b32_e64 v5, |v2|           ; encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00
+
 # GFX10: v_movrelsd_b32_e64 v255, v1             ; encoding: [0xff,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
 0xff,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00
 
@@ -14055,6 +14082,12 @@
 # GFX10: v_movrelsd_b32_e64 v5, v255             ; encoding: [0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
 0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00
 
+# GFX10: v_movrelsd_b32_e64 v5, -v1              ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movrelsd_b32_e64 v5, |v2|             ; encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00
+
 # GFX10: v_mqsad_pk_u16_u8 v[254:255], v[1:2], v2, v[3:4] ; encoding: [0xfe,0x00,0x73,0xd5,0x01,0x05,0x0e,0x04]
 0xfe,0x00,0x73,0xd5,0x01,0x05,0x0e,0x04
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
index 8758305258387c..922c7f5ce14163 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
@@ -1765,6 +1765,18 @@
 # GFX11: v_mov_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
 0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30
 
+# GFX11: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX11: v_movreld_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff
 
@@ -1807,6 +1819,18 @@
 # GFX11: v_movreld_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
 0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30
 
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX11: v_movrels_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff
 
@@ -1849,6 +1873,18 @@
 # GFX11: v_movrels_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
 0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30
 
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX11: v_movrelsd_2_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff
 
@@ -1891,6 +1927,18 @@
 # GFX11: v_movrelsd_2_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
 0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30
 
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX11: v_movrelsd_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff
 
@@ -1933,6 +1981,18 @@
 # GFX11: v_movrelsd_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
 0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30
 
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX11: v_not_b16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
index a3531410ac401f..bc785c28e46680 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
@@ -253,30 +253,60 @@
 # GFX11: v_mov_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00]
 0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00
 
+# GFX11: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_movreld_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05]
 0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05
 
 # GFX11: v_movreld_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00]
 0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00
 
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_movrels_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05]
 0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05
 
 # GFX11: v_movrels_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00]
 0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00
 
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_movrelsd_2_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05]
 0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05
 
 # GFX11: v_movrelsd_2_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00]
 0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00
 
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_movrelsd_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05]
 0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05
 
 # GFX11: v_movrelsd_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00]
 0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00
 
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_not_b16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05]
 0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
index 770e3c6e7a6f30..8fbd2708f0f26f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
@@ -2313,6 +2313,15 @@
 # GFX11: v_mov_b32_e64 v255, 0xaf123456          ; encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
 0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
 
+# GFX11: v_mov_b32_e64 v5, |v1|                  ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
+
+# GFX11: v_mov_b32_e64 v5, -m0                   ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20
+
+# GFX11: v_mov_b32_e64 v5, -v1                   ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20
+
 # GFX11: v_movreld_b32_e64 v5, v1                ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
 0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
 
@@ -2358,24 +2367,51 @@
 # GFX11: v_movreld_b32_e64 v255, 0xaf123456      ; encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
 0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
 
+# GFX11: v_movreld_b32_e64 v5, -v1               ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movreld_b32_e64 v5, |v2|              ; encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00
+
+# GFX11: v_movreld_b32_e64 v5, -s1               ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20
+
 # GFX11: v_movrels_b32_e64 v5, v1                ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
 0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00
 
 # GFX11: v_movrels_b32_e64 v255, v255            ; encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
 0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00
 
+# GFX11: v_movrels_b32_e64 v5, -v1               ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movrels_b32_e64 v5, |v2|              ; encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00
+
 # GFX11: v_movrelsd_2_b32_e64 v5, v1             ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
 0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00
 
 # GFX11: v_movrelsd_2_b32_e64 v255, v255         ; encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
 0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00
 
+# GFX11: v_movrelsd_2_b32_e64 v5, -v1            ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movrelsd_2_b32_e64 v5, |v2|           ; encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00
+
 # GFX11: v_movrelsd_b32_e64 v5, v1               ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
 0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00
 
 # GFX11: v_movrelsd_b32_e64 v255, v255           ; encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
 0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00
 
+# GFX11: v_movrelsd_b32_e64 v5, -v1              ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movrelsd_b32_e64 v5, |v2|             ; encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00
+
 # GFX11: v_nop                                   ; encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00]
 0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
index 4fe4284e8eb4e6..c7c1ba84e86f67 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
@@ -2313,6 +2313,15 @@
 # GFX12: v_mov_b32_e64 v255, 0xaf123456          ; encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
 0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
 
+# GFX12: v_mov_b32_e64 v5, |v1|                  ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
+
+# GFX12: v_mov_b32_e64 v5, -m0                   ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20
+
+# GFX12: v_mov_b32_e64 v5, -v1                   ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20
+
 # GFX12: v_movreld_b32_e64 v5, v1                ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
 0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
 
@@ -2358,24 +2367,51 @@
 # GFX12: v_movreld_b32_e64 v255, 0xaf123456      ; encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
 0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
 
+# GFX12: v_movreld_b32_e64 v5, -v1               ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20
+
+# GFX12: v_movreld_b32_e64 v5, |v2|              ; encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00
+
+# GFX12: v_movreld_b32_e64 v5, -s1               ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20
+
 # GFX12: v_movrels_b32_e64 v5, v1                ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
 0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00
 
 # GFX12: v_movrels_b32_e64 v255, v255            ; encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
 0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00
 
+# GFX12: v_movrels_b32_e64 v5, -v1               ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20
+
+# GFX12: v_movrels_b32_e64 v5, |v2|              ; encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00
+
 # GFX12: v_movrelsd_2_b32_e64 v5, v1             ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
 0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00
 
 # GFX12: v_movrelsd_2_b32_e64 v255, v255         ; encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
 0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00
 
+# GFX12: v_movrelsd_2_b32_e64 v5, -v1            ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20
+
+# GFX12: v_movrelsd_2_b32_e64 v5, |v2|           ; encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00
+
 # GFX12: v_movrelsd_b32_e64 v5, v1               ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
 0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00
 
 # GFX12: v_movrelsd_b32_e64 v255, v255           ; encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
 0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00
 
+# GFX12: v_movrelsd_b32_e64 v5, -v1              ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20
+
+# GFX12: v_movrelsd_b32_e64 v5, |v2|             ; encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00
+
 # GFX12: v_not_b16_e64 v5, v1                    ; encoding: [0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00]
 0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
index e914d139e240e1..47e2e7bd0afa5a 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
@@ -1764,6 +1764,18 @@
 # GFX12: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
 0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
 
+# GFX12: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX12: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 
@@ -1806,6 +1818,18 @@
 # GFX12: v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
 0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
 
+# GFX12: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX12: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX12: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX12: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX12: v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
index 2a4b677620d387..964f3c3126cb0e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
@@ -402,12 +402,24 @@
 # GFX12: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
 
+# GFX12: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX12: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX12: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 
 # GFX12: v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
 
+# GFX12: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX12: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX12: v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 

>From 6c3877e169bba21305991da4b4567b38951b115a Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 28 Dec 2023 11:41:39 +0100
Subject: [PATCH 2/8] Formating

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  2 +-
 llvm/lib/Target/AMDGPU/VOP1Instructions.td    | 19 ++++++++++++-------
 llvm/test/CodeGen/AMDGPU/dpp_combine.mir      |  2 +-
 .../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir |  2 +-
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e315fca0f4bf97..409d0477e065c9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2600,7 +2600,7 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
 
     for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
       const MachineOperand &SrcOp = MI.getOperand(I);
-      if(I == 2)
+      if (I == 2)
         MovDPP.addImm(0); // add src modifier
       assert(!SrcOp.isFPImm());
       if (SrcOp.isImm()) {
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 4bb1972cb0ea38..56e67f2c54fffd 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -404,7 +404,10 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
   let Outs = (outs);
   let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
 
-  let Ins64 = !con((ins Src0RC64:$vdst), !if(!eq(Src1RC, VSrc_b32), (ins FP32InputMods:$src0_modifiers), (ins FPVRegInputMods:$src0_modifiers)), (ins Src1RC:$src0, clampmod0:$clamp));
+  let Ins64 = !con((ins Src0RC64:$vdst),
+                   !if(!eq(Src1RC, VSrc_b32), (ins FP32InputMods:$src0_modifiers),
+                       (ins FPVRegInputMods:$src0_modifiers)),
+                   (ins Src1RC:$src0, clampmod0:$clamp));
 
   let Asm32 = getAsm32<1, 1>.ret;
 
@@ -423,12 +426,11 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
                       dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                       bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
   let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
-  let InsDPP8 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers,Src0RC32:$src0,dpp8:$dpp8, FI:$fi);
+  let InsDPP8 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
   let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;
 
   let InsVOP3Base = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, clampmod0:$clamp);
 
-
   let OutsVOP3DPP = (outs Src0RC64:$vdst);
   let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
   let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
@@ -1390,7 +1392,8 @@ defm V_CVT_PK_F32_BF8    : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
 
 let OtherPredicates = [isGFX10Only] in {
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+  (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
+                            timm:$dpp8)),
   (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
                         (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
 >;
@@ -1400,8 +1403,9 @@ def : GCNPat <
 //===----------------------------------------------------------------------===//
 let OtherPredicates = [isGFX11Only] in {
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
-  (V_MOV_B32_dpp8_gfx11  VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
+  (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
+                            timm:$dpp8)),
+  (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
                         (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
 >;
 } // End OtherPredicates = [isGFX11Only]
@@ -1412,7 +1416,8 @@ def : GCNPat <
 
 let OtherPredicates = [isGFX12Only] in {
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+  (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
+                            timm:$dpp8)),
   (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
                         (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
 >;
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index 265762f9c83b8a..9442af2aa1c220 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -346,7 +346,7 @@ body:             |
     %2:vgpr_32 = IMPLICIT_DEF
 
     ; this should be combined as all modifiers are default
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0,%0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
 
     ; this shouldn't be combined as clamp is set
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index fc87170a1c8689..5f91d1af8fe462 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -850,7 +850,7 @@ body: |
     liveins: $vgpr0, $vgpr1
     %0:vgpr_32 = COPY $vgpr0
     %1:vgpr_32 = COPY $vgpr1
-    %2:vgpr_32 = V_MOV_B32_dpp %0, 0,%1, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
 ...
 

>From 29138392ca1950dbf2fca5cceb154e9acdb0f451 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 28 Dec 2023 14:37:43 +0100
Subject: [PATCH 3/8] Undo formatting

---
 llvm/lib/Target/AMDGPU/VOP1Instructions.td | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 56e67f2c54fffd..a112e14e6b4437 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -232,10 +232,10 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
 }
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-  defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
+defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
 
-  let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
-    defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
+let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
 } // End isMoveImm = 1
 
 // FIXME: Specify SchedRW for READFIRSTLANE_B32

>From 2dd479a2f9ecd97bd338f66c05e086cd104d4ab6 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 29 Dec 2023 12:25:10 +0100
Subject: [PATCH 4/8] Delete extra line

---
 llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
index def9e8212f1556..e6fbe31586afd4 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
@@ -2040,7 +2040,6 @@ v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
 v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
 // GFX12:  [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
 
-
 v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
 // GFX12: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 

>From 2177ecf576fd923ac48ebd61b926973199d80284 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 29 Dec 2023 12:39:11 +0100
Subject: [PATCH 5/8] Dasm test fix

---
 .../AMDGPU/gfx11_dasm_vop1_dpp16.txt          | 60 -------------------
 .../AMDGPU/gfx11_dasm_vop1_dpp8.txt           | 30 ----------
 .../gfx11_dasm_vop3_dpp16_from_vop1.txt       | 60 +++++++++++++++++++
 .../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt | 30 ++++++++++
 4 files changed, 90 insertions(+), 90 deletions(-)

diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
index 922c7f5ce14163..8758305258387c 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
@@ -1765,18 +1765,6 @@
 # GFX11: v_mov_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
 0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30
 
-# GFX11: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
 # GFX11: v_movreld_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff
 
@@ -1819,18 +1807,6 @@
 # GFX11: v_movreld_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
 0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30
 
-# GFX11: v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
 # GFX11: v_movrels_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff
 
@@ -1873,18 +1849,6 @@
 # GFX11: v_movrels_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
 0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30
 
-# GFX11: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
 # GFX11: v_movrelsd_2_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff
 
@@ -1927,18 +1891,6 @@
 # GFX11: v_movrelsd_2_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
 0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30
 
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
 # GFX11: v_movrelsd_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff
 
@@ -1981,18 +1933,6 @@
 # GFX11: v_movrelsd_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
 0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30
 
-# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
 # GFX11: v_not_b16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
index bc785c28e46680..a3531410ac401f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
@@ -253,60 +253,30 @@
 # GFX11: v_mov_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00]
 0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00
 
-# GFX11: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
 # GFX11: v_movreld_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05]
 0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05
 
 # GFX11: v_movreld_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00]
 0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00
 
-# GFX11: v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
 # GFX11: v_movrels_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05]
 0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05
 
 # GFX11: v_movrels_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00]
 0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00
 
-# GFX11: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
 # GFX11: v_movrelsd_2_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05]
 0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05
 
 # GFX11: v_movrelsd_2_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00]
 0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00
 
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
 # GFX11: v_movrelsd_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05]
 0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05
 
 # GFX11: v_movrelsd_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00]
 0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00
 
-# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
 # GFX11: v_not_b16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05]
 0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
index cf29efa5ff56bb..f9f76a1ae2147b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
@@ -1764,6 +1764,18 @@
 # GFX11: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
 0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
 
+# GFX11: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 
@@ -1806,6 +1818,18 @@
 # GFX11: v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
 0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
 
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX11: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 
@@ -1848,6 +1872,18 @@
 # GFX11: v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
 0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
 
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 
@@ -1890,6 +1926,18 @@
 # GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
 0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
 
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX11: v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 
@@ -1932,6 +1980,18 @@
 # GFX11: v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
 0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
 
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
 # GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
 0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
index bfda6d10c2f6d4..e862e32c456e46 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
@@ -402,30 +402,60 @@
 # GFX11: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
 
+# GFX11: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 
 # GFX11: v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
 
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 
 # GFX11: v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
 
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 
 # GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
 
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 
 # GFX11: v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
 0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
 
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
 # GFX11: v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
 0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 

>From be59750f015e17d536efc3ac163d6cfa9a83f92c Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 29 Dec 2023 12:40:08 +0100
Subject: [PATCH 6/8] Deleted line

---
 llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
index 7647cf9f634715..1a04fdc8bae1ac 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -59,4 +59,3 @@ body:             |
     FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
     $sgpr30_sgpr31 = COPY %2
     S_SETPC_B64_return $sgpr30_sgpr31
-

>From 3c512d13f9eac55976529c2988015da45f06501f Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 29 Dec 2023 12:40:40 +0100
Subject: [PATCH 7/8] Deleted space after comma

---
 llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 5f91d1af8fe462..a60f765d5b8f2d 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -815,7 +815,7 @@ body: |
     %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
 
     ; src2 is legal for _e64
-    %6:vgpr_32 = V_MOV_B32_dpp %2, 0,%1, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %7:sreg_32_xm0_xexec = IMPLICIT_DEF
     %8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
 ...
@@ -835,7 +835,7 @@ body:             |
     %1:vgpr_32 = COPY $vgpr1
     %2:vgpr_32 = IMPLICIT_DEF
 
-    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0,%1, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
     S_ENDPGM 0, implicit %4
 
@@ -864,7 +864,7 @@ body: |
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
-    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0,%1.sub0, 1, 15, 15, 1, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
     %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
     %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec

>From 478de367aba186333d5c323087274447691a2dfe Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 12 Jan 2024 15:38:34 +0100
Subject: [PATCH 8/8] Added logic to GCNDPPCombine and SIInstrInfo, added tests
 for added logic

---
 llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp      | 36 ++++++++++++++---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  9 ++++-
 llvm/test/CodeGen/AMDGPU/dpp_combine.mir      | 32 ++++++++++++---
 .../CodeGen/AMDGPU/fold-mov-modifiers.mir     | 40 +++++++++++++++++++
 4 files changed, 104 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir

diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index a75082268c7739..ac1308b740bfd0 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -42,6 +42,7 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
 
 using namespace llvm;
 
@@ -274,18 +275,41 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
       break;
     }
 
-    if (auto *Mod0 = TII->getNamedOperand(OrigMI,
-                                          AMDGPU::OpName::src0_modifiers)) {
-      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
-                                          AMDGPU::OpName::src0_modifiers));
+    MachineOperand *MovMod = nullptr;
+    if (AMDGPU::hasNamedOperand(MovMI.getOpcode(),
+                                AMDGPU::OpName::src0_modifiers)) {
+      MovMod = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0_modifiers);
+      if (MovMod)
+        assert(0LL == (MovMod->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+    }
+    if (auto *Mod0 =
+            TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0_modifiers)) {
+      assert(NumOperands ==
+             AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src0_modifiers));
       assert(HasVOP3DPP ||
              (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
-      DPPInst.addImm(Mod0->getImm());
+      // MovMod   MIMod
+      // abs      abs   -> abs
+      // abs      neg   -> abs|neg
+      // neg      abs   -> abs
+      // neg      neg   -> 0
+      if (MovMod && MovMod->getImm() == SISrcMods::ABS &&
+          Mod0->getImm() == SISrcMods::NEG)
+        DPPInst.addImm(SISrcMods::ABS | SISrcMods::NEG);
+      else if (MovMod && MovMod->getImm() == SISrcMods::NEG &&
+               Mod0->getImm() == SISrcMods::NEG)
+        DPPInst.addImm(0);
+      else
+        DPPInst.addImm(Mod0->getImm());
       ++NumOperands;
     } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) {
-      DPPInst.addImm(0);
+      if (MovMod)
+        DPPInst.addImm(MovMod->getImm());
+      else
+        DPPInst.addImm(0);
       ++NumOperands;
     }
+
     auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
     assert(Src0);
     int Src0Idx = NumOperands;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 409d0477e065c9..4ccb5519714367 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3298,7 +3298,6 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
 bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
   case AMDGPU::V_MOV_B32_e32:
-  case AMDGPU::V_MOV_B32_e64:
   case AMDGPU::V_MOV_B64_PSEUDO:
   case AMDGPU::V_MOV_B64_e32:
   case AMDGPU::V_MOV_B64_e64:
@@ -3311,6 +3310,14 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
   case AMDGPU::V_ACCVGPR_READ_B32_e64:
   case AMDGPU::V_ACCVGPR_MOV_B32:
     return true;
+  case AMDGPU::V_MOV_B32_e64:
+    if (MI
+            .getOperand(AMDGPU::getNamedOperandIdx(
+                AMDGPU::V_MOV_B32_e64, AMDGPU::OpName::src0_modifiers))
+            .getImm() == 0)
+      return true;
+    else
+      return false;
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index 9442af2aa1c220..ef819e9330c934 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -300,8 +300,12 @@ body:             |
 # GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
 # GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
 # GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
+# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %10:vgpr_32 = V_ADD_F32_dpp %2, 3, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %12:vgpr_32 = V_ADD_F32_dpp %2, 2, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %14:vgpr_32 = V_ADD_F32_dpp %2, 2, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %16:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %18:vgpr_32 = V_ADD_F32_e64 4, %17, 8, %0, 0, 0, implicit $mode, implicit $exec
 
 name: add_f32_e64
 tracksRegLiveness: true
@@ -321,13 +325,29 @@ body:             |
     %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
     %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
 
+    ; this should combine MOV and ADD modifiers when they are both neg
+    %7:vgpr_32 = V_MOV_B32_dpp undef %2, 1, %1, 1, 15, 15, 1, implicit $exec
+    %8:vgpr_32 = V_ADD_F32_e64 1, %7, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+    ; this should combine MOV and ADD modifiers when they are abs(MOV) and neg(ADD)
+    %9:vgpr_32 = V_MOV_B32_dpp undef %2, 2, %1, 1, 15, 15, 1, implicit $exec
+    %10:vgpr_32 = V_ADD_F32_e64 1, %9, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+    ; this should combine MOV and ADD modifiers when they are neg(MOV) and abs(ADD)
+    %11:vgpr_32 = V_MOV_B32_dpp undef %2, 1, %1, 1, 15, 15, 1, implicit $exec
+    %12:vgpr_32 = V_ADD_F32_e64 2, %11, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+    ; this should combine MOV and ADD modifiers when they are both abs
+    %13:vgpr_32 = V_MOV_B32_dpp undef %2, 2, %1, 1, 15, 15, 1, implicit $exec
+    %14:vgpr_32 = V_ADD_F32_e64 2, %13, 0, %0, 0, 0, implicit $mode, implicit $exec
+
     ; this should be combined as modifiers other than abs|neg are default
-    %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
-    %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
+    %15:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+    %16:vgpr_32 = V_ADD_F32_e64 1, %15, 2, %0, 0, 0, implicit $mode, implicit $exec
 
     ; this shouldn't be combined as modifiers aren't abs|neg
-    %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
-    %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
+    %17:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+    %18:vgpr_32 = V_ADD_F32_e64 4, %17, 8, %0, 0, 0, implicit $mode, implicit $exec
 ...
 
 # check for e64 modifiers
diff --git a/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir b/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir
new file mode 100644
index 00000000000000..d2bf9b553d8e14
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir
@@ -0,0 +1,40 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands  %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: fold-no-modifier
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vcc
+
+    ; GCN-LABEL: name: fold-no-modifier
+    ; GCN: liveins: $vgpr0, $vgpr1, $vcc
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[COPY]], implicit-def $vcc, implicit $vcc, implicit $exec
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+    %3:vgpr_32 = V_ADDC_U32_e32 %0, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+---
+name: no-fold-with-modifier
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vcc
+
+    ; GCN-LABEL: name: no-fold-with-modifier
+    ; GCN: liveins: $vgpr0, $vgpr1, $vcc
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN-NEXT: [[V_MOV_B32_e64_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 1, 1, 0, implicit $exec
+    ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 [[COPY]], [[V_MOV_B32_e64_]], implicit-def $vcc, implicit $vcc, implicit $exec
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = V_MOV_B32_e64 1, 1, 0, implicit $exec
+    %3:vgpr_32 = V_ADDC_U32_e32 %0, %2, implicit-def $vcc, implicit $vcc, implicit $exec



More information about the llvm-commits mailing list