[llvm] [AMDGPU][MC] Support src modifiers for v_mov_b32 and v_movrel* instructions (PR #76498)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 29 03:40:23 PST 2023
https://github.com/ankurepa updated https://github.com/llvm/llvm-project/pull/76498
>From 685cd9b0bc8bbb388f1319124a59538eafed0586 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 28 Dec 2023 09:06:10 +0100
Subject: [PATCH 1/6] [AMDGPU][MC] Support src modifiers for v_mov_b32 and
v_movrel* instructions
Resolve #54795
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 71 +++++--
.../global-atomic-fadd.f32-no-rtn.ll | 12 +-
.../GlobalISel/global-atomic-fadd.f32-rtn.ll | 12 +-
llvm/test/CodeGen/AMDGPU/dpp64_combine.mir | 4 +-
llvm/test/CodeGen/AMDGPU/dpp_combine.mir | 180 ++++++++---------
.../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 182 +++++++++---------
.../AMDGPU/global-atomic-fadd.f32-no-rtn.ll | 32 +--
.../AMDGPU/global-atomic-fadd.f32-rtn.ll | 12 +-
.../CodeGen/AMDGPU/inserted-wait-states.mir | 4 +-
llvm/test/CodeGen/AMDGPU/remat-vop.mir | 24 +--
.../AMDGPU/schedule-ilp-liveness-tracking.mir | 8 +-
.../AMDGPU/sdwa-peephole-instr-gfx10.mir | 2 +-
.../CodeGen/AMDGPU/sdwa-peephole-instr.mir | 2 +-
llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir | 3 +-
llvm/test/CodeGen/AMDGPU/vopc_dpp.mir | 34 ++--
llvm/test/CodeGen/AMDGPU/wqm.mir | 4 +-
llvm/test/MC/AMDGPU/gfx10_asm_vop1.s | 36 ++++
.../AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s | 60 ++++++
.../MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s | 30 +++
.../test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s | 36 ++++
.../test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s | 36 ++++
.../AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s | 25 +++
.../MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s | 12 ++
.../MC/Disassembler/AMDGPU/gfx10_vop3.txt | 33 ++++
.../AMDGPU/gfx11_dasm_vop1_dpp16.txt | 60 ++++++
.../AMDGPU/gfx11_dasm_vop1_dpp8.txt | 30 +++
.../AMDGPU/gfx11_dasm_vop3_from_vop1.txt | 36 ++++
.../AMDGPU/gfx12_dasm_vop3_from_vop1.txt | 36 ++++
.../gfx12_dasm_vop3_from_vop1_dpp16.txt | 24 +++
.../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt | 12 ++
31 files changed, 777 insertions(+), 277 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ebe23a5eac57b5..e315fca0f4bf97 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2600,6 +2600,8 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
const MachineOperand &SrcOp = MI.getOperand(I);
+ if(I == 2)
+ MovDPP.addImm(0); // add src modifier
assert(!SrcOp.isFPImm());
if (SrcOp.isImm()) {
APInt Imm(64, SrcOp.getImm());
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 27a7c29cb1ac97..4bb1972cb0ea38 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -222,13 +222,20 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
let InsVOPDY = (ins Src0RC32:$src0Y);
let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
+
+ let HasModifiers = 1;
+ let HasClamp = 1;
+
+ let Src0Mod = FP32InputMods;
+ let Src0ModVOP3DPP = FPVRegInputMods;
+ let Src0ModDPP = FPVRegInputMods;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
+ defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
-let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
-defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
+ let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+ defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isMoveImm = 1
// FIXME: Specify SchedRW for READFIRSTLANE_B32
@@ -369,9 +376,21 @@ defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}
// Restrict src0 to be VGPR
+def VOP_PERMLANE : VOPProfile<[i32, i32, untyped, untyped]> {
+ let Src0RC32 = VRegSrc_32;
+ let Src0RC64 = VRegSrc_32;
+}
+
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
let Src0RC32 = VRegSrc_32;
let Src0RC64 = VRegSrc_32;
+
+ let HasModifiers = 1;
+ let HasClamp = 1;
+
+ let Src0Mod = FPVRegInputMods;
+ let Src0ModVOP3DPP = FPVRegInputMods;
+ let Src0ModDPP = FPVRegInputMods;
}
// Special case because there are no true output operands. Hack vdst
@@ -384,7 +403,9 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let Outs = (outs);
let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
- let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
+
+ let Ins64 = !con((ins Src0RC64:$vdst), !if(!eq(Src1RC, VSrc_b32), (ins FP32InputMods:$src0_modifiers), (ins FPVRegInputMods:$src0_modifiers)), (ins Src1RC:$src0, clampmod0:$clamp));
+
let Asm32 = getAsm32<1, 1>.ret;
let OutsSDWA = (outs Src0RC32:$vdst);
@@ -394,13 +415,20 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
let OutsDPP = (outs Src0RC32:$vdst);
- let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
+ let InsDPP = (ins Src0RC32:$old,
+ FPVRegInputMods:$src0_modifiers, Src0RC32:$src0,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers, Src0RC32:$src0,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
- let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
+ let InsDPP8 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers,Src0RC32:$src0,dpp8:$dpp8, FI:$fi);
let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;
+ let InsVOP3Base = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, clampmod0:$clamp);
+
+
let OutsVOP3DPP = (outs Src0RC64:$vdst);
let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
@@ -413,6 +441,11 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let HasDst = 0;
let EmitDst = 1; // force vdst emission
+ let HasModifiers = 1;
+ let HasClamp = 1;
+
+ let Src0Mod = !if(!eq(Src1RC, VSrc_b32), FP32InputMods, FPVRegInputMods);
+ let Src0ModVOP3DPP = FPVRegInputMods;
}
def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
@@ -658,9 +691,9 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1
let SubtargetPredicate = isGFX11Plus in {
// Restrict src0 to be VGPR
- def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
+ def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_PERMLANE,
getVOP1Pat64<int_amdgcn_permlane64,
- VOP_MOVRELS>.ret,
+ VOP_PERMLANE>.ret,
/*VOP1Only=*/ 1>;
defm V_MOV_B16_t16 : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
@@ -1252,18 +1285,18 @@ def V_MOV_B32_indirect_read : VPseudoInstSI<
let OtherPredicates = [isGFX8Plus] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+ (i32 (int_amdgcn_mov_dpp (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp_ctrl, timm:$row_mask,
timm:$bank_mask, timm:$bound_ctrl)),
- (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+ (V_MOV_B32_dpp VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src, (as_i32timm $dpp_ctrl),
(as_i32timm $row_mask), (as_i32timm $bank_mask),
(as_i1timm $bound_ctrl))
>;
class UpdateDPPPat<ValueType vt> : GCNPat <
- (vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
+ (vt (int_amdgcn_update_dpp vt:$old,(vt(VOP3Mods vt:$src, i32:$src0_modifiers)), timm:$dpp_ctrl,
timm:$row_mask, timm:$bank_mask,
timm:$bound_ctrl)),
- (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+ (V_MOV_B32_dpp VGPR_32:$old,i32:$src0_modifiers, VGPR_32:$src, (as_i32timm $dpp_ctrl),
(as_i32timm $row_mask), (as_i32timm $bank_mask),
(as_i1timm $bound_ctrl))
>;
@@ -1357,20 +1390,18 @@ defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
let OtherPredicates = [isGFX10Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Only]
-
//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//
-
let OtherPredicates = [isGFX11Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX11Only]
@@ -1381,8 +1412,8 @@ def : GCNPat <
let OtherPredicates = [isGFX12Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX12Only]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
index bfb2ecde783a63..c6b6572082d05a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
@@ -174,22 +174,22 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY11]], implicit-def dead $scc, implicit $exec
; GFX90A_GFX940-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_4]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
index d2c42292a03642..b74ac41ed813ba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
@@ -176,24 +176,24 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY6]], implicit-def dead $scc, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY9]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY9]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_3]], 0, [[S_MOV_B32_3]], [[V_ADD_F32_e64_3]], 0, implicit $exec
; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], 0, [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], 0, [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 15
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_4]]
; GFX11-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 16
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index dfaa7b4efac39c..b9446a09b03028 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -39,8 +39,8 @@ body: |
# DPP64 does not support all control values and must be split to become legal
# GCN-LABEL: name: dpp64_illegal_ctrl
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
-# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, 0, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, 0, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
# GCN: %0:vreg_64_align2 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
# GCN: %3:vreg_64_align2 = V_CEIL_F64_e32 %0, implicit $mode, implicit $exec
name: dpp64_illegal_ctrl
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index becc2bb095cc4b..265762f9c83b8a 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -25,29 +25,29 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; VOP2
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
; VOP1
- %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
- %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
- %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...
@@ -109,29 +109,29 @@ body: |
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; VOP2
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
; VOP1
- %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
- %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
- %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...
@@ -158,19 +158,19 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 15, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 15, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 15, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 15, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 15, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
@@ -196,19 +196,19 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
@@ -234,19 +234,19 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 1, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 1, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 1, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 1, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
@@ -268,28 +268,28 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %1, %3, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %1, %6, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %1, %9, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %1, %12, implicit $exec
%14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %14, 0, %0, 1, 14, 15, 0, implicit $exec
%16:vgpr_32 = V_SUB_CO_U32_e32 %1, %15, implicit-def $vcc, implicit $exec
; this cannot be combined because immediate as src0 isn't commutable
%17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec
+ %18:vgpr_32 = V_MOV_B32_dpp %17, 0, %0, 1, 14, 15, 0, implicit $exec
%19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
...
@@ -297,7 +297,7 @@ body: |
# check for floating point modifiers
# GCN-LABEL: name: add_f32_e64
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -314,19 +314,19 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this shouldn't be combined as omod is set
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
; this should be combined as all modifiers are default
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
; this should be combined as modifiers other than abs|neg are default
- %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
; this shouldn't be combined as modifiers aren't abs|neg
- %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...
@@ -346,11 +346,11 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this should be combined as all modifiers are default
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0,%0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
; this shouldn't be combined as clamp is set
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...
@@ -368,7 +368,7 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this shouldn't be combined as the carry-out is used
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec
S_NOP 0, implicit %5
@@ -380,7 +380,7 @@ body: |
# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
-# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
name: dpp_seq
tracksRegLiveness: true
@@ -391,12 +391,12 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
%5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%8:vgpr_32 = V_ADD_CO_U32_e32 %7, %1, implicit-def $vcc, implicit $exec
; this breaks the sequence
%9:vgpr_32 = V_SUB_CO_U32_e32 5, %7, implicit-def $vcc, implicit $exec
@@ -418,7 +418,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
%5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
@@ -450,7 +450,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 1, 1, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...
@@ -471,13 +471,13 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...
# EXEC mask changed between def and use - cannot combine
# GCN-LABEL: name: exec_changed
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
name: exec_changed
tracksRegLiveness: true
@@ -488,7 +488,7 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:sreg_64 = COPY $exec, implicit-def $exec
%6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
@@ -511,7 +511,7 @@ body: |
%3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
- %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, 0, %1, 1, 1, 1, 0, implicit $exec
%7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...
@@ -528,7 +528,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 1, 1, 0, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -545,7 +545,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -557,7 +557,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
@@ -569,7 +569,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...
@@ -580,43 +580,43 @@ name: dpp_undef_old
tracksRegLiveness: true
body: |
bb.0:
- %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which writes a physreg.
# GCN-LABEL: name: phys_dpp_mov_dst
-# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
bb.0:
- $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_old_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
@@ -637,8 +637,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -649,7 +649,7 @@ body: |
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -662,8 +662,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -673,7 +673,7 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
@@ -687,8 +687,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -698,8 +698,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -712,8 +712,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -723,8 +723,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: S_BRANCH %bb.1
# GCN: bb.1:
@@ -739,8 +739,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
S_BRANCH %bb.1
@@ -753,8 +753,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_CO_U32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec
@@ -768,8 +768,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec
@@ -803,7 +803,7 @@ body: |
...
# GCN-LABEL: name: dpp64_add64_first_combined
-# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
+# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, 0, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
# GCN: %0:vreg_64 = REG_SEQUENCE undef %7:vgpr_32, %subreg.sub0, %8, %subreg.sub1
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %0.sub1, undef $vcc, 0, implicit $exec
@@ -827,7 +827,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:sreg_64_xexec = IMPLICIT_DEF
%5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
...
@@ -847,7 +847,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %4
@@ -862,7 +862,7 @@ body: |
liveins: $vgpr0, $vgpr1
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...
@@ -876,8 +876,8 @@ body: |
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
%6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -903,7 +903,7 @@ body: |
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %5, %bb.2
- ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[PHI]], 323, 15, 15, 0, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[PHI]], 323, 15, 15, 0, implicit $exec
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
@@ -925,7 +925,7 @@ body: |
bb.1:
%4:vgpr_32 = PHI %1, %bb.0, %5, %bb.2
- %5:vgpr_32 = V_MOV_B32_dpp %1, %4, 323, 15, 15, 0, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %1, 0, %4, 323, 15, 15, 0, implicit $exec
bb.2:
%6:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %3, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index fe1345e29f133d..fc87170a1c8689 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -19,7 +19,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %0, 1, 15, 15, 1, implicit $exec
%5:sreg_32_xm0_xexec = IMPLICIT_DEF
%6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64 %4, %1, %5, 1, implicit $exec
@@ -27,11 +27,11 @@ body: |
%8:vgpr_32 = V_CVT_PK_U8_F32_e64 4, %4, 2, %2, 2, %1, 1, implicit $mode, implicit $exec
; should not be combined because src2 literal is illegal
- %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
; should not be combined on subtargets where src1 imm is illegal
- %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
...
---
@@ -58,23 +58,23 @@ body: |
%4:vgpr_32 = IMPLICIT_DEF
; should be combined because src2 allows sgpr
- %5:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_MED3_F32_e64 0, %5, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
; should be combined only on subtargets that allow sgpr for src1
- %7:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
; should be combined only on subtargets that allow sgpr for src1
- %9:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
; should be combined only on subtargets that allow inlinable constants for src1
- %11:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
; should not be combined when literal constants are used
- %13:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
%14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
...
---
@@ -93,9 +93,9 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %5, 1, 15, 15, 1, implicit $exec
%7:vgpr_32 = V_ADD_NC_U16_e64 4, %6, 8, %5, 0, 0, implicit $exec
...
@@ -111,9 +111,9 @@ body: |
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 13, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -125,23 +125,23 @@ body: |
%3:vgpr_32 = IMPLICIT_DEF
; this should not be combined because op_sel is not zero
- %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_DOT2_F32_F16 0, %4, 0, %0, 0, %2, 0, 5, 0, 0, 0, implicit $mode, implicit $exec
; this should not be combined because op_sel_hi is not all set
- %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec
- %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %8:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 13, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
- %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %10:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%13:vgpr_32 = V_FMA_MIXLO_F16 8, %12, 8, %0, 8, %2, 0, %2, 0, 7, implicit $mode, implicit $exec
- %14:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %14:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%15:vgpr_32 = V_FMA_MIXHI_F16 8, %14, 8, %0, 8, %2, 1, %0, 0, 7, implicit $mode, implicit $exec
...
@@ -158,7 +158,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_FMAC_F32_e64 2, %4, 2, %1, 2, %2, 1, 2, implicit $mode, implicit $exec
...
@@ -179,23 +179,23 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 0, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e64 %1, %6, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e64 %1, %9, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e64 %1, %12, implicit $exec
%14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %14, 0, %0, 1, 14, 15, 0, implicit $exec
%16:vgpr_32 = V_SUB_U32_e64 %1, %15, 0, implicit $exec
...
@@ -215,13 +215,13 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
...
@@ -243,24 +243,24 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 1, implicit $exec
%5:vgpr_32 = IMPLICIT_DEF
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 15, 1, implicit $exec
%7:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %6, 2, %1, 1, 2, implicit $mode, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_SUB_U32_e64 %1, %9, 1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 14, 15, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 14, 15, 0, implicit $exec
%13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1, %12, 0, implicit $exec
; this cannot be combined because immediate as src0 isn't commutable
%15:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %16:vgpr_32 = V_MOV_B32_dpp %15, %0, 1, 14, 15, 0, implicit $exec
+ %16:vgpr_32 = V_MOV_B32_dpp %15, 0, %0, 1, 14, 15, 0, implicit $exec
%17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
...
@@ -284,19 +284,19 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this should be combined as e64
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
; this should be combined and shrunk as all modifiers are default
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
; this should be combined and shrunk as modifiers other than abs|neg are default
- %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
; this should be combined as e64
- %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...
@@ -316,11 +316,11 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this should be combined and shrunk as all modifiers are default
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
; this should be combined as _e64
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...
@@ -330,7 +330,7 @@ body: |
# GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
-# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
name: dpp_seq
tracksRegLiveness: true
@@ -341,12 +341,12 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
; this breaks the sequence
%9:vgpr_32 = V_SUB_U32_e32 5, %7, implicit $exec
@@ -368,7 +368,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
@@ -400,7 +400,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 1, 1, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...
@@ -421,13 +421,13 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...
# EXEC mask changed between def and use - cannot combine
# GCN-LABEL: name: exec_changed
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
name: exec_changed
tracksRegLiveness: true
@@ -438,7 +438,7 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:sreg_64 = COPY $exec, implicit-def $exec
%6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
@@ -461,7 +461,7 @@ body: |
%3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
- %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, 0, %1, 1, 1, 1, 0, implicit $exec
%7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...
@@ -478,7 +478,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 1, 1, 0, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -495,7 +495,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -507,7 +507,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
@@ -519,7 +519,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...
@@ -530,43 +530,43 @@ name: dpp_undef_old
tracksRegLiveness: true
body: |
bb.0:
- %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which writes a physreg.
# GCN-LABEL: name: phys_dpp_mov_dst
-# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
bb.0:
- $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_old_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
@@ -587,8 +587,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -599,7 +599,7 @@ body: |
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -612,8 +612,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -623,7 +623,7 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
@@ -637,8 +637,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -648,8 +648,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -662,8 +662,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -673,8 +673,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: S_BRANCH %bb.1
# GCN: bb.1:
@@ -689,8 +689,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
S_BRANCH %bb.1
@@ -703,8 +703,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec
@@ -718,8 +718,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %8, implicit $exec
@@ -727,8 +727,8 @@ body: |
...
# GCN-LABEL: name: dpp_reg_sequence_src2_reject
-#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
#GCN: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
#GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
@@ -741,8 +741,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
; use of dpp arg as src2, reject
@@ -752,7 +752,7 @@ body: |
...
# GCN-LABEL: name: dpp_reg_sequence_src2
-#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
#GCN: %4:vreg_64 = REG_SEQUENCE undef %2:vgpr_32, %subreg.sub0, %3, %subreg.sub1
#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
#GCN: %6:vgpr_32 = V_FMA_F32_e64_dpp %8, 2, %1.sub0, 2, %5, 2, %4.sub1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -764,8 +764,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
%6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
@@ -810,12 +810,12 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:sreg_32_xm0_xexec = IMPLICIT_DEF
%5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
; src2 is legal for _e64
- %6:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %2, 0,%1, 1, 15, 15, 1, implicit $exec
%7:sreg_32_xm0_xexec = IMPLICIT_DEF
%8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
...
@@ -835,7 +835,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0,%1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %4
@@ -850,7 +850,7 @@ body: |
liveins: $vgpr0, $vgpr1
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0, 0,%1, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...
@@ -864,8 +864,8 @@ body: |
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0,%1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
%6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
index 8cd25298d74739..9f23ad94597399 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
@@ -173,17 +173,17 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX908-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]]
@@ -235,17 +235,17 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX90A_GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]]
@@ -293,13 +293,13 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX11_GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX11_GFX12-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX11_GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 353, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 353, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 354, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 354, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 356, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 356, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 360, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 360, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX11_GFX12-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11_GFX12-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
index 77be7f506aaf2a..699c5e61b30bbf 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
@@ -179,19 +179,19 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX11-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], killed [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, killed [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 15
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_3]]
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 16
diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
index cd80098b012094..09703117f41a46 100644
--- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -419,11 +419,11 @@ name: dpp
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $vgpr1 = V_MOV_B32_dpp $vgpr1, $vgpr0, 0, 15, 15, 0, implicit $exec
+ $vgpr1 = V_MOV_B32_dpp $vgpr1, 0, $vgpr0, 0, 15, 15, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
- $vgpr3 = V_MOV_B32_dpp $vgpr3, $vgpr0, 0, 15, 15, 0, implicit $exec
+ $vgpr3 = V_MOV_B32_dpp $vgpr3, 0, $vgpr0, 0, 15, 15, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index 248a9e2ddb6360..720736b51c8394 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -82,16 +82,16 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mov_b32_e64
- ; GCN: renamable $vgpr0 = V_MOV_B32_e64 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 2, implicit $exec
+ ; GCN: renamable $vgpr0 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+ ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 0, 2, 0, implicit $exec
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 0, 3, 0, implicit $exec
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
- %0:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e64 2, implicit $exec
- %2:vgpr_32 = V_MOV_B32_e64 3, implicit $exec
+ %0:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e64 0, 2, 0, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e64 0, 3, 0, implicit $exec
S_NOP 0, implicit %0
S_NOP 0, implicit %1
S_NOP 0, implicit %2
@@ -105,10 +105,10 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_mov_b32_dpp
- ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
@@ -116,9 +116,9 @@ body: |
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
- %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
S_NOP 0, implicit %1
S_NOP 0, implicit %2
S_NOP 0, implicit %3
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir b/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
index 4b6e204ecf9570..5c6783b376a1a6 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
@@ -9,8 +9,8 @@ body: |
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %src0:vgpr_32 = V_MOV_B32_e64 0, implicit $exec
- ; CHECK-NEXT: %src1:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
+ ; CHECK-NEXT: %src0:vgpr_32 = V_MOV_B32_e64 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: %src1:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
; CHECK-NEXT: %live0:vgpr_32 = V_ADD_U32_e32 %src0, %src1, implicit $exec
; CHECK-NEXT: %live1:vgpr_32 = V_ADD_U32_e32 %live0, %src1, implicit $exec
; CHECK-NEXT: {{ $}}
@@ -20,8 +20,8 @@ body: |
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
- %src0:vgpr_32 = V_MOV_B32_e64 0, implicit $exec
- %src1:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
+ %src0:vgpr_32 = V_MOV_B32_e64 0, 0, 0, implicit $exec
+ %src1:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
%live0:vgpr_32 = V_ADD_U32_e32 %src0:vgpr_32, %src1:vgpr_32, implicit $exec
%live1:vgpr_32 = V_ADD_U32_e32 %live0:vgpr_32, %src1:vgpr_32, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
index dcb51fcb766533..ec9e1db6e76499 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
@@ -100,7 +100,7 @@ body: |
%24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
%25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
- %26 = V_MOV_B32_e64 %25, implicit $exec
+ %26 = V_MOV_B32_e64 0, %25, 0, implicit $exec
%26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
%27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
%28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
index a2cad1398bcd1b..4108102e865d68 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
@@ -109,7 +109,7 @@ body: |
%24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
%25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
- %26 = V_MOV_B32_e64 %25, implicit $exec
+ %26 = V_MOV_B32_e64 0, %25, 0, implicit $exec
%26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
%27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
%28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
index 876fa6f5f27446..7647cf9f634715 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -54,8 +54,9 @@ body: |
%18 = V_LSHRREV_B32_e64 16, %17, implicit $exec
%19 = V_READLANE_B32 killed %18, 0, implicit-def $vcc, implicit $exec
- %20 = V_MOV_B32_e64 %19, implicit $exec
+ %20 = V_MOV_B32_e64 0, %19, 0, implicit $exec
FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
+
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index b8c935927ff707..c29bccf2fa6a5d 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -17,15 +17,15 @@ body: |
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec
; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc
@@ -35,41 +35,41 @@ body: |
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
V_CMP_LT_F32_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
; unsafe to combine cmpx
- %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
V_CMPX_EQ_I16_t16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec
- %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %8:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
; unsafe to combine cmpx
- %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %10:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
V_CMPX_GT_U32_nosdst_e64 %10, %0, implicit-def $exec, implicit $mode, implicit $exec
- %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec
; shrink
- %13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%14:sgpr_32 = V_CMP_NGE_F32_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec
; do not shrink True16 instructions
- %15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
; do not shrink, sdst used
- %17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%18:sgpr_32 = V_CMP_NGE_F32_e64 0, %17, 0, %0, 0, implicit $mode, implicit $exec
%19:sgpr_32 = S_AND_B32 %18, 10101, implicit-def $scc
; commute
- %20:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %20:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
V_CMP_LT_I32_e32 %0, %20, implicit-def $vcc, implicit $exec
...
@@ -88,9 +88,9 @@ body: |
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 14, 1, implicit $exec
; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[COPY1]], 1, 13, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
@@ -99,10 +99,10 @@ body: |
; Do not combine VOPC when row_mask or bank_mask is not 0xf
; All cases are covered by generic rules for creating DPP instructions
- %4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 15, 14, 1, implicit $exec
%99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 13, 15, 1, implicit $exec
%6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 20476b6afd674c..27373dee64f338 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -178,7 +178,7 @@ body: |
%11:vgpr_32 = COPY %16
%10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
%14:vgpr_32 = COPY %7
- %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %14, 0, killed %10, 323, 12, 15, 0, implicit $exec
early-clobber %15:vgpr_32 = STRICT_WWM killed %13, implicit $exec
BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, implicit $exec
S_ENDPGM 0
@@ -210,7 +210,7 @@ body: |
%8:sreg_64 = COPY $exec
%9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
- %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, 0, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
%12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
early-clobber %13:sreg_32 = STRICT_WWM %9:vgpr_32, implicit $exec
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
index 1cfafebe2c3cd4..e84f559c854a49 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
@@ -106,6 +106,15 @@ v_mov_b32_e64 v5, 0.5
v_mov_b32_e64 v5, -4.0
// GFX10: encoding: [0x05,0x00,0x81,0xd5,0xf7,0x00,0x00,0x00]
+v_mov_b32_e64 v5, |v1|
+// GFX10: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, -m0
+// GFX10: encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20]
+
v_mov_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// GFX10: encoding: [0xf9,0x02,0x0a,0x7e,0x01,0x06,0x06,0x00]
@@ -11005,6 +11014,15 @@ v_movreld_b32_e64 v5, 0.5
v_movreld_b32_e64 v5, -4.0
// GFX10: encoding: [0x05,0x00,0xc2,0xd5,0xf7,0x00,0x00,0x00]
+v_movreld_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+
+v_movreld_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+
+v_movreld_b32_e64 v5, -s1
+// GFX10: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+
v_movreld_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// GFX10: encoding: [0xf9,0x84,0x00,0x7e,0x02,0x06,0x06,0x00]
@@ -11035,6 +11053,12 @@ v_movrels_b32_e64 v255, v1
v_movrels_b32_e64 v5, v255
// GFX10: encoding: [0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
+v_movrels_b32_e64 v5, -v1
+//GFX10: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrels_b32_e64 v5, |v2|
+//GFX10: encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+
v_movrels_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// GFX10: encoding: [0xf9,0x86,0x00,0x7e,0x02,0x06,0x06,0x00]
@@ -11077,6 +11101,12 @@ v_movrelsd_b32_e64 v255, v1
v_movrelsd_b32_e64 v5, v255
// GFX10: encoding: [0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// GFX10: encoding: [0xf9,0x88,0x00,0x7e,0x02,0x06,0x06,0x00]
@@ -11122,6 +11152,12 @@ v_movrelsd_2_b32_e64 v255, v1
v_movrelsd_2_b32_e64 v5, v255
// GFX10: encoding: [0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_2_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_2_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_2_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// GFX10: encoding: [0xf9,0x90,0x00,0x7e,0x02,0x06,0x06,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
index 9a65c6687f3f84..74f6e9391c989a 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
@@ -1974,6 +1974,18 @@ v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:
v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -2016,6 +2028,18 @@ v_movreld_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -2058,6 +2082,18 @@ v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -2100,6 +2136,18 @@ v_movrelsd_2_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -2142,6 +2190,18 @@ v_movrelsd_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:
v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
index 3897b82785f65b..6db9923c41f5f2 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
@@ -495,6 +495,12 @@ v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX11: [0x05,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
@@ -507,6 +513,12 @@ v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11 : [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -516,6 +528,12 @@ v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -525,6 +543,12 @@ v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+//GFX11 : [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+//GFX11 : [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -534,6 +558,12 @@ v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+//GFX11 : [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+//GFX11: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
index fb4e9108fe1d1a..c1790c953d906d 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
@@ -2538,6 +2538,15 @@ v_mov_b32_e64 v5, src_scc
v_mov_b32_e64 v255, 0xaf123456
// GFX11: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+v_mov_b32_e64 v5, |v1|
+// GFX11: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -m0
+// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+
+v_mov_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+
v_movreld_b32_e64 v5, v1
// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
@@ -2583,24 +2592,51 @@ v_movreld_b32_e64 v5, src_scc
v_movreld_b32_e64 v255, 0xaf123456
// GFX11: encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+v_movreld_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+
+v_movreld_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+
+v_movreld_b32_e64 v5, -s1
+// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+
v_movrels_b32_e64 v5, v1
// GFX11: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
v_movrels_b32_e64 v255, v255
// GFX11: encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
+v_movrels_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrels_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_2_b32_e64 v5, v1
// GFX11: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
v_movrelsd_2_b32_e64 v255, v255
// GFX11: encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_2_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_2_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_b32_e64 v5, v1
// GFX11: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
v_movrelsd_b32_e64 v255, v255
// GFX11: encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+
v_nop_e64
// GFX11: encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
index e35bb632906722..ebda8b65f8505e 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
@@ -2538,6 +2538,15 @@ v_mov_b32_e64 v5, src_scc
v_mov_b32_e64 v255, 0xaf123456
// GFX12: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+v_mov_b32_e64 v5, |v1|
+// GFX12: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -m0
+// GFX12: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+
+v_mov_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+
v_movreld_b32_e64 v5, v1
// GFX12: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
@@ -2583,24 +2592,51 @@ v_movreld_b32_e64 v5, src_scc
v_movreld_b32_e64 v255, 0xaf123456
// GFX12: encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+v_movreld_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+
+v_movreld_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+
+v_movreld_b32_e64 v5, -s1
+// GFX12: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+
v_movrels_b32_e64 v5, v1
// GFX12: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
v_movrels_b32_e64 v255, v255
// GFX12: encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
+v_movrels_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrels_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_2_b32_e64 v5, v1
// GFX12: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
v_movrelsd_2_b32_e64 v255, v255
// GFX12: encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_2_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_2_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_b32_e64 v5, v1
// GFX12: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
v_movrelsd_b32_e64 v255, v255
// GFX12: encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+
v_nop_e64
// GFX12: encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
index 6b915bd14683a2..def9e8212f1556 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
@@ -1974,6 +1974,18 @@ v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:
v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX12: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -2016,6 +2028,19 @@ v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX12: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
+
v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
index 61266f3776c284..4d21a54b354744 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
@@ -498,6 +498,12 @@ v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -507,6 +513,12 @@ v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
index 4c0170ca4e4747..d9661b1e7a4332 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
@@ -14004,6 +14004,12 @@
# GFX10: v_mov_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00]
0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00
+# GFX10: v_mov_b32_e64 v5, |v1| ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
+
+# GFX10: v_mov_b32_e64 v5, -m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20
+
# GFX10: v_movreld_b32_e64 v255, v1 ; encoding: [0xff,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
0xff,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
@@ -14028,6 +14034,15 @@
# GFX10: v_movreld_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00
+# GFX10: v_movreld_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movreld_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00
+
+# GFX10: v_movreld_b32_e64 v5, -s1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20
+
# GFX10: v_movrels_b32_e64 v255, v1 ; encoding: [0xff,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
0xff,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00
@@ -14037,6 +14052,12 @@
# GFX10: v_movrels_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00
+# GFX10: v_movrels_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movrels_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00
+
# GFX10: v_movrelsd_2_b32_e64 v255, v1 ; encoding: [0xff,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
0xff,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00
@@ -14046,6 +14067,12 @@
# GFX10: v_movrelsd_2_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00
+# GFX10: v_movrelsd_2_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movrelsd_2_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00
+
# GFX10: v_movrelsd_b32_e64 v255, v1 ; encoding: [0xff,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
0xff,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00
@@ -14055,6 +14082,12 @@
# GFX10: v_movrelsd_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00
+# GFX10: v_movrelsd_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movrelsd_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00
+
# GFX10: v_mqsad_pk_u16_u8 v[254:255], v[1:2], v2, v[3:4] ; encoding: [0xfe,0x00,0x73,0xd5,0x01,0x05,0x0e,0x04]
0xfe,0x00,0x73,0xd5,0x01,0x05,0x0e,0x04
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
index 8758305258387c..922c7f5ce14163 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
@@ -1765,6 +1765,18 @@
# GFX11: v_mov_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+# GFX11: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movreld_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1807,6 +1819,18 @@
# GFX11: v_movreld_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrels_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1849,6 +1873,18 @@
# GFX11: v_movrels_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrelsd_2_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1891,6 +1927,18 @@
# GFX11: v_movrelsd_2_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrelsd_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1933,6 +1981,18 @@
# GFX11: v_movrelsd_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_not_b16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
index a3531410ac401f..bc785c28e46680 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
@@ -253,30 +253,60 @@
# GFX11: v_mov_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movreld_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movreld_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrels_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrels_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrelsd_2_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_2_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrelsd_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_not_b16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
index 770e3c6e7a6f30..8fbd2708f0f26f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
@@ -2313,6 +2313,15 @@
# GFX11: v_mov_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
+# GFX11: v_mov_b32_e64 v5, |v1| ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
+
+# GFX11: v_mov_b32_e64 v5, -m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20
+
+# GFX11: v_mov_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20
+
# GFX11: v_movreld_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
@@ -2358,24 +2367,51 @@
# GFX11: v_movreld_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
+# GFX11: v_movreld_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movreld_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00
+
+# GFX11: v_movreld_b32_e64 v5, -s1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20
+
# GFX11: v_movrels_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00
# GFX11: v_movrels_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00
+# GFX11: v_movrels_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movrels_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00
+
# GFX11: v_movrelsd_2_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00
# GFX11: v_movrelsd_2_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00
+# GFX11: v_movrelsd_2_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movrelsd_2_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00
+
# GFX11: v_movrelsd_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00
# GFX11: v_movrelsd_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00
+# GFX11: v_movrelsd_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movrelsd_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00
+
# GFX11: v_nop ; encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00]
0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
index 4fe4284e8eb4e6..c7c1ba84e86f67 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
@@ -2313,6 +2313,15 @@
# GFX12: v_mov_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
+# GFX12: v_mov_b32_e64 v5, |v1| ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
+
+# GFX12: v_mov_b32_e64 v5, -m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20
+
+# GFX12: v_mov_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20
+
# GFX12: v_movreld_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
@@ -2358,24 +2367,51 @@
# GFX12: v_movreld_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
+# GFX12: v_movreld_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20
+
+# GFX12: v_movreld_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00
+
+# GFX12: v_movreld_b32_e64 v5, -s1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20
+
# GFX12: v_movrels_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00
# GFX12: v_movrels_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00
+# GFX12: v_movrels_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20
+
+# GFX12: v_movrels_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00
+
# GFX12: v_movrelsd_2_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00
# GFX12: v_movrelsd_2_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00
+# GFX12: v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+[0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+# GFX12: _movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+[0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
# GFX12: v_movrelsd_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00
# GFX12: v_movrelsd_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00
+# GFX12: v_movrelsd_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20
+
+# GFX12: v_movrelsd_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00
+
# GFX12: v_not_b16_e64 v5, v1 ; encoding: [0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
index e914d139e240e1..47e2e7bd0afa5a 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
@@ -1764,6 +1764,18 @@
# GFX12: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX12: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX12: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
@@ -1806,6 +1818,18 @@
# GFX12: v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX12: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX12: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX12: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX12: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX12: v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
index 2a4b677620d387..964f3c3126cb0e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
@@ -402,12 +402,24 @@
# GFX12: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX12: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX12: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX12: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX12: v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX12: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX12: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX12: v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
>From 6c3877e169bba21305991da4b4567b38951b115a Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 28 Dec 2023 11:41:39 +0100
Subject: [PATCH 2/6] Formating
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +-
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 19 ++++++++++++-------
llvm/test/CodeGen/AMDGPU/dpp_combine.mir | 2 +-
.../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 2 +-
4 files changed, 15 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e315fca0f4bf97..409d0477e065c9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2600,7 +2600,7 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
const MachineOperand &SrcOp = MI.getOperand(I);
- if(I == 2)
+ if (I == 2)
MovDPP.addImm(0); // add src modifier
assert(!SrcOp.isFPImm());
if (SrcOp.isImm()) {
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 4bb1972cb0ea38..56e67f2c54fffd 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -404,7 +404,10 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let Outs = (outs);
let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
- let Ins64 = !con((ins Src0RC64:$vdst), !if(!eq(Src1RC, VSrc_b32), (ins FP32InputMods:$src0_modifiers), (ins FPVRegInputMods:$src0_modifiers)), (ins Src1RC:$src0, clampmod0:$clamp));
+ let Ins64 = !con((ins Src0RC64:$vdst),
+ !if(!eq(Src1RC, VSrc_b32), (ins FP32InputMods:$src0_modifiers),
+ (ins FPVRegInputMods:$src0_modifiers)),
+ (ins Src1RC:$src0, clampmod0:$clamp));
let Asm32 = getAsm32<1, 1>.ret;
@@ -423,12 +426,11 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
- let InsDPP8 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers,Src0RC32:$src0,dpp8:$dpp8, FI:$fi);
+ let InsDPP8 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;
let InsVOP3Base = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, clampmod0:$clamp);
-
let OutsVOP3DPP = (outs Src0RC64:$vdst);
let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
@@ -1390,7 +1392,8 @@ defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
let OtherPredicates = [isGFX10Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
+ timm:$dpp8)),
(V_MOV_B32_dpp8_gfx10 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
@@ -1400,8 +1403,9 @@ def : GCNPat <
//===----------------------------------------------------------------------===//
let OtherPredicates = [isGFX11Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
+ timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX11Only]
@@ -1412,7 +1416,8 @@ def : GCNPat <
let OtherPredicates = [isGFX12Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
+ timm:$dpp8)),
(V_MOV_B32_dpp8_gfx12 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index 265762f9c83b8a..9442af2aa1c220 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -346,7 +346,7 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this should be combined as all modifiers are default
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0,%0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
; this shouldn't be combined as clamp is set
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index fc87170a1c8689..5f91d1af8fe462 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -850,7 +850,7 @@ body: |
liveins: $vgpr0, $vgpr1
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_dpp %0, 0,%1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...
>From 29138392ca1950dbf2fca5cceb154e9acdb0f451 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 28 Dec 2023 14:37:43 +0100
Subject: [PATCH 3/6] Undo formatting
---
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 56e67f2c54fffd..a112e14e6b4437 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -232,10 +232,10 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
+defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
- let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
- defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
+let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isMoveImm = 1
// FIXME: Specify SchedRW for READFIRSTLANE_B32
>From 2dd479a2f9ecd97bd338f66c05e086cd104d4ab6 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 29 Dec 2023 12:25:10 +0100
Subject: [PATCH 4/6] Delete extra line
---
llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
index def9e8212f1556..e6fbe31586afd4 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
@@ -2040,7 +2040,6 @@ v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
// GFX12: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-
v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
>From 2177ecf576fd923ac48ebd61b926973199d80284 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 29 Dec 2023 12:39:11 +0100
Subject: [PATCH 5/6] Dasm test fix
---
.../AMDGPU/gfx11_dasm_vop1_dpp16.txt | 60 -------------------
.../AMDGPU/gfx11_dasm_vop1_dpp8.txt | 30 ----------
.../gfx11_dasm_vop3_dpp16_from_vop1.txt | 60 +++++++++++++++++++
.../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt | 30 ++++++++++
4 files changed, 90 insertions(+), 90 deletions(-)
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
index 922c7f5ce14163..8758305258387c 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
@@ -1765,18 +1765,6 @@
# GFX11: v_mov_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX11: v_movreld_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1819,18 +1807,6 @@
# GFX11: v_movreld_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX11: v_movrels_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1873,18 +1849,6 @@
# GFX11: v_movrels_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX11: v_movrelsd_2_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1927,18 +1891,6 @@
# GFX11: v_movrelsd_2_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX11: v_movrelsd_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1981,18 +1933,6 @@
# GFX11: v_movrelsd_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX11: v_not_b16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
index bc785c28e46680..a3531410ac401f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
@@ -253,60 +253,30 @@
# GFX11: v_mov_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX11: v_movreld_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movreld_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX11: v_movrels_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrels_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX11: v_movrelsd_2_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_2_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX11: v_movrelsd_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX11: v_not_b16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
index cf29efa5ff56bb..f9f76a1ae2147b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
@@ -1764,6 +1764,18 @@
# GFX11: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX11: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
@@ -1806,6 +1818,18 @@
# GFX11: v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
@@ -1848,6 +1872,18 @@
# GFX11: v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
@@ -1890,6 +1926,18 @@
# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
@@ -1932,6 +1980,18 @@
# GFX11: v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
index bfda6d10c2f6d4..e862e32c456e46 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
@@ -402,30 +402,60 @@
# GFX11: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX11: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX11: v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX11: v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
>From be59750f015e17d536efc3ac163d6cfa9a83f92c Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 29 Dec 2023 12:40:08 +0100
Subject: [PATCH 6/6] Deleted line
---
llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
index 7647cf9f634715..1a04fdc8bae1ac 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -59,4 +59,3 @@ body: |
FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
-
More information about the llvm-commits
mailing list