[Mlir-commits] [llvm] [clang-tools-extra] [mlir] [clang] [AMDGPU][MC] Support src modifiers for v_mov_b32 and v_movrel* instructions (PR #76498)
llvmlistbot at llvm.org
Wed Jan 31 02:16:07 PST 2024
ankurepa (https://github.com/ankurepa) updated https://github.com/llvm/llvm-project/pull/76498
From 685cd9b0bc8bbb388f1319124a59538eafed0586 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 28 Dec 2023 09:06:10 +0100
Subject: [PATCH 01/20] [AMDGPU][MC] Support src modifiers for v_mov_b32 and
v_movrel* instructions
Resolve #54795
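
For reference, a rough sketch of the assembler syntax this is intended to
accept (illustrative only; register choices are arbitrary, and the
authoritative forms are the MC test updates below):

  // VOP3 (e64) encodings of v_mov_b32 gain neg/abs source modifiers and clamp:
  v_mov_b32_e64 v5, -v1
  v_mov_b32_e64 v5, |v1|
  v_mov_b32_e64 v5, -v1 clamp
  // The DPP forms take the same modifiers:
  v_mov_b32_e64_dpp v5, -|v1| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
  // v_movrels_b32 and v_movreld_b32 are extended the same way:
  v_movrels_b32_e64 v5, -v1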
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 71 +++++--
.../global-atomic-fadd.f32-no-rtn.ll | 12 +-
.../GlobalISel/global-atomic-fadd.f32-rtn.ll | 12 +-
llvm/test/CodeGen/AMDGPU/dpp64_combine.mir | 4 +-
llvm/test/CodeGen/AMDGPU/dpp_combine.mir | 180 ++++++++---------
.../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 182 +++++++++---------
.../AMDGPU/global-atomic-fadd.f32-no-rtn.ll | 32 +--
.../AMDGPU/global-atomic-fadd.f32-rtn.ll | 12 +-
.../CodeGen/AMDGPU/inserted-wait-states.mir | 4 +-
llvm/test/CodeGen/AMDGPU/remat-vop.mir | 24 +--
.../AMDGPU/schedule-ilp-liveness-tracking.mir | 8 +-
.../AMDGPU/sdwa-peephole-instr-gfx10.mir | 2 +-
.../CodeGen/AMDGPU/sdwa-peephole-instr.mir | 2 +-
llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir | 3 +-
llvm/test/CodeGen/AMDGPU/vopc_dpp.mir | 34 ++--
llvm/test/CodeGen/AMDGPU/wqm.mir | 4 +-
llvm/test/MC/AMDGPU/gfx10_asm_vop1.s | 36 ++++
.../AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s | 60 ++++++
.../MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s | 30 +++
.../test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s | 36 ++++
.../test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s | 36 ++++
.../AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s | 25 +++
.../MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s | 12 ++
.../MC/Disassembler/AMDGPU/gfx10_vop3.txt | 33 ++++
.../AMDGPU/gfx11_dasm_vop1_dpp16.txt | 60 ++++++
.../AMDGPU/gfx11_dasm_vop1_dpp8.txt | 30 +++
.../AMDGPU/gfx11_dasm_vop3_from_vop1.txt | 36 ++++
.../AMDGPU/gfx12_dasm_vop3_from_vop1.txt | 36 ++++
.../gfx12_dasm_vop3_from_vop1_dpp16.txt | 24 +++
.../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt | 12 ++
31 files changed, 777 insertions(+), 277 deletions(-)
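
A note on the mechanical churn in the CodeGen tests below: V_MOV_B32_dpp and
the V_MOV_B32_dpp8 variants now carry an explicit src0_modifiers immediate, so
every "V_MOV_B32_dpp %old, %src, ..." in the checks becomes
"V_MOV_B32_dpp %old, 0, %src, ...", where the inserted 0 means no neg/abs
modifier is applied. expandMovDPP64 adds the same zero immediate when emitting
the per-half 32-bit moves.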
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ebe23a5eac57b..e315fca0f4bf9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2600,6 +2600,8 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
const MachineOperand &SrcOp = MI.getOperand(I);
+ if (I == 2)
+   MovDPP.addImm(0); // Insert the default src0 modifier (no neg/abs).
assert(!SrcOp.isFPImm());
if (SrcOp.isImm()) {
APInt Imm(64, SrcOp.getImm());
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 27a7c29cb1ac9..4bb1972cb0ea3 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -222,13 +222,20 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
let InsVOPDY = (ins Src0RC32:$src0Y);
let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
+
+ let HasModifiers = 1;
+ let HasClamp = 1;
+
+ let Src0Mod = FP32InputMods;
+ let Src0ModVOP3DPP = FPVRegInputMods;
+ let Src0ModDPP = FPVRegInputMods;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
+ defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
-let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
-defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
+ let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+ defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isMoveImm = 1
// FIXME: Specify SchedRW for READFIRSTLANE_B32
@@ -369,9 +376,21 @@ defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}
// Restrict src0 to be VGPR
+def VOP_PERMLANE : VOPProfile<[i32, i32, untyped, untyped]> {
+ let Src0RC32 = VRegSrc_32;
+ let Src0RC64 = VRegSrc_32;
+}
+
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
let Src0RC32 = VRegSrc_32;
let Src0RC64 = VRegSrc_32;
+
+ let HasModifiers = 1;
+ let HasClamp = 1;
+
+ let Src0Mod = FPVRegInputMods;
+ let Src0ModVOP3DPP = FPVRegInputMods;
+ let Src0ModDPP = FPVRegInputMods;
}
// Special case because there are no true output operands. Hack vdst
@@ -384,7 +403,9 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let Outs = (outs);
let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
- let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
+
+ let Ins64 = !con((ins Src0RC64:$vdst), !if(!eq(Src1RC, VSrc_b32), (ins FP32InputMods:$src0_modifiers), (ins FPVRegInputMods:$src0_modifiers)), (ins Src1RC:$src0, clampmod0:$clamp));
+
let Asm32 = getAsm32<1, 1>.ret;
let OutsSDWA = (outs Src0RC32:$vdst);
@@ -394,13 +415,20 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
let OutsDPP = (outs Src0RC32:$vdst);
- let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
+ let InsDPP = (ins Src0RC32:$old,
+ FPVRegInputMods:$src0_modifiers, Src0RC32:$src0,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers, Src0RC32:$src0,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
- let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
+ let InsDPP8 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;
+ let InsVOP3Base = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, clampmod0:$clamp);
+
+
let OutsVOP3DPP = (outs Src0RC64:$vdst);
let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
@@ -413,6 +441,11 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let HasDst = 0;
let EmitDst = 1; // force vdst emission
+ let HasModifiers = 1;
+ let HasClamp = 1;
+
+ let Src0Mod = !if(!eq(Src1RC, VSrc_b32), FP32InputMods, FPVRegInputMods);
+ let Src0ModVOP3DPP = FPVRegInputMods;
}
def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
@@ -658,9 +691,9 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1
let SubtargetPredicate = isGFX11Plus in {
// Restrict src0 to be VGPR
- def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
+ def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_PERMLANE,
getVOP1Pat64<int_amdgcn_permlane64,
- VOP_MOVRELS>.ret,
+ VOP_PERMLANE>.ret,
/*VOP1Only=*/ 1>;
defm V_MOV_B16_t16 : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
@@ -1252,18 +1285,18 @@ def V_MOV_B32_indirect_read : VPseudoInstSI<
let OtherPredicates = [isGFX8Plus] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+ (i32 (int_amdgcn_mov_dpp (i32 (VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp_ctrl, timm:$row_mask,
timm:$bank_mask, timm:$bound_ctrl)),
- (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+ (V_MOV_B32_dpp VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src, (as_i32timm $dpp_ctrl),
(as_i32timm $row_mask), (as_i32timm $bank_mask),
(as_i1timm $bound_ctrl))
>;
class UpdateDPPPat<ValueType vt> : GCNPat <
- (vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
+ (vt (int_amdgcn_update_dpp vt:$old, (vt (VOP3Mods vt:$src, i32:$src0_modifiers)), timm:$dpp_ctrl,
timm:$row_mask, timm:$bank_mask,
timm:$bound_ctrl)),
- (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+ (V_MOV_B32_dpp VGPR_32:$old, i32:$src0_modifiers, VGPR_32:$src, (as_i32timm $dpp_ctrl),
(as_i32timm $row_mask), (as_i32timm $bank_mask),
(as_i1timm $bound_ctrl))
>;
@@ -1357,20 +1390,18 @@ defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
let OtherPredicates = [isGFX10Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 (i32 (VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Only]
-
//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//
-
let OtherPredicates = [isGFX11Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 (i32 (VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX11Only]
@@ -1381,8 +1412,8 @@ def : GCNPat <
let OtherPredicates = [isGFX12Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 (i32 (VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX12Only]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
index bfb2ecde783a6..c6b6572082d05 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
@@ -174,22 +174,22 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY11]], implicit-def dead $scc, implicit $exec
; GFX90A_GFX940-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_4]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
index d2c42292a0364..b74ac41ed813b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
@@ -176,24 +176,24 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY6]], implicit-def dead $scc, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY9]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY9]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_3]], 0, [[S_MOV_B32_3]], [[V_ADD_F32_e64_3]], 0, implicit $exec
; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], 0, [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], 0, [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 15
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_4]]
; GFX11-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 16
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index dfaa7b4efac39..b9446a09b0302 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -39,8 +39,8 @@ body: |
# DPP64 does not support all control values and must be split to become legal
# GCN-LABEL: name: dpp64_illegal_ctrl
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
-# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, 0, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, 0, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
# GCN: %0:vreg_64_align2 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
# GCN: %3:vreg_64_align2 = V_CEIL_F64_e32 %0, implicit $mode, implicit $exec
name: dpp64_illegal_ctrl
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index becc2bb095cc4..265762f9c83b8 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -25,29 +25,29 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; VOP2
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
; VOP1
- %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
- %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
- %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...
@@ -109,29 +109,29 @@ body: |
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; VOP2
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
; VOP1
- %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
- %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
- %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...
@@ -158,19 +158,19 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 15, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 15, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 15, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 15, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 15, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
@@ -196,19 +196,19 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
@@ -234,19 +234,19 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 1, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 1, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 1, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 1, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
@@ -268,28 +268,28 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %1, %3, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %1, %6, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %1, %9, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %1, %12, implicit $exec
%14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %14, 0, %0, 1, 14, 15, 0, implicit $exec
%16:vgpr_32 = V_SUB_CO_U32_e32 %1, %15, implicit-def $vcc, implicit $exec
; this cannot be combined because immediate as src0 isn't commutable
%17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec
+ %18:vgpr_32 = V_MOV_B32_dpp %17, 0, %0, 1, 14, 15, 0, implicit $exec
%19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
...
@@ -297,7 +297,7 @@ body: |
# check for floating point modifiers
# GCN-LABEL: name: add_f32_e64
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -314,19 +314,19 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this shouldn't be combined as omod is set
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
; this should be combined as all modifiers are default
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
; this should be combined as modifiers other than abs|neg are default
- %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
; this shouldn't be combined as modifiers aren't abs|neg
- %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...
@@ -346,11 +346,11 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this should be combined as all modifiers are default
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
; this shouldn't be combined as clamp is set
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...
@@ -368,7 +368,7 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this shouldn't be combined as the carry-out is used
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec
S_NOP 0, implicit %5
@@ -380,7 +380,7 @@ body: |
# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
-# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
name: dpp_seq
tracksRegLiveness: true
@@ -391,12 +391,12 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
%5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%8:vgpr_32 = V_ADD_CO_U32_e32 %7, %1, implicit-def $vcc, implicit $exec
; this breaks the sequence
%9:vgpr_32 = V_SUB_CO_U32_e32 5, %7, implicit-def $vcc, implicit $exec
@@ -418,7 +418,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
%5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
@@ -450,7 +450,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 1, 1, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...
@@ -471,13 +471,13 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...
# EXEC mask changed between def and use - cannot combine
# GCN-LABEL: name: exec_changed
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
name: exec_changed
tracksRegLiveness: true
@@ -488,7 +488,7 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:sreg_64 = COPY $exec, implicit-def $exec
%6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
@@ -511,7 +511,7 @@ body: |
%3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
- %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, 0, %1, 1, 1, 1, 0, implicit $exec
%7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...
@@ -528,7 +528,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 1, 1, 0, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -545,7 +545,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -557,7 +557,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
@@ -569,7 +569,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...
@@ -580,43 +580,43 @@ name: dpp_undef_old
tracksRegLiveness: true
body: |
bb.0:
- %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which writes a physreg.
# GCN-LABEL: name: phys_dpp_mov_dst
-# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
bb.0:
- $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_old_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
@@ -637,8 +637,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -649,7 +649,7 @@ body: |
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -662,8 +662,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -673,7 +673,7 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
@@ -687,8 +687,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -698,8 +698,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -712,8 +712,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -723,8 +723,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: S_BRANCH %bb.1
# GCN: bb.1:
@@ -739,8 +739,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
S_BRANCH %bb.1
@@ -753,8 +753,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_CO_U32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec
@@ -768,8 +768,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec
@@ -803,7 +803,7 @@ body: |
...
# GCN-LABEL: name: dpp64_add64_first_combined
-# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
+# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, 0, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
# GCN: %0:vreg_64 = REG_SEQUENCE undef %7:vgpr_32, %subreg.sub0, %8, %subreg.sub1
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %0.sub1, undef $vcc, 0, implicit $exec
@@ -827,7 +827,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:sreg_64_xexec = IMPLICIT_DEF
%5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
...
@@ -847,7 +847,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %4
@@ -862,7 +862,7 @@ body: |
liveins: $vgpr0, $vgpr1
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...
@@ -876,8 +876,8 @@ body: |
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
%6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -903,7 +903,7 @@ body: |
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %5, %bb.2
- ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[PHI]], 323, 15, 15, 0, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[PHI]], 323, 15, 15, 0, implicit $exec
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
@@ -925,7 +925,7 @@ body: |
bb.1:
%4:vgpr_32 = PHI %1, %bb.0, %5, %bb.2
- %5:vgpr_32 = V_MOV_B32_dpp %1, %4, 323, 15, 15, 0, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %1, 0, %4, 323, 15, 15, 0, implicit $exec
bb.2:
%6:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %3, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index fe1345e29f133..fc87170a1c868 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -19,7 +19,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %0, 1, 15, 15, 1, implicit $exec
%5:sreg_32_xm0_xexec = IMPLICIT_DEF
%6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64 %4, %1, %5, 1, implicit $exec
@@ -27,11 +27,11 @@ body: |
%8:vgpr_32 = V_CVT_PK_U8_F32_e64 4, %4, 2, %2, 2, %1, 1, implicit $mode, implicit $exec
; should not be combined because src2 literal is illegal
- %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
; should not be combined on subtargets where src1 imm is illegal
- %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
...
---
@@ -58,23 +58,23 @@ body: |
%4:vgpr_32 = IMPLICIT_DEF
; should be combined because src2 allows sgpr
- %5:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_MED3_F32_e64 0, %5, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
; should be combined only on subtargets that allow sgpr for src1
- %7:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
; should be combined only on subtargets that allow sgpr for src1
- %9:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
; should be combined only on subtargets that allow inlinable constants for src1
- %11:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
; should not be combined when literal constants are used
- %13:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
%14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
...
---
@@ -93,9 +93,9 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %5, 1, 15, 15, 1, implicit $exec
%7:vgpr_32 = V_ADD_NC_U16_e64 4, %6, 8, %5, 0, 0, implicit $exec
...
@@ -111,9 +111,9 @@ body: |
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 13, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -125,23 +125,23 @@ body: |
%3:vgpr_32 = IMPLICIT_DEF
; this should not be combined because op_sel is not zero
- %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_DOT2_F32_F16 0, %4, 0, %0, 0, %2, 0, 5, 0, 0, 0, implicit $mode, implicit $exec
; this should not be combined because op_sel_hi is not all set
- %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec
- %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %8:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 13, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
- %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %10:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%13:vgpr_32 = V_FMA_MIXLO_F16 8, %12, 8, %0, 8, %2, 0, %2, 0, 7, implicit $mode, implicit $exec
- %14:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %14:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%15:vgpr_32 = V_FMA_MIXHI_F16 8, %14, 8, %0, 8, %2, 1, %0, 0, 7, implicit $mode, implicit $exec
...
@@ -158,7 +158,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_FMAC_F32_e64 2, %4, 2, %1, 2, %2, 1, 2, implicit $mode, implicit $exec
...
@@ -179,23 +179,23 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 0, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e64 %1, %6, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e64 %1, %9, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e64 %1, %12, implicit $exec
%14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %14, 0, %0, 1, 14, 15, 0, implicit $exec
%16:vgpr_32 = V_SUB_U32_e64 %1, %15, 0, implicit $exec
...
@@ -215,13 +215,13 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
...
@@ -243,24 +243,24 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 1, implicit $exec
%5:vgpr_32 = IMPLICIT_DEF
- %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 15, 1, implicit $exec
%7:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %6, 2, %1, 1, 2, implicit $mode, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_SUB_U32_e64 %1, %9, 1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 14, 15, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 14, 15, 0, implicit $exec
%13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1, %12, 0, implicit $exec
; this cannot be combined because immediate as src0 isn't commutable
%15:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %16:vgpr_32 = V_MOV_B32_dpp %15, %0, 1, 14, 15, 0, implicit $exec
+ %16:vgpr_32 = V_MOV_B32_dpp %15, 0, %0, 1, 14, 15, 0, implicit $exec
%17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
...
@@ -284,19 +284,19 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this should be combined as e64
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
; this should be combined and shrunk as all modifiers are default
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
; this should be combined and shrunk as modifiers other than abs|neg are default
- %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
; this should be combined as e64
- %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...
@@ -316,11 +316,11 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this should be combined and shrunk as all modifiers are default
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
; this should be combined as _e64
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...
@@ -330,7 +330,7 @@ body: |
# GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
-# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
name: dpp_seq
tracksRegLiveness: true
@@ -341,12 +341,12 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
; this breaks the sequence
%9:vgpr_32 = V_SUB_U32_e32 5, %7, implicit $exec
@@ -368,7 +368,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
@@ -400,7 +400,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 1, 1, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...
@@ -421,13 +421,13 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...
# EXEC mask changed between def and use - cannot combine
# GCN-LABEL: name: exec_changed
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
name: exec_changed
tracksRegLiveness: true
@@ -438,7 +438,7 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:sreg_64 = COPY $exec, implicit-def $exec
%6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
@@ -461,7 +461,7 @@ body: |
%3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
- %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, 0, %1, 1, 1, 1, 0, implicit $exec
%7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...
@@ -478,7 +478,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 1, 1, 0, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -495,7 +495,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -507,7 +507,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
@@ -519,7 +519,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...
@@ -530,43 +530,43 @@ name: dpp_undef_old
tracksRegLiveness: true
body: |
bb.0:
- %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which writes a physreg.
# GCN-LABEL: name: phys_dpp_mov_dst
-# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
bb.0:
- $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_old_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
@@ -587,8 +587,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -599,7 +599,7 @@ body: |
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -612,8 +612,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -623,7 +623,7 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
@@ -637,8 +637,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -648,8 +648,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -662,8 +662,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -673,8 +673,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: S_BRANCH %bb.1
# GCN: bb.1:
@@ -689,8 +689,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
S_BRANCH %bb.1
@@ -703,8 +703,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec
@@ -718,8 +718,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %8, implicit $exec
@@ -727,8 +727,8 @@ body: |
...
# GCN-LABEL: name: dpp_reg_sequence_src2_reject
-#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
-#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
#GCN: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
#GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
@@ -741,8 +741,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
; use of dpp arg as src2, reject
@@ -752,7 +752,7 @@ body: |
...
# GCN-LABEL: name: dpp_reg_sequence_src2
-#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
#GCN: %4:vreg_64 = REG_SEQUENCE undef %2:vgpr_32, %subreg.sub0, %3, %subreg.sub1
#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
#GCN: %6:vgpr_32 = V_FMA_F32_e64_dpp %8, 2, %1.sub0, 2, %5, 2, %4.sub1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -764,8 +764,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
%6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
@@ -810,12 +810,12 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:sreg_32_xm0_xexec = IMPLICIT_DEF
%5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
; src2 is legal for _e64
- %6:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 15, 15, 1, implicit $exec
%7:sreg_32_xm0_xexec = IMPLICIT_DEF
%8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
...
@@ -835,7 +835,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %4
@@ -850,7 +850,7 @@ body: |
liveins: $vgpr0, $vgpr1
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...
@@ -864,8 +864,8 @@ body: |
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
%6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
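For readers skimming the MIR churn above, the shape of the change is uniform: V_MOV_B32_dpp now carries an immediate $src0_modifiers operand between $old and $src0, always 0 in these tests. Below is a minimal C++ sketch of how a pass would build the instruction in its new form; emitMovDpp and the surrounding TII/MBB/I/DL names are illustrative, not part of this patch, and the operand order is taken from the updated checks above.

  // Sketch only; assumes the usual SIInstrInfo/MachineFunction-pass context.
  static MachineInstr *emitMovDpp(const SIInstrInfo *TII, MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  const DebugLoc &DL, Register Dst, Register Old,
                                  Register Src, unsigned DppCtrl) {
    return BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_dpp), Dst)
               .addReg(Old)     // $old: stale value for lanes the DPP masks off
               .addImm(0)       // $src0_modifiers: the new operand; 0 = no neg/abs
               .addReg(Src)     // $src0
               .addImm(DppCtrl) // $dpp_ctrl
               .addImm(0xf)     // $row_mask
               .addImm(0xf)     // $bank_mask
               .addImm(0)       // $bound_ctrl
               .getInstr();     // implicit $exec is appended from the instr desc
  }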
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
index 8cd25298d7473..9f23ad9459739 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
@@ -173,17 +173,17 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX908-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]]
@@ -235,17 +235,17 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX90A_GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]]
@@ -293,13 +293,13 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX11_GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX11_GFX12-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX11_GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 353, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 353, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 354, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 354, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 356, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 356, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 360, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 360, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX11_GFX12-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11_GFX12-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
index 77be7f506aaf2..699c5e61b30bb 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
@@ -179,19 +179,19 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX11-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], killed [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, killed [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 15
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_3]]
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 16
diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
index cd80098b01209..09703117f41a4 100644
--- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -419,11 +419,11 @@ name: dpp
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $vgpr1 = V_MOV_B32_dpp $vgpr1, $vgpr0, 0, 15, 15, 0, implicit $exec
+ $vgpr1 = V_MOV_B32_dpp $vgpr1, 0, $vgpr0, 0, 15, 15, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
- $vgpr3 = V_MOV_B32_dpp $vgpr3, $vgpr0, 0, 15, 15, 0, implicit $exec
+ $vgpr3 = V_MOV_B32_dpp $vgpr3, 0, $vgpr0, 0, 15, 15, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index 248a9e2ddb636..720736b51c839 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -82,16 +82,16 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mov_b32_e64
- ; GCN: renamable $vgpr0 = V_MOV_B32_e64 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 2, implicit $exec
+ ; GCN: renamable $vgpr0 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+ ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 0, 2, 0, implicit $exec
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 0, 3, 0, implicit $exec
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
- %0:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e64 2, implicit $exec
- %2:vgpr_32 = V_MOV_B32_e64 3, implicit $exec
+ %0:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e64 0, 2, 0, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e64 0, 3, 0, implicit $exec
S_NOP 0, implicit %0
S_NOP 0, implicit %1
S_NOP 0, implicit %2
@@ -105,10 +105,10 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_mov_b32_dpp
- ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
@@ -116,9 +116,9 @@ body: |
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
- %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
S_NOP 0, implicit %1
S_NOP 0, implicit %2
S_NOP 0, implicit %3
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir b/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
index 4b6e204ecf957..5c6783b376a1a 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
@@ -9,8 +9,8 @@ body: |
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %src0:vgpr_32 = V_MOV_B32_e64 0, implicit $exec
- ; CHECK-NEXT: %src1:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
+ ; CHECK-NEXT: %src0:vgpr_32 = V_MOV_B32_e64 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: %src1:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
; CHECK-NEXT: %live0:vgpr_32 = V_ADD_U32_e32 %src0, %src1, implicit $exec
; CHECK-NEXT: %live1:vgpr_32 = V_ADD_U32_e32 %live0, %src1, implicit $exec
; CHECK-NEXT: {{ $}}
@@ -20,8 +20,8 @@ body: |
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
- %src0:vgpr_32 = V_MOV_B32_e64 0, implicit $exec
- %src1:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
+ %src0:vgpr_32 = V_MOV_B32_e64 0, 0, 0, implicit $exec
+ %src1:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
%live0:vgpr_32 = V_ADD_U32_e32 %src0:vgpr_32, %src1:vgpr_32, implicit $exec
%live1:vgpr_32 = V_ADD_U32_e32 %live0:vgpr_32, %src1:vgpr_32, implicit $exec
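The same convention explains the *_e64 updates in the files above and below: as a full VOP3 instruction the mov now takes (src0_modifiers, src0, clamp), which is why the old single-operand form V_MOV_B32_e64 1 becomes V_MOV_B32_e64 0, 1, 0. A hedged builder-call sketch, under the same assumed context as the earlier snippet; the modifier bit values are the usual SISrcMods encoding (NEG = 1, ABS = 2):

  BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e64), Dst)
      .addImm(0)  // $src0_modifiers: 0 = plain; NEG (bit 0), ABS (bit 1)
      .addImm(1)  // $src0: here an inline-constant immediate, as in the tests
      .addImm(0); // $clamp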
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
index dcb51fcb76653..ec9e1db6e7649 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
@@ -100,7 +100,7 @@ body: |
%24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
%25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
- %26 = V_MOV_B32_e64 %25, implicit $exec
+ %26 = V_MOV_B32_e64 0, %25, 0, implicit $exec
%26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
%27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
%28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
index a2cad1398bcd1..4108102e865d6 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
@@ -109,7 +109,7 @@ body: |
%24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
%25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
- %26 = V_MOV_B32_e64 %25, implicit $exec
+ %26 = V_MOV_B32_e64 0, %25, 0, implicit $exec
%26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
%27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
%28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
index 876fa6f5f2744..7647cf9f63471 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -54,8 +54,8 @@ body: |
%18 = V_LSHRREV_B32_e64 16, %17, implicit $exec
%19 = V_READLANE_B32 killed %18, 0, implicit-def $vcc, implicit $exec
- %20 = V_MOV_B32_e64 %19, implicit $exec
+ %20 = V_MOV_B32_e64 0, %19, 0, implicit $exec
FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index b8c935927ff70..c29bccf2fa6a5 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -17,15 +17,15 @@ body: |
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec
; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc
@@ -35,41 +35,41 @@ body: |
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
V_CMP_LT_F32_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
; unsafe to combine cmpx
- %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
V_CMPX_EQ_I16_t16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec
- %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %8:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
; unsafe to combine cmpx
- %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %10:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
V_CMPX_GT_U32_nosdst_e64 %10, %0, implicit-def $exec, implicit $mode, implicit $exec
- %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec
; shrink
- %13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%14:sgpr_32 = V_CMP_NGE_F32_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec
; do not shrink True16 instructions
- %15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
; do not shrink, sdst used
- %17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
%18:sgpr_32 = V_CMP_NGE_F32_e64 0, %17, 0, %0, 0, implicit $mode, implicit $exec
%19:sgpr_32 = S_AND_B32 %18, 10101, implicit-def $scc
; commute
- %20:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
+ %20:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
V_CMP_LT_I32_e32 %0, %20, implicit-def $vcc, implicit $exec
...
@@ -88,9 +88,9 @@ body: |
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 14, 1, implicit $exec
; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[COPY1]], 1, 13, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
@@ -99,10 +99,10 @@ body: |
; Do not combine VOPC when row_mask or bank_mask is not 0xf
; All cases are covered by generic rules for creating DPP instructions
- %4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 15, 14, 1, implicit $exec
%99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 13, 15, 1, implicit $exec
%6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 20476b6afd674..27373dee64f33 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -178,7 +178,7 @@ body: |
%11:vgpr_32 = COPY %16
%10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
%14:vgpr_32 = COPY %7
- %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %14, 0, killed %10, 323, 12, 15, 0, implicit $exec
early-clobber %15:vgpr_32 = STRICT_WWM killed %13, implicit $exec
BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, implicit $exec
S_ENDPGM 0
@@ -210,7 +210,7 @@ body: |
%8:sreg_64 = COPY $exec
%9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
- %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, 0, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
%12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
early-clobber %13:sreg_32 = STRICT_WWM %9:vgpr_32, implicit $exec
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
index 1cfafebe2c3cd..e84f559c854a4 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
@@ -106,6 +106,15 @@ v_mov_b32_e64 v5, 0.5
v_mov_b32_e64 v5, -4.0
// GFX10: encoding: [0x05,0x00,0x81,0xd5,0xf7,0x00,0x00,0x00]
+v_mov_b32_e64 v5, |v1|
+// GFX10: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+
+v_mov_b32_e64 v5, -m0
+// GFX10: encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20]
+
v_mov_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// GFX10: encoding: [0xf9,0x02,0x0a,0x7e,0x01,0x06,0x06,0x00]
@@ -11005,6 +11014,15 @@ v_movreld_b32_e64 v5, 0.5
v_movreld_b32_e64 v5, -4.0
// GFX10: encoding: [0x05,0x00,0xc2,0xd5,0xf7,0x00,0x00,0x00]
+v_movreld_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+
+v_movreld_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+
+v_movreld_b32_e64 v5, -s1
+// GFX10: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+
v_movreld_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// GFX10: encoding: [0xf9,0x84,0x00,0x7e,0x02,0x06,0x06,0x00]
@@ -11035,6 +11053,12 @@ v_movrels_b32_e64 v255, v1
v_movrels_b32_e64 v5, v255
// GFX10: encoding: [0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
+v_movrels_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrels_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+
v_movrels_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// GFX10: encoding: [0xf9,0x86,0x00,0x7e,0x02,0x06,0x06,0x00]
@@ -11077,6 +11101,12 @@ v_movrelsd_b32_e64 v255, v1
v_movrelsd_b32_e64 v5, v255
// GFX10: encoding: [0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// GFX10: encoding: [0xf9,0x88,0x00,0x7e,0x02,0x06,0x06,0x00]
@@ -11122,6 +11152,12 @@ v_movrelsd_2_b32_e64 v255, v1
v_movrelsd_2_b32_e64 v5, v255
// GFX10: encoding: [0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_2_b32_e64 v5, -v1
+// GFX10: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_2_b32_e64 v5, |v2|
+// GFX10: encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_2_b32_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// GFX10: encoding: [0xf9,0x90,0x00,0x7e,0x02,0x06,0x06,0x00]
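The new GFX10 assembler checks also make the encoding of the modifiers easy to read off: relative to the plain "v_mov_b32_e64 v5, v1" encoding, |v1| sets bit 0 of byte 1 (the VOP3 abs field for src0) and -v1 sets bit 5 of byte 7 (the neg field for src0). A tiny standalone illustration, derived only by diffing the check lines above rather than from the MC encoder sources:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // v_mov_b32_e64 v5, v1 (no modifiers), bytes as FileCheck prints them.
    uint8_t Enc[8] = {0x05, 0x00, 0x81, 0xd5, 0x01, 0x01, 0x00, 0x00};
    Enc[1] |= 0x01; // abs(src0): now matches the |v1| check above
    printf("|v1|: "); for (uint8_t B : Enc) printf("0x%02x,", B); printf("\n");
    Enc[1] &= ~0x01;
    Enc[7] |= 0x20; // neg(src0): now matches the -v1 check above
    printf("-v1:  "); for (uint8_t B : Enc) printf("0x%02x,", B); printf("\n");
    return 0;
  }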
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
index 9a65c6687f3f8..74f6e9391c989 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
@@ -1974,6 +1974,18 @@ v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:
v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -2016,6 +2028,18 @@ v_movreld_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -2058,6 +2082,18 @@ v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -2100,6 +2136,18 @@ v_movrelsd_2_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr
v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -2142,6 +2190,18 @@ v_movrelsd_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:
v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
index 3897b82785f65..6db9923c41f5f 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
@@ -495,6 +495,12 @@ v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX11: [0x05,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
@@ -507,6 +513,12 @@ v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -516,6 +528,12 @@ v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -525,6 +543,12 @@ v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -534,6 +558,12 @@ v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
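For cross-checking the dpp8 bytes in the checks above: the eight 3-bit lane
selectors pack low-to-high into a 24-bit little-endian field, which is why
every dpp8:[7,6,5,4,3,2,1,0] line ends in 0x77,0x39,0x05. A minimal
standalone sketch of that packing (packDpp8 is a hypothetical helper, not
the in-tree encoder):

#include <cstdint>
#include <cstdio>

// Pack eight 3-bit dpp8 lane selectors, lane 0 in the low bits.
static uint32_t packDpp8(const unsigned sel[8]) {
  uint32_t field = 0;
  for (int i = 0; i < 8; ++i)
    field |= (sel[i] & 7u) << (3 * i);
  return field;
}

int main() {
  const unsigned sel[8] = {7, 6, 5, 4, 3, 2, 1, 0};
  printf("0x%06x\n", packDpp8(sel)); // prints 0x053977 -> bytes 0x77,0x39,0x05
  return 0;
}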
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
index fb4e9108fe1d1..c1790c953d906 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
@@ -2538,6 +2538,15 @@ v_mov_b32_e64 v5, src_scc
v_mov_b32_e64 v255, 0xaf123456
// GFX11: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+v_mov_b32_e64 v5, |v1|
+// GFX11: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -m0
+// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+
+v_mov_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+
v_movreld_b32_e64 v5, v1
// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
@@ -2583,24 +2592,51 @@ v_movreld_b32_e64 v5, src_scc
v_movreld_b32_e64 v255, 0xaf123456
// GFX11: encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+v_movreld_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+
+v_movreld_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+
+v_movreld_b32_e64 v5, -s1
+// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+
v_movrels_b32_e64 v5, v1
// GFX11: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
v_movrels_b32_e64 v255, v255
// GFX11: encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
+v_movrels_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrels_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_2_b32_e64 v5, v1
// GFX11: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
v_movrelsd_2_b32_e64 v255, v255
// GFX11: encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_2_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_2_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_b32_e64 v5, v1
// GFX11: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
v_movrelsd_b32_e64 v255, v255
// GFX11: encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_b32_e64 v5, -v1
+// GFX11: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_b32_e64 v5, |v2|
+// GFX11: encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+
v_nop_e64
// GFX11: encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00]
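The new e64 modifier checks above differ from the plain-operand forms in
exactly two bits, assuming the usual VOP3 field layout: |src0| sets the abs
bit in byte 1 of the first dword (the 0x01), and -src0 sets the neg bit in
the top byte of the source dword (the trailing 0x20). A minimal sketch of
the source dword under that reading (vop3SrcDword is a hypothetical helper,
not the in-tree encoder):

#include <cstdint>
#include <cstdio>

// Source dword of a VOP3 v_mov_b32_e64: src0 in bits [8:0], neg0 in bit 29.
// VGPR operands are encoded from 256 upward, so v1 is 257.
static uint32_t vop3SrcDword(unsigned src0Enc, bool neg0) {
  return src0Enc | (neg0 ? 1u << 29 : 0u);
}

int main() {
  printf("0x%08x\n", vop3SrcDword(256 + 1, true));  // 0x20000101 -> 0x01,0x01,0x00,0x20
  printf("0x%08x\n", vop3SrcDword(256 + 1, false)); // 0x00000101 -> 0x01,0x01,0x00,0x00
  return 0;
}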
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
index e35bb63290672..ebda8b65f8505 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
@@ -2538,6 +2538,15 @@ v_mov_b32_e64 v5, src_scc
v_mov_b32_e64 v255, 0xaf123456
// GFX12: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+v_mov_b32_e64 v5, |v1|
+// GFX12: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+
+v_mov_b32_e64 v5, -m0
+// GFX12: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+
+v_mov_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+
v_movreld_b32_e64 v5, v1
// GFX12: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
@@ -2583,24 +2592,51 @@ v_movreld_b32_e64 v5, src_scc
v_movreld_b32_e64 v255, 0xaf123456
// GFX12: encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+v_movreld_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+
+v_movreld_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+
+v_movreld_b32_e64 v5, -s1
+// GFX12: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+
v_movrels_b32_e64 v5, v1
// GFX12: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
v_movrels_b32_e64 v255, v255
// GFX12: encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
+v_movrels_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrels_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_2_b32_e64 v5, v1
// GFX12: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
v_movrelsd_2_b32_e64 v255, v255
// GFX12: encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_2_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_2_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+
v_movrelsd_b32_e64 v5, v1
// GFX12: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
v_movrelsd_b32_e64 v255, v255
// GFX12: encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
+v_movrelsd_b32_e64 v5, -v1
+// GFX12: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+
+v_movrelsd_b32_e64 v5, |v2|
+// GFX12: encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+
v_nop_e64
// GFX12: encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
index 6b915bd14683a..def9e8212f155 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
@@ -1974,6 +1974,18 @@ v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:
v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX12: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -2016,6 +2028,19 @@ v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+
+v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX12: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
+
v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
index 61266f3776c28..4d21a54b35474 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
@@ -498,6 +498,12 @@ v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -507,6 +513,12 @@ v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+
+v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
index 4c0170ca4e474..d9661b1e7a433 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
@@ -14004,6 +14004,12 @@
# GFX10: v_mov_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00]
0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00
+# GFX10: v_mov_b32_e64 v5, |v1| ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
+
+# GFX10: v_mov_b32_e64 v5, -m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20
+
# GFX10: v_movreld_b32_e64 v255, v1 ; encoding: [0xff,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
0xff,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
@@ -14028,6 +14034,15 @@
# GFX10: v_movreld_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00
+# GFX10: v_movreld_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movreld_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00
+
+# GFX10: v_movreld_b32_e64 v5, -s1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20
+
# GFX10: v_movrels_b32_e64 v255, v1 ; encoding: [0xff,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
0xff,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00
@@ -14037,6 +14052,12 @@
# GFX10: v_movrels_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00
+# GFX10: v_movrels_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movrels_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00
+
# GFX10: v_movrelsd_2_b32_e64 v255, v1 ; encoding: [0xff,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
0xff,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00
@@ -14046,6 +14067,12 @@
# GFX10: v_movrelsd_2_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00
+# GFX10: v_movrelsd_2_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movrelsd_2_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00
+
# GFX10: v_movrelsd_b32_e64 v255, v1 ; encoding: [0xff,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
0xff,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00
@@ -14055,6 +14082,12 @@
# GFX10: v_movrelsd_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00
+# GFX10: v_movrelsd_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20
+
+# GFX10: v_movrelsd_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00
+
# GFX10: v_mqsad_pk_u16_u8 v[254:255], v[1:2], v2, v[3:4] ; encoding: [0xfe,0x00,0x73,0xd5,0x01,0x05,0x0e,0x04]
0xfe,0x00,0x73,0xd5,0x01,0x05,0x0e,0x04
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
index 8758305258387..922c7f5ce1416 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
@@ -1765,6 +1765,18 @@
# GFX11: v_mov_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+# GFX11: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movreld_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1807,6 +1819,18 @@
# GFX11: v_movreld_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrels_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1849,6 +1873,18 @@
# GFX11: v_movrels_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrelsd_2_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1891,6 +1927,18 @@
# GFX11: v_movrelsd_2_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrelsd_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1933,6 +1981,18 @@
# GFX11: v_movrelsd_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_not_b16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
index a3531410ac401..bc785c28e4668 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
@@ -253,30 +253,60 @@
# GFX11: v_mov_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movreld_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movreld_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrels_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrels_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrelsd_2_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_2_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrelsd_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_not_b16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
index 770e3c6e7a6f3..8fbd2708f0f26 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
@@ -2313,6 +2313,15 @@
# GFX11: v_mov_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
+# GFX11: v_mov_b32_e64 v5, |v1| ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
+
+# GFX11: v_mov_b32_e64 v5, -m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20
+
+# GFX11: v_mov_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20
+
# GFX11: v_movreld_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
@@ -2358,24 +2367,51 @@
# GFX11: v_movreld_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
+# GFX11: v_movreld_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movreld_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00
+
+# GFX11: v_movreld_b32_e64 v5, -s1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20
+
# GFX11: v_movrels_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00
# GFX11: v_movrels_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00
+# GFX11: v_movrels_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movrels_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00
+
# GFX11: v_movrelsd_2_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00
# GFX11: v_movrelsd_2_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00
+# GFX11: v_movrelsd_2_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movrelsd_2_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00
+
# GFX11: v_movrelsd_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00
# GFX11: v_movrelsd_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00
+# GFX11: v_movrelsd_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20
+
+# GFX11: v_movrelsd_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00
+
# GFX11: v_nop ; encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00]
0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
index 4fe4284e8eb4e..c7c1ba84e86f6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
@@ -2313,6 +2313,15 @@
# GFX12: v_mov_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
+# GFX12: v_mov_b32_e64 v5, |v1| ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
+0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
+
+# GFX12: v_mov_b32_e64 v5, -m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20
+
+# GFX12: v_mov_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20
+
# GFX12: v_movreld_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
@@ -2358,24 +2367,51 @@
# GFX12: v_movreld_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
+# GFX12: v_movreld_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x20
+
+# GFX12: v_movreld_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc2,0xd5,0x02,0x01,0x00,0x00
+
+# GFX12: v_movreld_b32_e64 v5, -s1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20]
+0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x20
+
# GFX12: v_movrels_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00
# GFX12: v_movrels_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00
+# GFX12: v_movrels_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x20
+
+# GFX12: v_movrels_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc3,0xd5,0x02,0x01,0x00,0x00
+
# GFX12: v_movrelsd_2_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00
# GFX12: v_movrelsd_2_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00
+# GFX12: v_movrelsd_2_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x20
+
+# GFX12: v_movrelsd_2_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc8,0xd5,0x02,0x01,0x00,0x00
+
# GFX12: v_movrelsd_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00
# GFX12: v_movrelsd_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00]
0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00
+# GFX12: v_movrelsd_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20]
+0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x20
+
+# GFX12: v_movrelsd_b32_e64 v5, |v2| ; encoding: [0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00]
+0x05,0x01,0xc4,0xd5,0x02,0x01,0x00,0x00
+
# GFX12: v_not_b16_e64 v5, v1 ; encoding: [0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
index e914d139e240e..47e2e7bd0afa5 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
@@ -1764,6 +1764,18 @@
# GFX12: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX12: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX12: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
@@ -1806,6 +1818,18 @@
# GFX12: v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX12: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX12: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX12: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX12: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX12: v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
index 2a4b677620d38..964f3c3126cb0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
@@ -402,12 +402,24 @@
# GFX12: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX12: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX12: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX12: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX12: v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX12: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX12: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX12: v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
>From 6c3877e169bba21305991da4b4567b38951b115a Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 28 Dec 2023 11:41:39 +0100
Subject: [PATCH 02/20] Formatting
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +-
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 19 ++++++++++++-------
llvm/test/CodeGen/AMDGPU/dpp_combine.mir | 2 +-
.../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 2 +-
4 files changed, 15 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e315fca0f4bf9..409d0477e065c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2600,7 +2600,7 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
const MachineOperand &SrcOp = MI.getOperand(I);
- if(I == 2)
+ if (I == 2)
MovDPP.addImm(0); // add src modifier
assert(!SrcOp.isFPImm());
if (SrcOp.isImm()) {
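The hunk above keeps the expanded 32-bit halves well-formed now that
V_MOV_B32_dpp carries an explicit src0_modifiers immediate: when the copy
loop reaches the src operand (I == 2), a zero modifier has to be spliced in
first. A minimal sketch of that splice, with illustrative names rather than
the MachineInstr API:

#include <cstdio>
#include <string>
#include <vector>

// Mirror the operand copy in expandMovDPP64: operands 1 (old) and 2 (src)
// of the 64-bit mov are copied, and a zero src0_modifiers immediate is
// inserted just before the src operand, as MovDPP.addImm(0) does above.
int main() {
  const std::vector<std::string> src64 = {"vdst", "old", "src"};
  std::vector<std::string> mov32;
  for (unsigned i = 0; i < src64.size(); ++i) {
    if (i == 2)
      mov32.push_back("src0_modifiers=0"); // the new modifier operand
    mov32.push_back(src64[i]);
  }
  for (const std::string &op : mov32)
    printf("%s\n", op); // vdst, old, src0_modifiers=0, src
  return 0;
}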
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 4bb1972cb0ea3..56e67f2c54fff 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -404,7 +404,10 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let Outs = (outs);
let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
- let Ins64 = !con((ins Src0RC64:$vdst), !if(!eq(Src1RC, VSrc_b32), (ins FP32InputMods:$src0_modifiers), (ins FPVRegInputMods:$src0_modifiers)), (ins Src1RC:$src0, clampmod0:$clamp));
+ let Ins64 = !con((ins Src0RC64:$vdst),
+ !if(!eq(Src1RC, VSrc_b32), (ins FP32InputMods:$src0_modifiers),
+ (ins FPVRegInputMods:$src0_modifiers)),
+ (ins Src1RC:$src0, clampmod0:$clamp));
let Asm32 = getAsm32<1, 1>.ret;
@@ -423,12 +426,11 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
- let InsDPP8 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers,Src0RC32:$src0,dpp8:$dpp8, FI:$fi);
+ let InsDPP8 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;
let InsVOP3Base = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, clampmod0:$clamp);
-
let OutsVOP3DPP = (outs Src0RC64:$vdst);
let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
@@ -1390,7 +1392,8 @@ defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
let OtherPredicates = [isGFX10Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
+ timm:$dpp8)),
(V_MOV_B32_dpp8_gfx10 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
@@ -1400,8 +1403,9 @@ def : GCNPat <
//===----------------------------------------------------------------------===//
let OtherPredicates = [isGFX11Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
+ timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX11Only]
@@ -1412,7 +1416,8 @@ def : GCNPat <
let OtherPredicates = [isGFX12Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
+ timm:$dpp8)),
(V_MOV_B32_dpp8_gfx12 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index 265762f9c83b8..9442af2aa1c22 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -346,7 +346,7 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this should be combined as all modifiers are default
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0,%0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
; this shouldn't be combined as clamp is set
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index fc87170a1c868..5f91d1af8fe46 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -850,7 +850,7 @@ body: |
liveins: $vgpr0, $vgpr1
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_dpp %0, 0,%1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...
>From 29138392ca1950dbf2fca5cceb154e9acdb0f451 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 28 Dec 2023 14:37:43 +0100
Subject: [PATCH 03/20] Undo formatting
---
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 56e67f2c54fff..a112e14e6b443 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -232,10 +232,10 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
+defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
- let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
- defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
+let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isMoveImm = 1
// FIXME: Specify SchedRW for READFIRSTLANE_B32
>From 2dd479a2f9ecd97bd338f66c05e086cd104d4ab6 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 29 Dec 2023 12:25:10 +0100
Subject: [PATCH 04/20] Delete extra line
---
llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
index def9e8212f155..e6fbe31586afd 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
@@ -2040,7 +2040,6 @@ v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
// GFX12: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-
v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
>From 2177ecf576fd923ac48ebd61b926973199d80284 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 29 Dec 2023 12:39:11 +0100
Subject: [PATCH 05/20] Dasm test fix
---
.../AMDGPU/gfx11_dasm_vop1_dpp16.txt | 60 -------------------
.../AMDGPU/gfx11_dasm_vop1_dpp8.txt | 30 ----------
.../gfx11_dasm_vop3_dpp16_from_vop1.txt | 60 +++++++++++++++++++
.../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt | 30 ++++++++++
4 files changed, 90 insertions(+), 90 deletions(-)
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
index 922c7f5ce1416..8758305258387 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt
@@ -1765,18 +1765,6 @@
# GFX11: v_mov_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x02,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX11: v_movreld_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x84,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1819,18 +1807,6 @@
# GFX11: v_movreld_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x84,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX11: v_movrels_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x86,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1873,18 +1849,6 @@
# GFX11: v_movrels_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x86,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX11: v_movrelsd_2_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x90,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1927,18 +1891,6 @@
# GFX11: v_movrelsd_2_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x90,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX11: v_movrelsd_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0x88,0x0a,0x7e,0x01,0x1b,0x00,0xff
@@ -1981,18 +1933,6 @@
# GFX11: v_movrelsd_b32_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30]
0xfa,0x88,0xfe,0x7f,0xff,0x6f,0x0d,0x30
-# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX11: v_not_b16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
0xfa,0xd2,0x0a,0x7e,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
index bc785c28e4668..a3531410ac401 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt
@@ -253,60 +253,30 @@
# GFX11: v_mov_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x02,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX11: v_movreld_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x84,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movreld_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x84,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX11: v_movrels_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x86,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrels_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x86,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX11: v_movrelsd_2_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x90,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_2_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x90,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX11: v_movrelsd_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0x88,0x0a,0x7e,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00]
0xea,0x88,0xfe,0x7f,0xff,0x00,0x00,0x00
-# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX11: v_not_b16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05]
0xe9,0xd2,0x0a,0x7e,0x01,0x77,0x39,0x05
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
index cf29efa5ff56b..f9f76a1ae2147 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
@@ -1764,6 +1764,18 @@
# GFX11: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX11: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
@@ -1806,6 +1818,18 @@
# GFX11: v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
@@ -1848,6 +1872,18 @@
# GFX11: v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
@@ -1890,6 +1926,18 @@
# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
@@ -1932,6 +1980,18 @@
# GFX11: v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
+0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+0x05,0x01,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
+
# GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
index bfda6d10c2f6d..e862e32c456e4 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
@@ -402,30 +402,60 @@
# GFX11: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX11: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX11: v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX11: v_movreld_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movreld_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX11: v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX11: v_movrels_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrels_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_2_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX11: v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+# GFX11: v_movrelsd_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
+0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
+
+# GFX11: v_movrelsd_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x01,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
# GFX11: v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
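Aside for reviewers: the new -v1 and |v1| checks each flip exactly one bit of the VOP3 words. In this layout abs(src0) is bit 8 of the low dword (byte 1, bit 0) and neg(src0) is bit 61 of the 64-bit instruction (byte 7, bit 5). A minimal standalone sketch, assuming the little-endian byte order the tests print, that recovers the src0 modifiers from one of the 12-byte encodings above:

// decode_mods.cpp -- illustrative only: extract the src0 abs/neg bits from a
// 12-byte VOP3+DPP8 encoding as printed in these tests. Bit positions follow
// the VOP3 layout (abs0 = bit 8 of dword0, neg0 = bit 61 overall).
#include <cstdint>
#include <cstdio>

int main() {
  // v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
  const uint8_t Enc[12] = {0x05, 0x00, 0x81, 0xd5, 0xe9, 0x00,
                           0x00, 0x20, 0x01, 0x77, 0x39, 0x05};
  bool Abs0 = Enc[1] & 0x01; // bit 8 of the first dword
  bool Neg0 = Enc[7] & 0x20; // bit 61 of the 64-bit VOP3 word
  std::printf("src0_modifiers: %s%s\n", Neg0 ? "neg " : "", Abs0 ? "abs" : "");
}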
>From be59750f015e17d536efc3ac163d6cfa9a83f92c Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 29 Dec 2023 12:40:08 +0100
Subject: [PATCH 06/20] Deleted line
---
llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
index 7647cf9f63471..1a04fdc8bae1a 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -59,4 +59,3 @@ body: |
FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
-
>From 3c512d13f9eac55976529c2988015da45f06501f Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 29 Dec 2023 12:40:40 +0100
Subject: [PATCH 07/20] Added missing space after comma
---
llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 5f91d1af8fe46..a60f765d5b8f2 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -815,7 +815,7 @@ body: |
%5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
; src2 is legal for _e64
- %6:vgpr_32 = V_MOV_B32_dpp %2, 0,%1, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 15, 15, 1, implicit $exec
%7:sreg_32_xm0_xexec = IMPLICIT_DEF
%8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
...
@@ -835,7 +835,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0,%1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %4
@@ -864,7 +864,7 @@ body: |
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0,%1.sub0, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
>From 478de367aba186333d5c323087274447691a2dfe Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Fri, 12 Jan 2024 15:38:34 +0100
Subject: [PATCH 08/20] Added modifier-combining logic to GCNDPPCombine and
 SIInstrInfo, with tests for the new logic
---
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 36 ++++++++++++++---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 9 ++++-
llvm/test/CodeGen/AMDGPU/dpp_combine.mir | 32 ++++++++++++---
.../CodeGen/AMDGPU/fold-mov-modifiers.mir | 40 +++++++++++++++++++
4 files changed, 104 insertions(+), 13 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index a75082268c773..ac1308b740bfd 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -42,6 +42,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
using namespace llvm;
@@ -274,18 +275,41 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
break;
}
- if (auto *Mod0 = TII->getNamedOperand(OrigMI,
- AMDGPU::OpName::src0_modifiers)) {
- assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
- AMDGPU::OpName::src0_modifiers));
+ MachineOperand *MovMod = nullptr;
+ if (AMDGPU::hasNamedOperand(MovMI.getOpcode(),
+ AMDGPU::OpName::src0_modifiers)) {
+ MovMod = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0_modifiers);
+ if (MovMod)
+ assert(0LL == (MovMod->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+ }
+ if (auto *Mod0 =
+ TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0_modifiers)) {
+ assert(NumOperands ==
+ AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src0_modifiers));
assert(HasVOP3DPP ||
(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
- DPPInst.addImm(Mod0->getImm());
+ // MovMod MIMod
+ // abs abs -> abs
+ // abs neg -> abs|neg
+ // neg abs -> abs
+ // neg neg -> 0
+ if (MovMod && MovMod->getImm() == SISrcMods::ABS &&
+ Mod0->getImm() == SISrcMods::NEG)
+ DPPInst.addImm(SISrcMods::ABS | SISrcMods::NEG);
+ else if (MovMod && MovMod->getImm() == SISrcMods::NEG &&
+ Mod0->getImm() == SISrcMods::NEG)
+ DPPInst.addImm(0);
+ else
+ DPPInst.addImm(Mod0->getImm());
++NumOperands;
} else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) {
- DPPInst.addImm(0);
+ if (MovMod)
+ DPPInst.addImm(MovMod->getImm());
+ else
+ DPPInst.addImm(0);
++NumOperands;
}
+
auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
assert(Src0);
int Src0Idx = NumOperands;
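The table in the hunk above composes the mov's modifier with the consumer's: the consumer's modifier is applied on top of the mov's, so abs discards the incoming sign and two negations cancel. A standalone sketch of that composition, using the SISrcMods bit values the MIR tests encode (NEG = 1, ABS = 2); the patch itself handles the listed cases explicitly rather than computing this generally:

// compose_mods.cpp -- illustrative sketch of the MovMod/Mod0 folding above,
// assuming SISrcMods-style flags: NONE = 0, NEG = 1 << 0, ABS = 1 << 1.
#include <cassert>

enum SrcMods { NONE = 0, NEG = 1 << 0, ABS = 1 << 1 };

// Result of applying the consumer's modifier (Outer) to a value the mov
// already modified with Inner, i.e. Outer(Inner(x)).
unsigned composeSrcMods(unsigned Inner, unsigned Outer) {
  if (Outer & ABS)              // |anything| ignores the inner sign
    return Outer;               // abs (and abs|neg) absorb Inner
  if (Inner & ABS)              // |x| survives; the consumer may negate it
    return ABS | (Outer & NEG); // abs neg -> abs|neg
  return (Inner ^ Outer) & NEG; // two plain negs cancel: neg neg -> 0
}

int main() {
  assert(composeSrcMods(ABS, ABS) == ABS);         // abs abs -> abs
  assert(composeSrcMods(ABS, NEG) == (ABS | NEG)); // abs neg -> abs|neg
  assert(composeSrcMods(NEG, ABS) == ABS);         // neg abs -> abs
  assert(composeSrcMods(NEG, NEG) == NONE);        // neg neg -> 0
}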
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 409d0477e065c..4ccb551971436 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3298,7 +3298,6 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B32_e32:
- case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
case AMDGPU::V_MOV_B64_e32:
case AMDGPU::V_MOV_B64_e64:
@@ -3311,6 +3310,14 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
case AMDGPU::V_ACCVGPR_READ_B32_e64:
case AMDGPU::V_ACCVGPR_MOV_B32:
return true;
+ case AMDGPU::V_MOV_B32_e64:
+ if (MI
+ .getOperand(AMDGPU::getNamedOperandIdx(
+ AMDGPU::V_MOV_B32_e64, AMDGPU::OpName::src0_modifiers))
+ .getImm() == 0)
+ return true;
+ else
+ return false;
default:
return false;
}
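The isFoldableCopy change encodes that a V_MOV_B32_e64 only behaves like a copy when its src0_modifiers operand is clear: with neg or abs set the mov computes -x or |x|, and the fold-operands pass must leave it alone (see the fold-mov-modifiers.mir test below). A hedged standalone sketch of the predicate, with a hypothetical Inst record standing in for MachineInstr:

// foldable_copy.cpp -- illustrative only. Models the isFoldableCopy rule for
// V_MOV_B32_e64: it is a plain copy only when src0_modifiers == NONE.
// `Inst` is a hypothetical stand-in for MachineInstr, not an LLVM type.
#include <cassert>

enum SrcMods { NONE = 0, NEG = 1 << 0, ABS = 1 << 1 };

struct Inst {
  unsigned Src0Modifiers; // value of the src0_modifiers operand
};

bool isFoldableMovB32E64(const Inst &MI) {
  // -x or |x| changes the value, so only an unmodified mov is a copy.
  return MI.Src0Modifiers == NONE;
}

int main() {
  assert(isFoldableMovB32E64({NONE})); // v_mov_b32_e64 v2, v1 folds
  assert(!isFoldableMovB32E64({NEG})); // v_mov_b32_e64 v2, -v1 must stay
}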
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index 9442af2aa1c22..ef819e9330c93 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -300,8 +300,12 @@ body: |
# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
+# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %10:vgpr_32 = V_ADD_F32_dpp %2, 3, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %12:vgpr_32 = V_ADD_F32_dpp %2, 2, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %14:vgpr_32 = V_ADD_F32_dpp %2, 2, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %16:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %18:vgpr_32 = V_ADD_F32_e64 4, %17, 8, %0, 0, 0, implicit $mode, implicit $exec
name: add_f32_e64
tracksRegLiveness: true
@@ -321,13 +325,29 @@ body: |
%5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
+ ; this should combine MOV and ADD modifiers when they are both neg
+ %7:vgpr_32 = V_MOV_B32_dpp undef %2, 1, %1, 1, 15, 15, 1, implicit $exec
+ %8:vgpr_32 = V_ADD_F32_e64 1, %7, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+ ; this should combine MOV and ADD modifiers when they are abs(MOV) and neg(ADD)
+ %9:vgpr_32 = V_MOV_B32_dpp undef %2, 2, %1, 1, 15, 15, 1, implicit $exec
+ %10:vgpr_32 = V_ADD_F32_e64 1, %9, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+ ; this should combine MOV and ADD modifiers when they are neg(MOV) and abs(ADD)
+ %11:vgpr_32 = V_MOV_B32_dpp undef %2, 1, %1, 1, 15, 15, 1, implicit $exec
+ %12:vgpr_32 = V_ADD_F32_e64 2, %11, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+ ; this should combine MOV and ADD modifiers when they are both abs
+ %13:vgpr_32 = V_MOV_B32_dpp undef %2, 2, %1, 1, 15, 15, 1, implicit $exec
+ %14:vgpr_32 = V_ADD_F32_e64 2, %13, 0, %0, 0, 0, implicit $mode, implicit $exec
+
; this should be combined as modifiers other than abs|neg are default
- %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
- %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %16:vgpr_32 = V_ADD_F32_e64 1, %15, 2, %0, 0, 0, implicit $mode, implicit $exec
; this shouldn't be combined as modifiers aren't abs|neg
- %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
- %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %18:vgpr_32 = V_ADD_F32_e64 4, %17, 8, %0, 0, 0, implicit $mode, implicit $exec
...
# check for e64 modifiers
diff --git a/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir b/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir
new file mode 100644
index 0000000000000..d2bf9b553d8e1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir
@@ -0,0 +1,40 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: fold-no-modifier
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vcc
+
+ ; GCN-LABEL: name: fold-no-modifier
+ ; GCN: liveins: $vgpr0, $vgpr1, $vcc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[COPY]], implicit-def $vcc, implicit $vcc, implicit $exec
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+ %3:vgpr_32 = V_ADDC_U32_e32 %0, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+---
+name: no-fold-with-modifier
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vcc
+
+ ; GCN-LABEL: name: no-fold-with-modifier
+ ; GCN: liveins: $vgpr0, $vgpr1, $vcc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[V_MOV_B32_e64_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 1, 1, 0, implicit $exec
+ ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 [[COPY]], [[V_MOV_B32_e64_]], implicit-def $vcc, implicit $vcc, implicit $exec
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = V_MOV_B32_e64 1, 1, 0, implicit $exec
+ %3:vgpr_32 = V_ADDC_U32_e32 %0, %2, implicit-def $vcc, implicit $vcc, implicit $exec
>From 37e6aee7ad17e00300856b4e79655afea7d6d506 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 18 Jan 2024 14:00:48 +0100
Subject: [PATCH 09/20] Fixed dpp_combine tests and moved them into
 dpp_combine_gfx11, since gfx10 doesn't support v_mov_b32_e64_dpp; added a
 clamp test
---
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 10 ++-
llvm/test/CodeGen/AMDGPU/dpp_combine.mir | 32 ++-------
.../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 67 +++++++++++++++++++
.../CodeGen/AMDGPU/fold-mov-modifiers.mir | 39 +++++++++++
4 files changed, 120 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index ac1308b740bfd..b0f44b0594a09 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -40,9 +40,9 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineOperand.h"
using namespace llvm;
@@ -208,6 +208,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
bool CombBCZ,
bool IsShrinkable) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
+ MovMI.getOpcode() == AMDGPU::V_MOV_B32_e64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
@@ -299,6 +300,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
else if (MovMod && MovMod->getImm() == SISrcMods::NEG &&
Mod0->getImm() == SISrcMods::NEG)
DPPInst.addImm(0);
+ else if (Mod0->getImm() == SISrcMods::NONE && MovMod)
+ DPPInst.addImm(MovMod->getImm());
else
DPPInst.addImm(Mod0->getImm());
++NumOperands;
@@ -529,6 +532,7 @@ bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
+ MovMI.getOpcode() == AMDGPU::V_MOV_B32_e64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
@@ -768,7 +772,9 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (auto &MBB : MF) {
for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
- if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
+ if ((MI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
+ MI.getOpcode() == AMDGPU::V_MOV_B32_e64_dpp) &&
+ combineDPPMov(MI)) {
Changed = true;
++NumDPPMovsCombined;
} else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index ef819e9330c93..a3f915cc6b124 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -300,12 +300,8 @@ body: |
# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %10:vgpr_32 = V_ADD_F32_dpp %2, 3, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %12:vgpr_32 = V_ADD_F32_dpp %2, 2, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %14:vgpr_32 = V_ADD_F32_dpp %2, 2, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %16:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %18:vgpr_32 = V_ADD_F32_e64 4, %17, 8, %0, 0, 0, implicit $mode, implicit $exec
+# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
name: add_f32_e64
tracksRegLiveness: true
@@ -325,29 +321,13 @@ body: |
%5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
- ; this should combine MOV and ADD modifiers when they are both neg
- %7:vgpr_32 = V_MOV_B32_dpp undef %2, 1, %1, 1, 15, 15, 1, implicit $exec
- %8:vgpr_32 = V_ADD_F32_e64 1, %7, 0, %0, 0, 0, implicit $mode, implicit $exec
-
- ; this should combine MOV and ADD modifiers when they are abs(MOV) and neg(ADD)
- %9:vgpr_32 = V_MOV_B32_dpp undef %2, 2, %1, 1, 15, 15, 1, implicit $exec
- %10:vgpr_32 = V_ADD_F32_e64 1, %9, 0, %0, 0, 0, implicit $mode, implicit $exec
-
- ; this should combine MOV and ADD modifiers when they are neg(MOV) and abs(ADD)
- %11:vgpr_32 = V_MOV_B32_dpp undef %2, 1, %1, 1, 15, 15, 1, implicit $exec
- %12:vgpr_32 = V_ADD_F32_e64 2, %11, 0, %0, 0, 0, implicit $mode, implicit $exec
-
- ; this should combine MOV and ADD modifiers when they are both abs
- %13:vgpr_32 = V_MOV_B32_dpp undef %2, 2, %1, 1, 15, 15, 1, implicit $exec
- %14:vgpr_32 = V_ADD_F32_e64 2, %13, 0, %0, 0, 0, implicit $mode, implicit $exec
-
; this should be combined as modifiers other than abs|neg are default
- %15:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
- %16:vgpr_32 = V_ADD_F32_e64 1, %15, 2, %0, 0, 0, implicit $mode, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
; this shouldn't be combined as modifiers aren't abs|neg
- %17:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
- %18:vgpr_32 = V_ADD_F32_e64 4, %17, 8, %0, 0, 0, implicit $mode, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...
# check for e64 modifiers
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index a60f765d5b8f2..fd495ee9b11a6 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -324,6 +324,73 @@ body: |
%6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...
+# check for mov_e64_dpp modifiers
+# GCN-LABEL: name: mov_e64_dpp
+# GCN: %16:vgpr_32 = IMPLICIT_DEF
+# GCN: %3:vgpr_32 = V_ADD_F32_dpp %16, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %15:vgpr_32 = IMPLICIT_DEF
+# GCN: %5:vgpr_32 = V_ADD_F32_dpp %15, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %14:vgpr_32 = IMPLICIT_DEF
+# GCN: %7:vgpr_32 = V_ADD_F32_dpp %14, 2, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %13:vgpr_32 = IMPLICIT_DEF
+# GCN: %9:vgpr_32 = V_ADD_F32_dpp %13, 3, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %12:vgpr_32 = IMPLICIT_DEF
+# GCN: %11:vgpr_32 = V_ADD_F32_dpp %12, 2, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+
+name: mov_e64_dpp
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+
+ ; default modifiers
+ %3:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 0, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+ %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+ ; both neg modifiers
+ %5:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 1, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+ %6:vgpr_32 = V_ADD_F32_e64 1, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+ ; neg, abs modifiers
+ %7:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 1, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+ %8:vgpr_32 = V_ADD_F32_e64 2, %7, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+ ; abs, neg modifiers
+ %9:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+ %10:vgpr_32 = V_ADD_F32_e64 1, %9, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+ ; both abs modifiers
+ %11:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+ %12:vgpr_32 = V_ADD_F32_e64 2, %11, 0, %0, 0, 0, implicit $mode, implicit $exec
+...
+
+# check for clamp
+# GCN-LABEL: name: mov_e64_dpp_clamp
+# GCN: %7:vgpr_32 = IMPLICIT_DEF
+# GCN: %3:vgpr_32 = V_ADD_F32_dpp %7, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 0, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %5:vgpr_32 = V_ADD_F32_e64_dpp %7, 0, %1, 0, %0, 1, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
+name: mov_e64_dpp_clamp
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+
+ ; ADD has no clamp
+ %3:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 0, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+ %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
+
+ ; ADD has clamp
+ %5:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 0, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
+ %6:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 1, 0, implicit $mode, implicit $exec
+...
+
# tests on sequences of dpp consumers
# GCN-LABEL: name: dpp_seq
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
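The mov_e64_dpp_clamp checks above show the opcode choice the clamp test exercises: when the consuming V_ADD_F32_e64 carries a clamp, the combiner must emit the VOP3 DPP form (V_ADD_F32_e64_dpp), which keeps a clamp operand, while the clamp-free case can shrink to plain V_ADD_F32_dpp. A tiny illustrative sketch of that selection (opcode names used as plain strings, not LLVM API):

// pick_dpp_opcode.cpp -- illustrative sketch of the clamp case tested above:
// a consumer with clamp set needs the VOP3 DPP form, which keeps the clamp
// operand; the shrunk DPP form has none.
#include <cassert>
#include <string>

std::string pickAddF32DppOpcode(bool ConsumerHasClamp) {
  return ConsumerHasClamp ? "V_ADD_F32_e64_dpp" // clamp survives the combine
                          : "V_ADD_F32_dpp";    // shrunk VOP2-style DPP
}

int main() {
  assert(pickAddF32DppOpcode(false) == "V_ADD_F32_dpp");
  assert(pickAddF32DppOpcode(true) == "V_ADD_F32_e64_dpp");
}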
diff --git a/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir b/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir
index d2bf9b553d8e1..416ef0e712115 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir
@@ -38,3 +38,42 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e64 1, 1, 0, implicit $exec
%3:vgpr_32 = V_ADDC_U32_e32 %0, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+---
+name: fold-clamp
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GCN-LABEL: name: fold-clamp
+ ; GCN: liveins: $vgpr0, $vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 0, [[COPY1]], 1, implicit $exec
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = V_MOV_B32_e64 0, %1, 0, implicit $exec
+ %3:vgpr_32 = V_ADD_U32_e64 %2, %1, 1, implicit $exec
+...
+
+---
+name: fold-clamp-modifier
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GCN-LABEL: name: fold-clamp-modifier
+ ; GCN: liveins: $vgpr0, $vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[V_MOV_B32_e64_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 1, [[COPY1]], 0, implicit $exec
+ ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e64_]], [[COPY1]], 1, implicit $exec
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = V_MOV_B32_e64 1, %1, 0, implicit $exec
+ %3:vgpr_32 = V_ADD_U32_e64 %2, %1, 1, implicit $exec
>From 8e63a7116f94528d52c756d148ad7dc02beae9d9 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 18 Jan 2024 14:29:11 +0100
Subject: [PATCH 10/20] Formatting
---
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index b0f44b0594a09..12c43e6523c13 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -306,10 +306,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
DPPInst.addImm(Mod0->getImm());
++NumOperands;
} else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) {
- if (MovMod)
- DPPInst.addImm(MovMod->getImm());
- else
- DPPInst.addImm(0);
+ DPPInst.addImm(MovMod ? MovMod->getImm() : 0);
++NumOperands;
}
>From 844788375e692c4f588aacaf9991bb8ef679793f Mon Sep 17 00:00:00 2001
From: ankurepa <154989521+ankurepa at users.noreply.github.com>
Date: Thu, 18 Jan 2024 14:53:56 +0100
Subject: [PATCH 11/20] Update llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Mirko Brkušanin <Mirko.Brkusanin at amd.com>
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4ccb551971436..90b021699ba7f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3310,14 +3310,11 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
case AMDGPU::V_ACCVGPR_READ_B32_e64:
case AMDGPU::V_ACCVGPR_MOV_B32:
return true;
- case AMDGPU::V_MOV_B32_e64:
- if (MI
- .getOperand(AMDGPU::getNamedOperandIdx(
- AMDGPU::V_MOV_B32_e64, AMDGPU::OpName::src0_modifiers))
- .getImm() == 0)
- return true;
- else
- return false;
+ case AMDGPU::V_MOV_B32_e64: {
+ int16_t Idx = AMDGPU::getNamedOperandIdx(AMDGPU::V_MOV_B32_e64,
+ AMDGPU::OpName::src0_modifiers);
+ return MI.getOperand(Idx).getImm() == SISrcMods::NONE;
+ }
default:
return false;
}
>From 223e0e82e7ad66fe64efc9a824a23e45039590e9 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 18 Jan 2024 15:41:49 +0100
Subject: [PATCH 12/20] Pattern and VOP_MOVRELD fix
---
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 20 +++++++-------------
1 file changed, 7 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index a112e14e6b443..e2e13695f6a5e 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -397,18 +397,13 @@ def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
-class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
+class VOP_MOVREL<RegisterOperand Src1RC, InputMods SrcModRC> : VOPProfile<[untyped, i32, untyped, untyped]> {
let Src0RC32 = VOPDstOperand<VGPR_32>;
let Src0RC64 = VOPDstOperand<VGPR_32>;
let Outs = (outs);
let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
-
- let Ins64 = !con((ins Src0RC64:$vdst),
- !if(!eq(Src1RC, VSrc_b32), (ins FP32InputMods:$src0_modifiers),
- (ins FPVRegInputMods:$src0_modifiers)),
- (ins Src1RC:$src0, clampmod0:$clamp));
-
+ let Ins64 = (ins Src0RC64:$vdst, SrcModRC:$src0_modifiers, Src1RC:$src0, clampmod0:$clamp);
let Asm32 = getAsm32<1, 1>.ret;
let OutsSDWA = (outs Src0RC32:$vdst);
@@ -446,12 +441,12 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let HasModifiers = 1;
let HasClamp = 1;
- let Src0Mod = !if(!eq(Src1RC, VSrc_b32), FP32InputMods, FPVRegInputMods);
+ let Src0Mod = SrcModRC;
let Src0ModVOP3DPP = FPVRegInputMods;
}
-def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
-def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;
+def VOP_MOVRELD : VOP_MOVREL<VSrc_b32, FP32InputMods>;
+def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32, FPVRegInputMods>;
let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
// v_movreld_b32 is a special case because the destination output
@@ -1416,9 +1411,8 @@ def : GCNPat <
let OtherPredicates = [isGFX12Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
- timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, SRCMODS.NONE, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX12Only]
>From c5aa532937d9ae349fffba05491d78de94c86382 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Thu, 18 Jan 2024 16:42:31 +0100
Subject: [PATCH 13/20] Patterns
---
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index e2e13695f6a5e..a3059ed35e0c7 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1387,9 +1387,8 @@ defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
let OtherPredicates = [isGFX10Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
- timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, SRCMODS.NONE, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Only]
@@ -1398,9 +1397,8 @@ def : GCNPat <
//===----------------------------------------------------------------------===//
let OtherPredicates = [isGFX11Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 (i32(VOP3Mods i32:$src, i32:$src0_modifiers)),
- timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, SRCMODS.NONE, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX11Only]
>From 0abc964c4cdead88aa2b4a0e793e581ff63655cb Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Tue, 30 Jan 2024 17:43:09 +0100
Subject: [PATCH 14/20] Remove v_mov_b32 modifications
---
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 40 +--
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 8 +-
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 21 +-
.../global-atomic-fadd.f32-no-rtn.ll | 14 +-
.../GlobalISel/global-atomic-fadd.f32-rtn.ll | 14 +-
llvm/test/CodeGen/AMDGPU/dpp64_combine.mir | 4 +-
llvm/test/CodeGen/AMDGPU/dpp_combine.mir | 180 ++++++-------
.../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 249 +++++++-----------
.../CodeGen/AMDGPU/fold-mov-modifiers.mir | 79 ------
.../AMDGPU/global-atomic-fadd.f32-no-rtn.ll | 34 +--
.../AMDGPU/global-atomic-fadd.f32-rtn.ll | 14 +-
.../CodeGen/AMDGPU/inserted-wait-states.mir | 4 +-
llvm/test/CodeGen/AMDGPU/remat-vop.mir | 24 +-
.../AMDGPU/schedule-ilp-liveness-tracking.mir | 8 +-
.../AMDGPU/sdwa-peephole-instr-gfx10.mir | 2 +-
.../CodeGen/AMDGPU/sdwa-peephole-instr.mir | 2 +-
llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir | 4 +-
llvm/test/CodeGen/AMDGPU/vopc_dpp.mir | 34 +--
llvm/test/CodeGen/AMDGPU/wqm.mir | 6 +-
llvm/test/MC/AMDGPU/gfx10_asm_vop1.s | 9 -
.../AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s | 54 ----
.../MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s | 15 --
.../test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s | 9 -
.../test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s | 9 -
.../AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s | 54 ----
.../MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s | 15 --
.../MC/Disassembler/AMDGPU/gfx10_vop3.txt | 6 -
.../gfx11_dasm_vop3_dpp16_from_vop1.txt | 54 ----
.../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt | 12 -
.../AMDGPU/gfx11_dasm_vop3_from_vop1.txt | 9 -
.../AMDGPU/gfx12_dasm_vop3_from_vop1.txt | 9 -
.../gfx12_dasm_vop3_from_vop1_dpp16.txt | 54 ----
.../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt | 12 -
33 files changed, 279 insertions(+), 783 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 12c43e6523c13..fdae6823db14c 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -208,7 +208,6 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
bool CombBCZ,
bool IsShrinkable) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
- MovMI.getOpcode() == AMDGPU::V_MOV_B32_e64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
@@ -276,37 +275,17 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
break;
}
- MachineOperand *MovMod = nullptr;
- if (AMDGPU::hasNamedOperand(MovMI.getOpcode(),
- AMDGPU::OpName::src0_modifiers)) {
- MovMod = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0_modifiers);
- if (MovMod)
- assert(0LL == (MovMod->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
- }
- if (auto *Mod0 =
- TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0_modifiers)) {
- assert(NumOperands ==
- AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src0_modifiers));
+
+ auto *Mod0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0_modifiers);
+ if (Mod0) {
+ assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
+ AMDGPU::OpName::src0_modifiers));
assert(HasVOP3DPP ||
(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
- // MovMod MIMod
- // abs abs -> abs
- // abs neg -> abs|neg
- // neg abs -> abs
- // neg neg -> 0
- if (MovMod && MovMod->getImm() == SISrcMods::ABS &&
- Mod0->getImm() == SISrcMods::NEG)
- DPPInst.addImm(SISrcMods::ABS | SISrcMods::NEG);
- else if (MovMod && MovMod->getImm() == SISrcMods::NEG &&
- Mod0->getImm() == SISrcMods::NEG)
- DPPInst.addImm(0);
- else if (Mod0->getImm() == SISrcMods::NONE && MovMod)
- DPPInst.addImm(MovMod->getImm());
- else
- DPPInst.addImm(Mod0->getImm());
+ DPPInst.addImm(Mod0->getImm());
++NumOperands;
} else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) {
- DPPInst.addImm(MovMod ? MovMod->getImm() : 0);
+ DPPInst.addImm(0);
++NumOperands;
}
@@ -529,7 +508,6 @@ bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
- MovMI.getOpcode() == AMDGPU::V_MOV_B32_e64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
@@ -769,9 +747,7 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (auto &MBB : MF) {
for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
- if ((MI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
- MI.getOpcode() == AMDGPU::V_MOV_B32_e64_dpp) &&
- combineDPPMov(MI)) {
+ if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
Changed = true;
++NumDPPMovsCombined;
} else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 90b021699ba7f..ebe23a5eac57b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2600,8 +2600,6 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
const MachineOperand &SrcOp = MI.getOperand(I);
- if (I == 2)
- MovDPP.addImm(0); // add src modifier
assert(!SrcOp.isFPImm());
if (SrcOp.isImm()) {
APInt Imm(64, SrcOp.getImm());
@@ -3298,6 +3296,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
case AMDGPU::V_MOV_B64_e32:
case AMDGPU::V_MOV_B64_e64:
@@ -3310,11 +3309,6 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
case AMDGPU::V_ACCVGPR_READ_B32_e64:
case AMDGPU::V_ACCVGPR_MOV_B32:
return true;
- case AMDGPU::V_MOV_B32_e64: {
- int16_t Idx = AMDGPU::getNamedOperandIdx(AMDGPU::V_MOV_B32_e64,
- AMDGPU::OpName::src0_modifiers);
- return MI.getOperand(Idx).getImm() == SISrcMods::NONE;
- }
default:
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index a3059ed35e0c7..27b3f8a31f19a 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -222,13 +222,6 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
let InsVOPDY = (ins Src0RC32:$src0Y);
let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
-
- let HasModifiers = 1;
- let HasClamp = 1;
-
- let Src0Mod = FP32InputMods;
- let Src0ModVOP3DPP = FPVRegInputMods;
- let Src0ModDPP = FPVRegInputMods;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
@@ -1282,18 +1275,18 @@ def V_MOV_B32_indirect_read : VPseudoInstSI<
let OtherPredicates = [isGFX8Plus] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp (i32(VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp_ctrl, timm:$row_mask,
+ (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
timm:$bank_mask, timm:$bound_ctrl)),
- (V_MOV_B32_dpp VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+ (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
(as_i32timm $row_mask), (as_i32timm $bank_mask),
(as_i1timm $bound_ctrl))
>;
class UpdateDPPPat<ValueType vt> : GCNPat <
- (vt (int_amdgcn_update_dpp vt:$old,(vt(VOP3Mods vt:$src, i32:$src0_modifiers)), timm:$dpp_ctrl,
+ (vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
timm:$row_mask, timm:$bank_mask,
timm:$bound_ctrl)),
- (V_MOV_B32_dpp VGPR_32:$old,i32:$src0_modifiers, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+ (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
(as_i32timm $row_mask), (as_i32timm $bank_mask),
(as_i1timm $bound_ctrl))
>;
@@ -1388,7 +1381,7 @@ defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
let OtherPredicates = [isGFX10Only] in {
def : GCNPat <
(i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, SRCMODS.NONE, VGPR_32:$src,
+ (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Only]
@@ -1398,7 +1391,7 @@ def : GCNPat <
let OtherPredicates = [isGFX11Only] in {
def : GCNPat <
(i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, SRCMODS.NONE, VGPR_32:$src,
+ (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX11Only]
@@ -1410,7 +1403,7 @@ def : GCNPat <
let OtherPredicates = [isGFX12Only] in {
def : GCNPat <
(i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, SRCMODS.NONE, VGPR_32:$src,
+ (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX12Only]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
index c6b6572082d05..ff75fdbd0ed95 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
@@ -174,22 +174,22 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY11]], implicit-def dead $scc, implicit $exec
; GFX90A_GFX940-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_4]]
@@ -221,4 +221,4 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
+attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
index b74ac41ed813b..c9efb4b45cd22 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
@@ -176,24 +176,24 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY6]], implicit-def dead $scc, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY9]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY9]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_3]], 0, [[S_MOV_B32_3]], [[V_ADD_F32_e64_3]], 0, implicit $exec
; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], 0, [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], 0, [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 15
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_4]]
; GFX11-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 16
@@ -242,4 +242,4 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
+attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index b9446a09b0302..dfaa7b4efac39 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -39,8 +39,8 @@ body: |
# DPP64 does not support all control values and must be split to become legal
# GCN-LABEL: name: dpp64_illegal_ctrl
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, 0, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
-# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, 0, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
# GCN: %0:vreg_64_align2 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
# GCN: %3:vreg_64_align2 = V_CEIL_F64_e32 %0, implicit $mode, implicit $exec
name: dpp64_illegal_ctrl
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index a3f915cc6b124..becc2bb095cc4 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -25,29 +25,29 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; VOP2
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
; VOP1
- %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
- %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
%14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
%16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
- %17:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...
@@ -109,29 +109,29 @@ body: |
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; VOP2
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
; VOP1
- %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
- %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
%14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
%16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
- %17:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...
@@ -158,19 +158,19 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 15, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 15, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 15, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 15, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 15, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
@@ -196,19 +196,19 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
@@ -234,19 +234,19 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 1, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %6, %1, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 1, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %9, %1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 1, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 1, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %12, %1, implicit $exec
...
@@ -268,28 +268,28 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e32 %1, %3, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e32 %1, %6, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e32 %1, %9, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e32 %1, %12, implicit $exec
%14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %14, 0, %0, 1, 14, 15, 0, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
%16:vgpr_32 = V_SUB_CO_U32_e32 %1, %15, implicit-def $vcc, implicit $exec
; this cannot be combined because immediate as src0 isn't commutable
%17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %18:vgpr_32 = V_MOV_B32_dpp %17, 0, %0, 1, 14, 15, 0, implicit $exec
+ %18:vgpr_32 = V_MOV_B32_dpp %17, %0, 1, 14, 15, 0, implicit $exec
%19:vgpr_32 = V_ADD_CO_U32_e32 5, %18, implicit-def $vcc, implicit $exec
...
@@ -297,7 +297,7 @@ body: |
# check for floating point modifiers
# GCN-LABEL: name: add_f32_e64
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -314,19 +314,19 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this shouldn't be combined as omod is set
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
; this should be combined as all modifiers are default
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
; this should be combined as modifiers other than abs|neg are default
- %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
; this shouldn't be combined as modifiers aren't abs|neg
- %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0,%1, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...
@@ -346,11 +346,11 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this should be combined as all modifiers are default
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
; this shouldn't be combined as clamp is set
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...
@@ -368,7 +368,7 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this shouldn't be combined as the carry-out is used
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32, %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, %1, 0, implicit $exec
S_NOP 0, implicit %5
@@ -380,7 +380,7 @@ body: |
# GCN: %5:vgpr_32 = V_SUBREV_CO_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
-# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
name: dpp_seq
tracksRegLiveness: true
@@ -391,12 +391,12 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
%5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%8:vgpr_32 = V_ADD_CO_U32_e32 %7, %1, implicit-def $vcc, implicit $exec
; this breaks the sequence
%9:vgpr_32 = V_SUB_CO_U32_e32 5, %7, implicit-def $vcc, implicit $exec
@@ -418,7 +418,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_CO_U32_e32 %3, %1, implicit-def $vcc, implicit $exec
%5:vgpr_32 = V_SUB_CO_U32_e32 %1, %3, implicit-def $vcc, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
@@ -450,7 +450,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 1, 1, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...
@@ -471,13 +471,13 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...
# EXEC mask changed between def and use - cannot combine
# GCN-LABEL: name: exec_changed
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
name: exec_changed
tracksRegLiveness: true
@@ -488,7 +488,7 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:sreg_64 = COPY $exec, implicit-def $exec
%6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
@@ -511,7 +511,7 @@ body: |
%3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
- %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, 0, %1, 1, 1, 1, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
%7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...
@@ -528,7 +528,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 1, 1, 0, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -545,7 +545,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -557,7 +557,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
@@ -569,7 +569,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...
@@ -580,43 +580,43 @@ name: dpp_undef_old
tracksRegLiveness: true
body: |
bb.0:
- %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which writes a physreg.
# GCN-LABEL: name: phys_dpp_mov_dst
-# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
bb.0:
- $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_old_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
@@ -637,8 +637,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -649,7 +649,7 @@ body: |
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -662,8 +662,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -673,7 +673,7 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
@@ -687,8 +687,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -698,8 +698,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -712,8 +712,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -723,8 +723,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: S_BRANCH %bb.1
# GCN: bb.1:
@@ -739,8 +739,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
S_BRANCH %bb.1
@@ -753,8 +753,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_CO_U32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec
@@ -768,8 +768,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
%6:vgpr_32 = V_ADD_CO_U32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec
@@ -803,7 +803,7 @@ body: |
...
# GCN-LABEL: name: dpp64_add64_first_combined
-# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, 0, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
+# GCN: %8:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, 1, 15, 15, 1, implicit $exec
# GCN: %0:vreg_64 = REG_SEQUENCE undef %7:vgpr_32, %subreg.sub0, %8, %subreg.sub1
# GCN: %3:vgpr_32 = V_ADD_CO_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
# GCN: %5:vgpr_32, dead %6:sreg_64_xexec = V_ADDC_U32_e64 1, %0.sub1, undef $vcc, 0, implicit $exec
@@ -827,7 +827,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%4:sreg_64_xexec = IMPLICIT_DEF
%5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
...
@@ -847,7 +847,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %4
@@ -862,7 +862,7 @@ body: |
liveins: $vgpr0, $vgpr1
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...
@@ -876,8 +876,8 @@ body: |
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_ADD_CO_U32_e32 %4.sub0, %4.sub0, implicit-def $vcc, implicit $exec
%6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -903,7 +903,7 @@ body: |
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %5, %bb.2
- ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[PHI]], 323, 15, 15, 0, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[PHI]], 323, 15, 15, 0, implicit $exec
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
@@ -925,7 +925,7 @@ body: |
bb.1:
%4:vgpr_32 = PHI %1, %bb.0, %5, %bb.2
- %5:vgpr_32 = V_MOV_B32_dpp %1, 0, %4, 323, 15, 15, 0, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %1, %4, 323, 15, 15, 0, implicit $exec
bb.2:
%6:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %3, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index fd495ee9b11a6..fe1345e29f133 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -19,7 +19,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %0, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
%5:sreg_32_xm0_xexec = IMPLICIT_DEF
%6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64 %4, %1, %5, 1, implicit $exec
@@ -27,11 +27,11 @@ body: |
%8:vgpr_32 = V_CVT_PK_U8_F32_e64 4, %4, 2, %2, 2, %1, 1, implicit $mode, implicit $exec
; should not be combined because src2 literal is illegal
- %9:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
; should not be combined on subtargets where src1 imm is illegal
- %11:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
...
---
@@ -58,23 +58,23 @@ body: |
%4:vgpr_32 = IMPLICIT_DEF
; should be combined because src2 allows sgpr
- %5:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_MED3_F32_e64 0, %5, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
; should be combined only on subtargets that allow sgpr for src1
- %7:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
; should be combined only on subtargets that allow sgpr for src1
- %9:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
; should be combined only on subtargets that allow inlinable constants for src1
- %11:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
; should not be combined when literal constants are used
- %13:vgpr_32 = V_MOV_B32_dpp %4, 0, %0, 1, 15, 15, 1, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
%14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
...
---
@@ -93,9 +93,9 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %5, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
%7:vgpr_32 = V_ADD_NC_U16_e64 4, %6, 8, %5, 0, 0, implicit $exec
...
@@ -111,9 +111,9 @@ body: |
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 13, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -125,23 +125,23 @@ body: |
%3:vgpr_32 = IMPLICIT_DEF
; this should not be combined because op_sel is not zero
- %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_DOT2_F32_F16 0, %4, 0, %0, 0, %2, 0, 5, 0, 0, 0, implicit $mode, implicit $exec
; this should not be combined because op_sel_hi is not all set
- %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec
- %8:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 13, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
- %10:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%13:vgpr_32 = V_FMA_MIXLO_F16 8, %12, 8, %0, 8, %2, 0, %2, 0, 7, implicit $mode, implicit $exec
- %14:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %14:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%15:vgpr_32 = V_FMA_MIXHI_F16 8, %14, 8, %0, 8, %2, 1, %0, 0, 7, implicit $mode, implicit $exec
...
@@ -158,7 +158,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %0, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_FMAC_F32_e64 2, %4, 2, %1, 2, %2, 1, 2, implicit $mode, implicit $exec
...
@@ -179,23 +179,23 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 0, implicit $exec
%5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 14, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
%7:vgpr_32 = V_AND_B32_e64 %1, %6, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_MAX_I32_e64 %1, %9, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 15, 14, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
%13:vgpr_32 = V_MIN_I32_e64 %1, %12, implicit $exec
%14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %14, 0, %0, 1, 14, 15, 0, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
%16:vgpr_32 = V_SUB_U32_e64 %1, %15, 0, implicit $exec
...
@@ -215,13 +215,13 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
...
@@ -243,24 +243,24 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 1, implicit $exec
%5:vgpr_32 = IMPLICIT_DEF
- %6:vgpr_32 = V_MOV_B32_dpp %5, 0, %0, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 1, implicit $exec
%7:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %6, 2, %1, 1, 2, implicit $mode, implicit $exec
%8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %8, 0, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_SUB_U32_e64 %1, %9, 1, implicit $exec
%11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %12:vgpr_32 = V_MOV_B32_dpp %11, 0, %0, 1, 14, 15, 0, implicit $exec
+ %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 14, 15, 0, implicit $exec
%13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1, %12, 0, implicit $exec
; this cannot be combined because immediate as src0 isn't commutable
%15:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %16:vgpr_32 = V_MOV_B32_dpp %15, 0, %0, 1, 14, 15, 0, implicit $exec
+ %16:vgpr_32 = V_MOV_B32_dpp %15, %0, 1, 14, 15, 0, implicit $exec
%17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
...
@@ -284,19 +284,19 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this should be combined as e64
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
; this should be combined and shrunk as all modifiers are default
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
; this should be combined and shrunk as modifiers other than abs|neg are default
- %7:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
; this should be combined as e64
- %9:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
%10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...
@@ -316,88 +316,21 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; this should be combined and shrunk as all modifiers are default
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
; this should be combined as _e64
- %5:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
%6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...
-# check for mov_e64_dpp modifiers
-# GCN-LABEL: name: mov_e64_dpp
-# GCN: %16:vgpr_32 = IMPLICIT_DEF
-# GCN: %3:vgpr_32 = V_ADD_F32_dpp %16, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %15:vgpr_32 = IMPLICIT_DEF
-# GCN: %5:vgpr_32 = V_ADD_F32_dpp %15, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %14:vgpr_32 = IMPLICIT_DEF
-# GCN: %7:vgpr_32 = V_ADD_F32_dpp %14, 2, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %13:vgpr_32 = IMPLICIT_DEF
-# GCN: %9:vgpr_32 = V_ADD_F32_dpp %13, 3, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %12:vgpr_32 = IMPLICIT_DEF
-# GCN: %11:vgpr_32 = V_ADD_F32_dpp %12, 2, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-
-name: mov_e64_dpp
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- %0:vgpr_32 = COPY $vgpr0
- %1:vgpr_32 = COPY $vgpr1
-
- ; default modifiers
- %3:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 0, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
- %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
-
- ; both neg modifiers
- %5:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 1, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
- %6:vgpr_32 = V_ADD_F32_e64 1, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
-
- ; neg, abs modifiers
- %7:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 1, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
- %8:vgpr_32 = V_ADD_F32_e64 2, %7, 0, %0, 0, 0, implicit $mode, implicit $exec
-
- ; abs, neg modifiers
- %9:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
- %10:vgpr_32 = V_ADD_F32_e64 1, %9, 0, %0, 0, 0, implicit $mode, implicit $exec
-
- ; both abs modifiers
- %11:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
- %12:vgpr_32 = V_ADD_F32_e64 2, %11, 0, %0, 0, 0, implicit $mode, implicit $exec
-...
-
-# check for clamp
-# GCN-LABEL: name: mov_e64_dpp_clamp
-# GCN: %7:vgpr_32 = IMPLICIT_DEF
-# GCN: %3:vgpr_32 = V_ADD_F32_dpp %7, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 0, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
-# GCN: %5:vgpr_32 = V_ADD_F32_e64_dpp %7, 0, %1, 0, %0, 1, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
-name: mov_e64_dpp_clamp
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- %0:vgpr_32 = COPY $vgpr0
- %1:vgpr_32 = COPY $vgpr1
-
- ; ADD has no clamp
- %3:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 0, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
- %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
-
- ; ADD has clamp
- %5:vgpr_32 = V_MOV_B32_e64_dpp undef %0, 0, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
- %6:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 1, 0, implicit $mode, implicit $exec
-...
-
# tests on sequences of dpp consumers
# GCN-LABEL: name: dpp_seq
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
-# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
name: dpp_seq
tracksRegLiveness: true
@@ -408,12 +341,12 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
; this breaks the sequence
%9:vgpr_32 = V_SUB_U32_e32 5, %7, implicit $exec
@@ -435,7 +368,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
%6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
@@ -467,7 +400,7 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 1, 1, 0, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...
@@ -488,13 +421,13 @@ body: |
S_BRANCH %bb.1
bb.1:
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...
# EXEC mask changed between def and use - cannot combine
# GCN-LABEL: name: exec_changed
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
name: exec_changed
tracksRegLiveness: true
@@ -505,7 +438,7 @@ body: |
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
%5:sreg_64 = COPY $exec, implicit-def $exec
%6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
@@ -528,7 +461,7 @@ body: |
%3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
- %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, 0, %1, 1, 1, 1, 0, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
%7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...
@@ -545,7 +478,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 1, 1, 0, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -562,7 +495,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
- %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, 0, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...
@@ -574,7 +507,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
@@ -586,7 +519,7 @@ tracksRegLiveness: true
body: |
bb.0:
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...
@@ -597,43 +530,43 @@ name: dpp_undef_old
tracksRegLiveness: true
body: |
bb.0:
- %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which writes a physreg.
# GCN-LABEL: name: phys_dpp_mov_dst
-# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
bb.0:
- $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_old_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_src
-# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
bb.0:
- %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...
@@ -654,8 +587,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -666,7 +599,7 @@ body: |
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -679,8 +612,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -690,7 +623,7 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
@@ -704,8 +637,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -715,8 +648,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -729,8 +662,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 1, 1, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
%7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -740,8 +673,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: S_BRANCH %bb.1
# GCN: bb.1:
@@ -756,8 +689,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
S_BRANCH %bb.1
@@ -770,8 +703,8 @@ body: |
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
-# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec
@@ -785,8 +718,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
%6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %8, implicit $exec
@@ -794,8 +727,8 @@ body: |
...
# GCN-LABEL: name: dpp_reg_sequence_src2_reject
-#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
-#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
#GCN: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
#GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
@@ -808,8 +741,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
; use of dpp arg as src2, reject
@@ -819,7 +752,7 @@ body: |
...
# GCN-LABEL: name: dpp_reg_sequence_src2
-#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
#GCN: %4:vreg_64 = REG_SEQUENCE undef %2:vgpr_32, %subreg.sub0, %3, %subreg.sub1
#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
#GCN: %6:vgpr_32 = V_FMA_F32_e64_dpp %8, 2, %1.sub0, 2, %5, 2, %4.sub1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -831,8 +764,8 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
%6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
@@ -877,12 +810,12 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
%4:sreg_32_xm0_xexec = IMPLICIT_DEF
%5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
; src2 is legal for _e64
- %6:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 15, 1, implicit $exec
%7:sreg_32_xm0_xexec = IMPLICIT_DEF
%8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
...
@@ -902,7 +835,7 @@ body: |
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
- %3:vgpr_32 = V_MOV_B32_dpp undef %2, 0, %1, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0, implicit %4
@@ -917,7 +850,7 @@ body: |
liveins: $vgpr0, $vgpr1
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_dpp %0, 0, %1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...
@@ -931,8 +864,8 @@ body: |
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
- %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, 0, %1.sub0, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, 0, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
%4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
%5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
%6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir b/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir
deleted file mode 100644
index 416ef0e712115..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/fold-mov-modifiers.mir
+++ /dev/null
@@ -1,79 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
-
----
-name: fold-no-modifier
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1, $vcc
-
- ; GCN-LABEL: name: fold-no-modifier
- ; GCN: liveins: $vgpr0, $vgpr1, $vcc
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[COPY]], implicit-def $vcc, implicit $vcc, implicit $exec
- %0:vgpr_32 = COPY $vgpr0
- %1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
- %3:vgpr_32 = V_ADDC_U32_e32 %0, %2, implicit-def $vcc, implicit $vcc, implicit $exec
-...
-
----
-name: no-fold-with-modifier
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1, $vcc
-
- ; GCN-LABEL: name: no-fold-with-modifier
- ; GCN: liveins: $vgpr0, $vgpr1, $vcc
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[V_MOV_B32_e64_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 1, 1, 0, implicit $exec
- ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 [[COPY]], [[V_MOV_B32_e64_]], implicit-def $vcc, implicit $vcc, implicit $exec
- %0:vgpr_32 = COPY $vgpr0
- %1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_e64 1, 1, 0, implicit $exec
- %3:vgpr_32 = V_ADDC_U32_e32 %0, %2, implicit-def $vcc, implicit $vcc, implicit $exec
-...
-
----
-name: fold-clamp
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GCN-LABEL: name: fold-clamp
- ; GCN: liveins: $vgpr0, $vgpr1
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 0, [[COPY1]], 1, implicit $exec
- %0:vgpr_32 = COPY $vgpr0
- %1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_e64 0, %1, 0, implicit $exec
- %3:vgpr_32 = V_ADD_U32_e64 %2, %1, 1, implicit $exec
-...
-
----
-name: fold-clamp-modifier
-tracksRegLiveness: true
-body: |
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GCN-LABEL: name: fold-clamp-modifier
- ; GCN: liveins: $vgpr0, $vgpr1
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[V_MOV_B32_e64_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 1, [[COPY1]], 0, implicit $exec
- ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e64_]], [[COPY1]], 1, implicit $exec
- %0:vgpr_32 = COPY $vgpr0
- %1:vgpr_32 = COPY $vgpr1
- %2:vgpr_32 = V_MOV_B32_e64 1, %1, 0, implicit $exec
- %3:vgpr_32 = V_ADD_U32_e64 %2, %1, 1, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
index 9f23ad9459739..f02a45dee93be 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
@@ -173,17 +173,17 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX908-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
- ; GFX908-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+ ; GFX908-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX908-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX908-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]]
@@ -235,17 +235,17 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX90A_GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]]
@@ -293,13 +293,13 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX11_GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX11_GFX12-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX11_GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 353, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 353, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 354, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 354, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 356, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 356, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 360, 15, 15, 0, implicit $exec
+ ; GFX11_GFX12-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 360, 15, 15, 0, implicit $exec
; GFX11_GFX12-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX11_GFX12-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11_GFX12-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec
@@ -330,4 +330,4 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
+declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
index 699c5e61b30bb..39e09b291d131 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
@@ -179,19 +179,19 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GFX11-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY]], killed [[S_MOV_B32_1]], implicit-def dead $scc, implicit $exec
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, killed [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], killed [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 15
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], killed [[S_MOV_B32_3]]
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 16
@@ -239,4 +239,4 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
+attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
index 09703117f41a4..cd80098b01209 100644
--- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -419,11 +419,11 @@ name: dpp
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $vgpr1 = V_MOV_B32_dpp $vgpr1, 0, $vgpr0, 0, 15, 15, 0, implicit $exec
+ $vgpr1 = V_MOV_B32_dpp $vgpr1, $vgpr0, 0, 15, 15, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
- $vgpr3 = V_MOV_B32_dpp $vgpr3, 0, $vgpr0, 0, 15, 15, 0, implicit $exec
+ $vgpr3 = V_MOV_B32_dpp $vgpr3, $vgpr0, 0, 15, 15, 0, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index 720736b51c839..248a9e2ddb636 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -82,16 +82,16 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mov_b32_e64
- ; GCN: renamable $vgpr0 = V_MOV_B32_e64 0, 1, 0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 0, 2, 0, implicit $exec
+ ; GCN: renamable $vgpr0 = V_MOV_B32_e64 1, implicit $exec
+ ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 2, implicit $exec
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 0, 3, 0, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
- %0:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
- %1:vgpr_32 = V_MOV_B32_e64 0, 2, 0, implicit $exec
- %2:vgpr_32 = V_MOV_B32_e64 0, 3, 0, implicit $exec
+ %0:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e64 2, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e64 3, implicit $exec
S_NOP 0, implicit %0
S_NOP 0, implicit %1
S_NOP 0, implicit %2
@@ -105,10 +105,10 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_mov_b32_dpp
- ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
@@ -116,9 +116,9 @@ body: |
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
- %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
- %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
- %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
S_NOP 0, implicit %1
S_NOP 0, implicit %2
S_NOP 0, implicit %3
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir b/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
index 5c6783b376a1a..4b6e204ecf957 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir
@@ -9,8 +9,8 @@ body: |
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %src0:vgpr_32 = V_MOV_B32_e64 0, 0, 0, implicit $exec
- ; CHECK-NEXT: %src1:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+ ; CHECK-NEXT: %src0:vgpr_32 = V_MOV_B32_e64 0, implicit $exec
+ ; CHECK-NEXT: %src1:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
; CHECK-NEXT: %live0:vgpr_32 = V_ADD_U32_e32 %src0, %src1, implicit $exec
; CHECK-NEXT: %live1:vgpr_32 = V_ADD_U32_e32 %live0, %src1, implicit $exec
; CHECK-NEXT: {{ $}}
@@ -20,8 +20,8 @@ body: |
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
- %src0:vgpr_32 = V_MOV_B32_e64 0, 0, 0, implicit $exec
- %src1:vgpr_32 = V_MOV_B32_e64 0, 1, 0, implicit $exec
+ %src0:vgpr_32 = V_MOV_B32_e64 0, implicit $exec
+ %src1:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
%live0:vgpr_32 = V_ADD_U32_e32 %src0:vgpr_32, %src1:vgpr_32, implicit $exec
%live1:vgpr_32 = V_ADD_U32_e32 %live0:vgpr_32, %src1:vgpr_32, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
index ec9e1db6e7649..dcb51fcb76653 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir
@@ -100,7 +100,7 @@ body: |
%24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
%25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
- %26 = V_MOV_B32_e64 0, %25, 0, implicit $exec
+ %26 = V_MOV_B32_e64 %25, implicit $exec
%26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
%27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
%28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
index 4108102e865d6..a2cad1398bcd1 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
@@ -109,7 +109,7 @@ body: |
%24 = V_LSHLREV_B32_e64 16, %23, implicit $exec
%25 = V_LSHRREV_B32_e64 16, %3, implicit $exec
- %26 = V_MOV_B32_e64 0, %25, 0, implicit $exec
+ %26 = V_MOV_B32_e64 %25, implicit $exec
%26 = V_LSHLREV_B32_e64 16, %26, implicit $exec
%27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
%28 = V_LSHLREV_B32_e64 16, %27, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
index 1a04fdc8bae1a..7531ad9741c6c 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -54,8 +54,8 @@ body: |
%18 = V_LSHRREV_B32_e64 16, %17, implicit $exec
%19 = V_READLANE_B32 killed %18, 0, implicit-def $vcc, implicit $exec
- %20 = V_MOV_B32_e64 0, %19, 0, implicit $exec
+ %20 = V_MOV_B32_e64 %19, implicit $exec
FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
$sgpr30_sgpr31 = COPY %2
- S_SETPC_B64_return $sgpr30_sgpr31
+ S_SETPC_B64_return $sgpr30_sgpr31
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index c29bccf2fa6a5..b8c935927ff70 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -17,15 +17,15 @@ body: |
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec
; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc
@@ -35,41 +35,41 @@ body: |
%2:vgpr_32 = COPY $vgpr2
%3:vgpr_32 = IMPLICIT_DEF
- %4:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMP_LT_F32_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
; unsafe to combine cmpx
- %5:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMPX_EQ_I16_t16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
- %6:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec
- %8:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
; unsafe to combine cmpx
- %10:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMPX_GT_U32_nosdst_e64 %10, %0, implicit-def $exec, implicit $mode, implicit $exec
- %11:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec
; shrink
- %13:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%14:sgpr_32 = V_CMP_NGE_F32_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec
; do not shrink True16 instructions
- %15:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
; do not shrink, sdst used
- %17:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%18:sgpr_32 = V_CMP_NGE_F32_e64 0, %17, 0, %0, 0, implicit $mode, implicit $exec
%19:sgpr_32 = S_AND_B32 %18, 10101, implicit-def $scc
; commute
- %20:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 15, 15, 1, implicit $exec
+ %20:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMP_LT_I32_e32 %0, %20, implicit-def $vcc, implicit $exec
...
@@ -88,9 +88,9 @@ body: |
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], 0, [[COPY1]], 1, 15, 14, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], 0, [[COPY1]], 1, 13, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
@@ -99,10 +99,10 @@ body: |
; Do not combine VOPC when row_mask or bank_mask is not 0xf
; All cases are covered by generic rules for creating DPP instructions
- %4:vgpr_32 = V_MOV_B32_dpp %2, 0, %1, 1, 15, 14, 1, implicit $exec
+ %4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
%99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %3, 0, %1, 1, 13, 15, 1, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
%6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 27373dee64f33..ef6d0780f395f 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -1,4 +1,4 @@
-# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
--- |
define amdgpu_ps void @test_strict_wwm_scc() {
@@ -178,7 +178,7 @@ body: |
%11:vgpr_32 = COPY %16
%10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
%14:vgpr_32 = COPY %7
- %13:vgpr_32 = V_MOV_B32_dpp %14, 0, killed %10, 323, 12, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
early-clobber %15:vgpr_32 = STRICT_WWM killed %13, implicit $exec
BUFFER_STORE_DWORD_OFFSET_exact killed %15, %6, %7, 4, 0, 0, implicit $exec
S_ENDPGM 0
@@ -210,7 +210,7 @@ body: |
%8:sreg_64 = COPY $exec
%9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
- %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, 0, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
%12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
early-clobber %13:sreg_32 = STRICT_WWM %9:vgpr_32, implicit $exec
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
index e84f559c854a4..6d7b89baf5272 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop1.s
@@ -106,15 +106,6 @@ v_mov_b32_e64 v5, 0.5
v_mov_b32_e64 v5, -4.0
// GFX10: encoding: [0x05,0x00,0x81,0xd5,0xf7,0x00,0x00,0x00]
-v_mov_b32_e64 v5, |v1|
-// GFX10: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
-
-v_mov_b32_e64 v5, -v1
-// GFX10: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
-
-v_mov_b32_e64 v5, -m0
-// GFX10: encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20]
-
v_mov_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
// GFX10: encoding: [0xf9,0x02,0x0a,0x7e,0x01,0x06,0x06,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
index 74f6e9391c989..38eb00aecafb1 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
@@ -1932,60 +1932,6 @@ v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl
v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
-v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
-
-v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3]
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_mirror
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_half_mirror
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_shl:1
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_shl:15
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_shr:1
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_shr:15
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_ror:1
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_ror:15
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01]
-
-v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-
-v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
-
-v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-
-v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
-// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
-// GFX11: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-
v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
index 6db9923c41f5f..dc1d0cc4c193e 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
@@ -489,21 +489,6 @@ v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x81,0xa7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-
-v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: [0x05,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-
-v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-
-v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-
-v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
-
v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
index c1790c953d906..efd71b4dcce06 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s
@@ -2538,15 +2538,6 @@ v_mov_b32_e64 v5, src_scc
v_mov_b32_e64 v255, 0xaf123456
// GFX11: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_mov_b32_e64 v5, |v1|
-// GFX11: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
-
-v_mov_b32_e64 v5, -m0
-// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
-
-v_mov_b32_e64 v5, -v1
-// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
-
v_movreld_b32_e64 v5, v1
// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
index ebda8b65f8505..0cb28bc5c8a2f 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s
@@ -2538,15 +2538,6 @@ v_mov_b32_e64 v5, src_scc
v_mov_b32_e64 v255, 0xaf123456
// GFX12: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_mov_b32_e64 v5, |v1|
-// GFX12: encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
-
-v_mov_b32_e64 v5, -m0
-// GFX12: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
-
-v_mov_b32_e64 v5, -v1
-// GFX12: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
-
v_movreld_b32_e64 v5, v1
// GFX12: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
index e6fbe31586afd..a32e531e07661 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
@@ -1932,60 +1932,6 @@ v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl
v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
-v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
-
-v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3]
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_mirror
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_half_mirror
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_shl:1
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_shl:15
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_shr:1
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_shr:15
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_ror:1
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_ror:15
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01]
-
-v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-
-v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
-
-v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-
-v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf
-// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-
-v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
-// GFX12: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-
v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
index 4d21a54b35474..17f08ee1c9b0f 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
@@ -489,21 +489,6 @@ v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x81,0xa7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-
-v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX12: [0x05,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-
-v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
-
-v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-
-v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-
v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
index d9661b1e7a433..368e0f20163d1 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt
@@ -14004,12 +14004,6 @@
# GFX10: v_mov_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00]
0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00
-# GFX10: v_mov_b32_e64 v5, |v1| ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
-0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
-
-# GFX10: v_mov_b32_e64 v5, -m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20]
-0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x20
-
# GFX10: v_movreld_b32_e64 v255, v1 ; encoding: [0xff,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
0xff,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
index f9f76a1ae2147..283b5536c6d26 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
@@ -1722,60 +1722,6 @@
# GFX11: v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
-# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01
-
-# GFX11: v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13
-
-# GFX11: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
-0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
-
-# GFX11: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
index e862e32c456e4..a2dafb06caf14 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
@@ -396,18 +396,6 @@
# GFX11: v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
-# GFX11: v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
-# GFX11: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
-0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
-
-# GFX11: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX11: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX11: v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
index 8fbd2708f0f26..f66eaed3fe266 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
@@ -2313,15 +2313,6 @@
# GFX11: v_mov_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX11: v_mov_b32_e64 v5, |v1| ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
-0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
-
-# GFX11: v_mov_b32_e64 v5, -m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
-0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20
-
-# GFX11: v_mov_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
-0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20
-
# GFX11: v_movreld_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
index c7c1ba84e86f6..7d8f48f6bdd80 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
@@ -2313,15 +2313,6 @@
# GFX12: v_mov_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf
-# GFX12: v_mov_b32_e64 v5, |v1| ; encoding: [0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00]
-0x05,0x01,0x81,0xd5,0x01,0x01,0x00,0x00
-
-# GFX12: v_mov_b32_e64 v5, -m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20]
-0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x20
-
-# GFX12: v_mov_b32_e64 v5, -v1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20]
-0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x20
-
# GFX12: v_movreld_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00]
0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
index 47e2e7bd0afa5..b2b9dc3210562 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
@@ -1722,60 +1722,6 @@
# GFX12: v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
-# GFX12: v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01
-
-# GFX12: v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13
-
-# GFX12: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
-0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
-
-# GFX12: v_mov_b32_e64_dpp v5, -v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x1b,0x00,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, |v1| row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
-0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, -v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff]
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x20,0x01,0x01,0x01,0xff
-
-# GFX12: v_mov_b32_e64_dpp v5, |v1| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
-0x05,0x01,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13
-
# GFX12: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
index 964f3c3126cb0..919c435c77619 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
@@ -396,18 +396,6 @@
# GFX12: v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
-# GFX12: v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
-# GFX12: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
-0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
-
-# GFX12: v_mov_b32_e64_dpp v5, -v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05]
-0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x20,0x01,0x77,0x39,0x05
-
-# GFX12: v_mov_b32_e64_dpp v5, |v1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
-0x05,0x01,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
-
# GFX12: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
>From 0463777a2196adeb302e5456f1f0a1c4b4eca253 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Wed, 31 Jan 2024 09:36:04 +0100
Subject: [PATCH 15/20] Re-add valid tests that were removed
---
.../AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s | 42 +++++++++++++++++++
.../MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s | 9 ++++
.../AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s | 42 +++++++++++++++++++
.../MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s | 9 ++++
.../gfx11_dasm_vop3_dpp16_from_vop1.txt | 42 +++++++++++++++++++
.../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt | 6 +++
.../gfx12_dasm_vop3_from_vop1_dpp16.txt | 42 +++++++++++++++++++
.../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt | 6 +++
8 files changed, 198 insertions(+)
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
index 38eb00aecafb1..1483993b68863 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s
@@ -1932,6 +1932,48 @@ v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl
v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_mirror
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_half_mirror
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_shl:1
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_shl:15
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_shr:1
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_shr:15
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_ror:1
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_ror:15
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01]
+
+v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
+v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+
v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
index dc1d0cc4c193e..e001aed42fa4f 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s
@@ -489,6 +489,15 @@ v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x81,0xa7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+
v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
index a32e531e07661..c5e7740d06711 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s
@@ -1932,6 +1932,48 @@ v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl
v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30]
+v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_mirror
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_half_mirror
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_shl:1
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_shl:15
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_shr:1
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_shr:15
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_ror:1
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_ror:15
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+
+v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01]
+
+v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13]
+
+v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30]
+
v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0]
// GFX12: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
index 17f08ee1c9b0f..6ea725e897d6c 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s
@@ -489,6 +489,15 @@ v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: [0xff,0x81,0xa7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+
+v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+
v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
index 283b5536c6d26..e4d1d0281a130 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
@@ -1722,6 +1722,48 @@
# GFX11: v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
+# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01
+
+# GFX11: v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13
+
+# GFX11: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+
# GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
index a2dafb06caf14..a662732568be5 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
@@ -396,6 +396,12 @@
# GFX11: v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
+# GFX11: v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
+# GFX11: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+
# GFX11: v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
index b2b9dc3210562..72cb3b5a78b97 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
@@ -1722,6 +1722,48 @@
# GFX12: v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
+# GFX12: v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01
+
+# GFX12: v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13]
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13
+
+# GFX12: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30]
+0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30
+
# GFX12: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
index 919c435c77619..0a80c5ce5db22 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
@@ -396,6 +396,12 @@
# GFX12: v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
+# GFX12: v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
+0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
+
+# GFX12: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00]
+0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00
+
# GFX12: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
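As a cross-check on the re-added test encodings above, the trailing bytes follow directly from the DPP fields. A minimal, self-contained sketch in plain C++ (not LLVM code; the helper names are illustrative): the eight 3-bit dpp8 lane selectors pack little-endian into the last three bytes of the instruction, and the final byte of each dpp16 encoding is (row_mask << 4) | bank_mask.

#include <cstdint>
#include <cstdio>

// Pack eight 3-bit dpp8 lane selectors into 24 bits, lane 0 in the low bits.
uint32_t packDpp8(const int Sel[8]) {
  uint32_t Bits = 0;
  for (int I = 0; I < 8; ++I)
    Bits |= (uint32_t)(Sel[I] & 7) << (3 * I);
  return Bits;
}

// Last byte of a dpp16 encoding: high nibble row_mask, low nibble bank_mask.
unsigned dpp16MaskByte(unsigned RowMask, unsigned BankMask) {
  return ((RowMask & 0xf) << 4) | (BankMask & 0xf);
}

int main() {
  const int Sel[8] = {7, 6, 5, 4, 3, 2, 1, 0}; // dpp8:[7,6,5,4,3,2,1,0]
  uint32_t Bits = packDpp8(Sel);
  // Prints "77 39 05", the last three bytes of the dpp8 tests above.
  std::printf("%02x %02x %02x\n", (unsigned)(Bits & 0xff),
              (unsigned)((Bits >> 8) & 0xff), (unsigned)(Bits >> 16));
  // Prints "13", matching row_mask:0x1 bank_mask:0x3 in the dpp16 tests.
  std::printf("%02x\n", dpp16MaskByte(0x1, 0x3));
}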
>From ccc030f4b064d708ff857915d0b296d1cae487ca Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Wed, 31 Jan 2024 09:49:41 +0100
Subject: [PATCH 16/20] Restore missing end-of-file newlines in tests
---
.../CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll | 2 +-
.../CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll | 2 +-
llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll | 2 +-
llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll | 2 +-
llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir | 2 +-
5 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
index ff75fdbd0ed95..bfb2ecde783a6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
@@ -221,4 +221,4 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
\ No newline at end of file
+attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
index c9efb4b45cd22..d2c42292a0364 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
@@ -242,4 +242,4 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
\ No newline at end of file
+attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
index f02a45dee93be..8cd25298d7473 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
@@ -330,4 +330,4 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
\ No newline at end of file
+declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
index 39e09b291d131..77be7f506aaf2 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
@@ -239,4 +239,4 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
\ No newline at end of file
+attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
index 7531ad9741c6c..876fa6f5f2744 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -58,4 +58,4 @@ body: |
FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
$sgpr30_sgpr31 = COPY %2
- S_SETPC_B64_return $sgpr30_sgpr31
\ No newline at end of file
+ S_SETPC_B64_return $sgpr30_sgpr31
>From 97e74da88ea5fcceda4dc02f2a5080bd95117eea Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Wed, 31 Jan 2024 09:59:24 +0100
Subject: [PATCH 17/20] GCNDPPCombine cleanup
---
llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index fdae6823db14c..a75082268c773 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -40,7 +40,6 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -275,9 +274,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
break;
}
-
- auto *Mod0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0_modifiers);
- if (Mod0) {
+ if (auto *Mod0 = TII->getNamedOperand(OrigMI,
+ AMDGPU::OpName::src0_modifiers)) {
assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
AMDGPU::OpName::src0_modifiers));
assert(HasVOP3DPP ||
@@ -288,7 +286,6 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
DPPInst.addImm(0);
++NumOperands;
}
-
auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
assert(Src0);
int Src0Idx = NumOperands;
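The refactor in this patch folds the Mod0 declaration into the if condition, which scopes the pointer to the branch that uses it and makes stale use impossible. A minimal sketch of the idiom under illustrative names (not the LLVM code):

#include <cstdio>

struct Operand { int Imm; };

// Stand-in for TII->getNamedOperand: may or may not find the operand.
Operand *getNamedOperand(bool Present) {
  static Operand Mod{0};
  return Present ? &Mod : nullptr;
}

int main() {
  if (auto *Mod0 = getNamedOperand(true)) {
    // Mod0 is non-null and visible only inside this branch.
    std::printf("src0_modifiers imm = %d\n", Mod0->Imm);
  }
  // Mod0 is out of scope here, so any stale use is a compile error.
}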
>From 105f93502f84830c35c3a83d1eed179a946e99af Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Wed, 31 Jan 2024 10:06:57 +0100
Subject: [PATCH 18/20] Whitespace
---
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 27b3f8a31f19a..33ae204f39531 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1384,10 +1384,13 @@ def : GCNPat <
(V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
+
} // End OtherPredicates = [isGFX10Only]
+
//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//
+
let OtherPredicates = [isGFX11Only] in {
def : GCNPat <
(i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
@@ -1402,8 +1405,8 @@ def : GCNPat <
let OtherPredicates = [isGFX12Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx12 VGPR_32:$src,VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx12 VGPR_32:$src,VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX12Only]
>From 1fb3ec027baf4bc8ed4da5b2f3a22e9365d93b05 Mon Sep 17 00:00:00 2001
From: ankurepa <Anja.Kurepa at syrmia.com>
Date: Wed, 31 Jan 2024 10:08:10 +0100
Subject: [PATCH 19/20] Whitespace
---
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 33ae204f39531..ca29b297db996 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1384,7 +1384,6 @@ def : GCNPat <
(V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
-
} // End OtherPredicates = [isGFX10Only]
//===----------------------------------------------------------------------===//
@@ -1406,7 +1405,7 @@ def : GCNPat <
let OtherPredicates = [isGFX12Only] in {
def : GCNPat <
(i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx12 VGPR_32:$src,VGPR_32:$src,
+ (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX12Only]
>From 1d687c8b42546b9017eaa0af4f8489f7b7307522 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 31 Jan 2024 09:21:44 +0000
Subject: [PATCH 20/20] [mlir][vector] Disable transpose -> shuffle lowering
for scalable vectors (#79979)
vector.shuffle is not supported for scalable vectors (outside of splats)
---
.../Transforms/LowerVectorTranspose.cpp | 4 ++++
.../Vector/vector-transpose-lowering.mlir | 21 +++++++++++++++++++
2 files changed, 25 insertions(+)
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTranspose.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTranspose.cpp
index 97f6caca1b25c..792550dcfaf22 100644
--- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTranspose.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTranspose.cpp
@@ -434,6 +434,10 @@ class TransposeOp2DToShuffleLowering
return rewriter.notifyMatchFailure(
op, "not using vector shuffle based lowering");
+ if (op.getSourceVectorType().isScalable())
+ return rewriter.notifyMatchFailure(
+ op, "vector shuffle lowering not supported for scalable vectors");
+
auto srcGtOneDims = isTranspose2DSlice(op);
if (failed(srcGtOneDims))
return rewriter.notifyMatchFailure(
diff --git a/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir b/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir
index c0b44428d5bcf..97b698edeb059 100644
--- a/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir
+++ b/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir
@@ -849,3 +849,24 @@ module attributes {transform.with_named_sequence} {
transform.yield
}
}
+
+// -----
+
+// Scalable transposes should not be lowered to vector.shuffle.
+
+// CHECK-LABEL: func @transpose_nx8x2xf32
+func.func @transpose_nx8x2xf32(%arg0: vector<[8]x2xf32>) -> vector<2x[8]xf32> {
+ // CHECK-NOT: vector.shuffle
+ // CHECK: vector.transpose %{{.*}} : vector<[8]x2xf32> to vector<2x[8]xf32>
+ %0 = vector.transpose %arg0, [1, 0] : vector<[8]x2xf32> to vector<2x[8]xf32>
+ return %0 : vector<2x[8]xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%func_op: !transform.op<"func.func"> {transform.readonly}) {
+ transform.apply_patterns to %func_op {
+ transform.apply_patterns.vector.lower_transpose lowering_strategy = "shuffle_1d"
+ } : !transform.op<"func.func">
+ transform.yield
+ }
+}