[llvm] [AMDGPU][MC] Support src modifiers for v_mov_b32 and v_movrel* instructions (PR #76498)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 28 02:24:49 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-mc
Author: None (ankurepa)
Changes:
Resolves #54795.
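In short, this teaches the AMDGPU MC layer to accept and encode source modifiers (neg/abs) and clamp on the VOP3, DPP16, and DPP8 forms of v_mov_b32 and the v_movrels*/v_movreld* family. The lines below illustrate the kind of syntax this enables; they are paraphrased, not quoted from the new MC tests:

```
v_mov_b32_e64 v5, -v1          // neg source modifier on the VOP3 form
v_mov_b32_e64 v5, |v1| clamp   // abs modifier combined with clamp
v_movrels_b32_e64 v5, -v1      // modifiers now parse on v_movrel* too
```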
---
Patch is 150.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76498.diff
31 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+51-20)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/dpp64_combine.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/dpp_combine.mir (+90-90)
- (modified) llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir (+91-91)
- (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll (+16-16)
- (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/remat-vop.mir (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/schedule-ilp-liveness-tracking.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/vopc_dpp.mir (+17-17)
- (modified) llvm/test/CodeGen/AMDGPU/wqm.mir (+2-2)
- (modified) llvm/test/MC/AMDGPU/gfx10_asm_vop1.s (+36)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s (+60)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s (+30)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s (+36)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s (+36)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s (+25)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt (+33)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt (+60)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt (+30)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt (+36)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt (+36)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt (+24)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt (+12)
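Most of the CodeGen test churn in the list above is mechanical: the DPP pseudo for these moves gains an explicit src0_modifiers immediate in front of src0, so every V_MOV_B32_dpp in MIR picks up an extra `0` operand. Schematically (operand order taken from the dpp_combine.mir updates below):

```
; old operand order: old, src0, dpp_ctrl, row_mask, bank_mask, bound_ctrl
; new operand order: old, src0_modifiers, src0, dpp_ctrl, row_mask, bank_mask, bound_ctrl
%3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
```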
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ebe23a5eac57b5..e315fca0f4bf97 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2600,6 +2600,8 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
for (unsigned I = 1; I <= 2; ++I) { // old and src operands.
const MachineOperand &SrcOp = MI.getOperand(I);
+ if (I == 2)
+ MovDPP.addImm(0); // Add the new src0 modifiers operand (no modifiers).
assert(!SrcOp.isFPImm());
if (SrcOp.isImm()) {
APInt Imm(64, SrcOp.getImm());
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 27a7c29cb1ac97..4bb1972cb0ea38 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -222,13 +222,20 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
let InsVOPDY = (ins Src0RC32:$src0Y);
let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
+
+ let HasModifiers = 1;
+ let HasClamp = 1;
+
+ let Src0Mod = FP32InputMods;
+ let Src0ModVOP3DPP = FPVRegInputMods;
+ let Src0ModDPP = FPVRegInputMods;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
+ defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
-let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
-defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
+ let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+ defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isMoveImm = 1
// FIXME: Specify SchedRW for READFIRSTLANE_B32
@@ -369,9 +376,21 @@ defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}
// Restrict src0 to be VGPR
+def VOP_PERMLANE : VOPProfile<[i32, i32, untyped, untyped]> {
+ let Src0RC32 = VRegSrc_32;
+ let Src0RC64 = VRegSrc_32;
+}
+
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
let Src0RC32 = VRegSrc_32;
let Src0RC64 = VRegSrc_32;
+
+ let HasModifiers = 1;
+ let HasClamp = 1;
+
+ let Src0Mod = FPVRegInputMods;
+ let Src0ModVOP3DPP = FPVRegInputMods;
+ let Src0ModDPP = FPVRegInputMods;
}
// Special case because there are no true output operands. Hack vdst
@@ -384,7 +403,9 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let Outs = (outs);
let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
- let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
+
+ let Ins64 = !con((ins Src0RC64:$vdst),
+                  !if(!eq(Src1RC, VSrc_b32),
+                      (ins FP32InputMods:$src0_modifiers),
+                      (ins FPVRegInputMods:$src0_modifiers)),
+                  (ins Src1RC:$src0, clampmod0:$clamp));
+
let Asm32 = getAsm32<1, 1>.ret;
let OutsSDWA = (outs Src0RC32:$vdst);
@@ -394,13 +415,20 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
let OutsDPP = (outs Src0RC32:$vdst);
- let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
+ let InsDPP = (ins Src0RC32:$old,
+ FPVRegInputMods:$src0_modifiers, Src0RC32:$src0,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers, Src0RC32:$src0,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
- let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
+ let InsDPP8 = (ins Src0RC32:$old, FPVRegInputMods:$src0_modifiers, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;
+ let InsVOP3Base = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, clampmod0:$clamp);
+
let OutsVOP3DPP = (outs Src0RC64:$vdst);
let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
@@ -413,6 +441,11 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let HasDst = 0;
let EmitDst = 1; // force vdst emission
+ let HasModifiers = 1;
+ let HasClamp = 1;
+
+ let Src0Mod = !if(!eq(Src1RC, VSrc_b32), FP32InputMods, FPVRegInputMods);
+ let Src0ModVOP3DPP = FPVRegInputMods;
}
def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
@@ -658,9 +691,9 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1
let SubtargetPredicate = isGFX11Plus in {
// Restrict src0 to be VGPR
- def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
+ def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_PERMLANE,
getVOP1Pat64<int_amdgcn_permlane64,
- VOP_MOVRELS>.ret,
+ VOP_PERMLANE>.ret,
/*VOP1Only=*/ 1>;
defm V_MOV_B16_t16 : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
@@ -1252,18 +1285,18 @@ def V_MOV_B32_indirect_read : VPseudoInstSI<
let OtherPredicates = [isGFX8Plus] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+ (i32 (int_amdgcn_mov_dpp (i32 (VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp_ctrl, timm:$row_mask,
timm:$bank_mask, timm:$bound_ctrl)),
- (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+ (V_MOV_B32_dpp VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src, (as_i32timm $dpp_ctrl),
(as_i32timm $row_mask), (as_i32timm $bank_mask),
(as_i1timm $bound_ctrl))
>;
class UpdateDPPPat<ValueType vt> : GCNPat <
- (vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
+ (vt (int_amdgcn_update_dpp vt:$old, (vt (VOP3Mods vt:$src, i32:$src0_modifiers)), timm:$dpp_ctrl,
timm:$row_mask, timm:$bank_mask,
timm:$bound_ctrl)),
- (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+ (V_MOV_B32_dpp VGPR_32:$old, i32:$src0_modifiers, VGPR_32:$src, (as_i32timm $dpp_ctrl),
(as_i32timm $row_mask), (as_i32timm $bank_mask),
(as_i1timm $bound_ctrl))
>;
@@ -1357,20 +1390,18 @@ defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
let OtherPredicates = [isGFX10Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 (i32 (VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Only]
-
//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//
-
let OtherPredicates = [isGFX11Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 (i32 (VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX11Only]
@@ -1381,8 +1412,8 @@ def : GCNPat <
let OtherPredicates = [isGFX12Only] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, VGPR_32:$src,
+ (i32 (int_amdgcn_mov_dpp8 (i32 (VOP3Mods i32:$src, i32:$src0_modifiers)), timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, i32:$src0_modifiers, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX12Only]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
index bfb2ecde783a63..c6b6572082d05a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
@@ -174,22 +174,22 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
; GFX90A_GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY11]], implicit-def dead $scc, implicit $exec
; GFX90A_GFX940-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], 0, [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_3]]
- ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
+ ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], 0, [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec
; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec
; GFX90A_GFX940-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 63
; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_4]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
index d2c42292a03642..b74ac41ed813ba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
@@ -176,24 +176,24 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
; GFX11-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 [[COPY2]], [[COPY6]], implicit-def dead $scc, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY7]], 0, [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY8]], 0, [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY9]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY9]], 0, [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], 0, [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_3]], 0, [[S_MOV_B32_3]], [[V_ADD_F32_e64_3]], 0, implicit $exec
; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], 0, [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
- ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
+ ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], 0, [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 15
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_4]]
; GFX11-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 16
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
index dfaa7b4efac39c..b9446a09b03028 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir
@@ -39,8 +39,8 @@ body: |
# DPP64 does not support all control values and must be split to become legal
# GCN-LABEL: name: dpp64_illegal_ctrl
-# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
-# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp undef %1.sub0:vreg_64_align2, 0, undef %2.sub0:vreg_64_align2, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vgpr_32 = V_MOV_B32_dpp undef %1.sub1:vreg_64_align2, 0, undef %2.sub1:vreg_64_align2, 1, 15, 15, 1, implicit $exec
# GCN: %0:vreg_64_align2 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
# GCN: %3:vreg_64_align2 = V_CEIL_F64_e32 %0, implicit $mode, implicit $exec
name: dpp64_illegal_ctrl
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
index becc2bb095cc4b..265762f9c83b8a 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -25,29 +25,29 @@ body: |
%2:vgpr_32 = IMPLICIT_DEF
; VOP2
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
; VOP1
- %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
- %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%14:vgpr_32 = V_NOT_B32_e32 %13, implicit $exec
- %15:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %15:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%16:vgpr_32 = V_NOT_B32_e32 %15, implicit $exec
- %17:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %17:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%18:vgpr_32 = V_NOT_B32_e32 %17, implicit $exec
...
@@ -109,29 +109,29 @@ body: |
%2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; VOP2
- %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
- %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 0, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %5, %1, implicit $exec
- %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 1, implicit $exec
+ %7:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 1, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
- %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
+ %9:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 14, 15, 0, implicit $exec
%10:vgpr_32 = V_ADD_U32_e32 %9, %1, implicit $exec
; VOP1
- %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
+ %11:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, 15, 15, 1, implicit $exec
%12:vgpr_32 = V_NOT_B32_e32 %11, implicit $exec
- %13:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 0, implicit $exec
+ %13:vgpr_32 = V_MOV_B32_dpp %2, 0, %0, 1, ...
[truncated]
``````````
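An aside on the VOP_MOVREL change above: Ins64 is now built with !con, which concatenates (ins ...) dag lists, and !if(!eq(Src1RC, VSrc_b32), ...), which selects the modifier operand class when the profile is instantiated. A minimal sketch of the same idiom, assuming it is evaluated inside the AMDGPU .td files where these operand classes are defined (the MovRelIns class name is hypothetical, not from the patch):

```tablegen
// Hypothetical helper showing the !con/!if idiom from VOP_MOVREL:
// pick the src0 modifier operand class based on the source register
// operand, then splice the pieces into a single (ins ...) list.
class MovRelIns<RegisterOperand DstRC, RegisterOperand SrcRC> {
  dag ret = !con((ins DstRC:$vdst),
                 !if(!eq(SrcRC, VSrc_b32),
                     (ins FP32InputMods:$src0_modifiers),
                     (ins FPVRegInputMods:$src0_modifiers)),
                 (ins SrcRC:$src0, clampmod0:$clamp));
}
```

With such a helper, the profile could write `let Ins64 = MovRelIns<Src0RC64, Src1RC>.ret;` instead of the inline expression.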
https://github.com/llvm/llvm-project/pull/76498
More information about the llvm-commits mailing list