[llvm] [AMDGPU]: Add support to unpack V_PK_MOV_B32 (PR #163464)
Akash Dutta via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 5 13:18:59 PST 2025
https://github.com/akadutta updated https://github.com/llvm/llvm-project/pull/163464
>From dc0089eaa822ff84ad394d2f1299d05059314c9f Mon Sep 17 00:00:00 2001
From: Akash Dutta <Akash.Dutta at amd.com>
Date: Tue, 14 Oct 2025 17:28:43 -0500
Subject: [PATCH 1/6] add support for v_pk_mov unpacking, redundant code
removal, bug fixes
---
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 185 +++---
...ck-non-coissue-insts-post-ra-scheduler.mir | 601 +++++++++++++++++-
2 files changed, 699 insertions(+), 87 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index c684f9e3a6e2c..379dc4f282743 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -47,9 +47,6 @@ class SIPreEmitPeephole {
const MachineBasicBlock &From,
const MachineBasicBlock &To) const;
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
- // Check if the machine instruction being processed is a supported packed
- // instruction.
- bool isUnpackingSupportedInstr(MachineInstr &MI) const;
// Creates a list of packed instructions following an MFMA that are suitable
// for unpacking.
void collectUnpackingCandidates(MachineInstr &BeginMI,
@@ -62,7 +59,7 @@ class SIPreEmitPeephole {
// v_fma_f32 v1, v0, v2, v2
// Here, we have overwritten v0 before we use it. This function checks if
// unpacking can lead to such a situation.
- bool canUnpackingClobberRegister(const MachineInstr &MI);
+ bool canUnpackingClobberRegister(MachineInstr &MI);
// Unpack and insert F32 packed instructions, such as V_PK_MUL, V_PK_ADD, and
// V_PK_FMA. Currently, only V_PK_MUL, V_PK_ADD, V_PK_FMA are supported for
// this transformation.
@@ -456,22 +453,8 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
// If support is extended to new operations, add tests in
// llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir.
-bool SIPreEmitPeephole::isUnpackingSupportedInstr(MachineInstr &MI) const {
- if (!TII->isNeverCoissue(MI))
- return false;
- unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- case AMDGPU::V_PK_ADD_F32:
- case AMDGPU::V_PK_MUL_F32:
- case AMDGPU::V_PK_FMA_F32:
- return true;
- default:
- return false;
- }
- llvm_unreachable("Fully covered switch");
-}
-bool SIPreEmitPeephole::canUnpackingClobberRegister(const MachineInstr &MI) {
+bool SIPreEmitPeephole::canUnpackingClobberRegister(MachineInstr &MI) {
unsigned OpCode = MI.getOpcode();
Register DstReg = MI.getOperand(0).getReg();
// Only the first register in the register pair needs to be checked due to the
@@ -482,21 +465,9 @@ bool SIPreEmitPeephole::canUnpackingClobberRegister(const MachineInstr &MI) {
// Such scenarios can arise due to specific combinations of op_sel and
// op_sel_hi modifiers.
Register UnpackedDstReg = TRI->getSubReg(DstReg, AMDGPU::sub0);
-
- const MachineOperand *Src0MO = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
- if (Src0MO && Src0MO->isReg()) {
- Register SrcReg0 = Src0MO->getReg();
- unsigned Src0Mods =
- TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
- Register HiSrc0Reg = (Src0Mods & SISrcMods::OP_SEL_1)
- ? TRI->getSubReg(SrcReg0, AMDGPU::sub1)
- : TRI->getSubReg(SrcReg0, AMDGPU::sub0);
- // Check if the register selected by op_sel_hi is the same as the first
- // register in the destination register pair.
- if (TRI->regsOverlap(UnpackedDstReg, HiSrc0Reg))
- return true;
- }
-
+ uint16_t UnpackedOpCode = mapToUnpackedOpcode(MI);
+ bool UnpackedInstHasOneSrcOp =
+ !AMDGPU::hasNamedOperand(UnpackedOpCode, AMDGPU::OpName::src1);
const MachineOperand *Src1MO = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
if (Src1MO && Src1MO->isReg()) {
Register SrcReg1 = Src1MO->getReg();
@@ -505,25 +476,44 @@ bool SIPreEmitPeephole::canUnpackingClobberRegister(const MachineInstr &MI) {
Register HiSrc1Reg = (Src1Mods & SISrcMods::OP_SEL_1)
? TRI->getSubReg(SrcReg1, AMDGPU::sub1)
: TRI->getSubReg(SrcReg1, AMDGPU::sub0);
+ // Check if the register selected by op_sel_hi is the same as the first
+ // register in the destination register pair.
if (TRI->regsOverlap(UnpackedDstReg, HiSrc1Reg))
return true;
}
- // Applicable for packed instructions with 3 source operands, such as
- // V_PK_FMA.
- if (AMDGPU::hasNamedOperand(OpCode, AMDGPU::OpName::src2)) {
- const MachineOperand *Src2MO =
- TII->getNamedOperand(MI, AMDGPU::OpName::src2);
- if (Src2MO && Src2MO->isReg()) {
- Register SrcReg2 = Src2MO->getReg();
- unsigned Src2Mods =
- TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm();
- Register HiSrc2Reg = (Src2Mods & SISrcMods::OP_SEL_1)
- ? TRI->getSubReg(SrcReg2, AMDGPU::sub1)
- : TRI->getSubReg(SrcReg2, AMDGPU::sub0);
- if (TRI->regsOverlap(UnpackedDstReg, HiSrc2Reg))
+ // V_MOV_B32s have one src operand. Other candidate unpacked instructions with
+ // 2 or more src operands will perform the following checks.
+ if (!UnpackedInstHasOneSrcOp) {
+ const MachineOperand *Src0MO =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ if (Src0MO && Src0MO->isReg()) {
+ Register SrcReg0 = Src0MO->getReg();
+ unsigned Src0Mods =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
+ Register HiSrc0Reg = (Src0Mods & SISrcMods::OP_SEL_1)
+ ? TRI->getSubReg(SrcReg0, AMDGPU::sub1)
+ : TRI->getSubReg(SrcReg0, AMDGPU::sub0);
+ if (TRI->regsOverlap(UnpackedDstReg, HiSrc0Reg))
return true;
}
+
+ // Applicable for packed instructions with 3 source operands, such as
+ // V_PK_FMA.
+ if (AMDGPU::hasNamedOperand(OpCode, AMDGPU::OpName::src2)) {
+ const MachineOperand *Src2MO =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ if (Src2MO && Src2MO->isReg()) {
+ Register SrcReg2 = Src2MO->getReg();
+ unsigned Src2Mods =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm();
+ Register HiSrc2Reg = (Src2Mods & SISrcMods::OP_SEL_1)
+ ? TRI->getSubReg(SrcReg2, AMDGPU::sub1)
+ : TRI->getSubReg(SrcReg2, AMDGPU::sub0);
+ if (TRI->regsOverlap(UnpackedDstReg, HiSrc2Reg))
+ return true;
+ }
+ }
}
return false;
}
@@ -540,6 +530,11 @@ uint16_t SIPreEmitPeephole::mapToUnpackedOpcode(MachineInstr &I) {
return AMDGPU::V_MUL_F32_e64;
case AMDGPU::V_PK_FMA_F32:
return AMDGPU::V_FMA_F32_e64;
+ case AMDGPU::V_PK_MOV_B32:
+ // Source modifiers aren't handled for MOV due to prevailing restrictions.
+ // Hence, 64-bit encoding isn't necessary. Will create unnecessary
+ // instruction cache pressure.
+ return AMDGPU::V_MOV_B32_e32;
default:
return std::numeric_limits<uint16_t>::max();
}
@@ -549,6 +544,7 @@ uint16_t SIPreEmitPeephole::mapToUnpackedOpcode(MachineInstr &I) {
void SIPreEmitPeephole::addOperandAndMods(MachineInstrBuilder &NewMI,
unsigned SrcMods, bool IsHiBits,
const MachineOperand &SrcMO) {
+ unsigned NewOpCode = NewMI->getOpcode();
unsigned NewSrcMods = 0;
unsigned NegModifier = IsHiBits ? SISrcMods::NEG_HI : SISrcMods::NEG;
unsigned OpSelModifier = IsHiBits ? SISrcMods::OP_SEL_1 : SISrcMods::OP_SEL_0;
@@ -561,12 +557,18 @@ void SIPreEmitPeephole::addOperandAndMods(MachineInstrBuilder &NewMI,
// modifier for the higher 32 bits. Unpacked VOP3 instructions support
// ABS, but do not support NEG_HI. Therefore we need to explicitly add the
// NEG modifier if present in the packed instruction.
+ bool IsSrcModifidiersSupported =
+ AMDGPU::hasNamedOperand(NewOpCode, AMDGPU::OpName::src0_modifiers);
+ bool UnpackedInstHasOneSrcOp =
+ !AMDGPU::hasNamedOperand(NewOpCode, AMDGPU::OpName::src1);
+
if (SrcMods & NegModifier)
NewSrcMods |= SISrcMods::NEG;
// Src modifiers. Only negative modifiers are added if needed. Unpacked
// operations do not have op_sel, therefore it must be handled explicitly as
// done below.
- NewMI.addImm(NewSrcMods);
+ if (IsSrcModifidiersSupported)
+ NewMI.addImm(NewSrcMods);
if (SrcMO.isImm()) {
NewMI.addImm(SrcMO.getImm());
return;
@@ -594,7 +596,7 @@ void SIPreEmitPeephole::addOperandAndMods(MachineInstrBuilder &NewMI,
bool OpSel = SrcMods & SISrcMods::OP_SEL_0;
bool OpSelHi = SrcMods & SISrcMods::OP_SEL_1;
bool KillState = true;
- if ((OpSel == OpSelHi) && !IsHiBits)
+ if ((OpSel == OpSelHi) && !IsHiBits && !UnpackedInstHasOneSrcOp)
KillState = false;
UnpackedSrcMO.setIsKill(KillState);
}
@@ -612,10 +614,12 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
for (auto I = std::next(BeginMI.getIterator()); I != E; ++I) {
MachineInstr &Instr = *I;
+ uint16_t UnpackedOpCode = mapToUnpackedOpcode(Instr);
if (Instr.isMetaInstruction())
continue;
if ((Instr.isTerminator()) ||
- (TII->isNeverCoissue(Instr) && !isUnpackingSupportedInstr(Instr)) ||
+ (TII->isNeverCoissue(Instr) &&
+ (UnpackedOpCode == std::numeric_limits<uint16_t>::max())) ||
(SIInstrInfo::modifiesModeRegister(Instr) &&
Instr.modifiesRegister(AMDGPU::EXEC, TRI)))
return;
@@ -639,7 +643,7 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
if (TRI->regsOverlap(MFMADef, InstrMO.getReg()))
return;
}
- if (!isUnpackingSupportedInstr(Instr))
+ if (UnpackedOpCode == std::numeric_limits<uint16_t>::max())
continue;
if (canUnpackingClobberRegister(Instr))
@@ -661,8 +665,28 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
MachineOperand DstOp = I.getOperand(0);
uint16_t UnpackedOpcode = mapToUnpackedOpcode(I);
- assert(UnpackedOpcode != std::numeric_limits<uint16_t>::max() &&
- "Unsupported Opcode");
+ // V_MOV_B32 does not support source modifiers. Without source modifiers, we
+ // cannot be faithful to the packed instruction semantics in few cases. This
+ // is true when the packed instruction has NEG and NEG_HI modifiers. We should
+ // abort unpacking if:
+ // 1. hi/lo bits selected by OPSEL for src0 are also marked by NEG or NEG_HI.
+ // 2. hi/lo bits selected by OPSEL_HI for src1 are also marked by NEG or
+ // NEG_HI.
+ // Packed instructions do not specify ABS modifiers, so we can safely ignore
+ // those.
+ if (!AMDGPU::hasNamedOperand(UnpackedOpcode,
+ AMDGPU::OpName::src0_modifiers)) {
+ unsigned Src0Mods =
+ TII->getNamedOperand(I, AMDGPU::OpName::src0_modifiers)->getImm();
+ unsigned Src1Mods =
+ TII->getNamedOperand(I, AMDGPU::OpName::src1_modifiers)->getImm();
+ unsigned negMask0 =
+ (Src0Mods & SISrcMods::OP_SEL_0) ? SISrcMods::NEG_HI : SISrcMods::NEG;
+ unsigned negMask1 =
+ (Src1Mods & SISrcMods::OP_SEL_1) ? SISrcMods::NEG_HI : SISrcMods::NEG;
+ if ((Src0Mods & negMask0) || (Src1Mods & negMask1))
+ return;
+ }
MachineInstrBuilder Op0LOp1L =
createUnpackedMI(I, UnpackedOpcode, /*IsHiBits=*/false);
@@ -689,8 +713,8 @@ MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
bool IsHiBits) {
MachineBasicBlock &MBB = *I.getParent();
const DebugLoc &DL = I.getDebugLoc();
- const MachineOperand *SrcMO1 = TII->getNamedOperand(I, AMDGPU::OpName::src0);
- const MachineOperand *SrcMO2 = TII->getNamedOperand(I, AMDGPU::OpName::src1);
+ const MachineOperand *SrcMO0 = TII->getNamedOperand(I, AMDGPU::OpName::src0);
+ const MachineOperand *SrcMO1 = TII->getNamedOperand(I, AMDGPU::OpName::src1);
Register DstReg = I.getOperand(0).getReg();
unsigned OpCode = I.getOpcode();
Register UnpackedDstReg = IsHiBits ? TRI->getSubReg(DstReg, AMDGPU::sub1)
@@ -704,8 +728,15 @@ MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
MachineInstrBuilder NewMI = BuildMI(MBB, I, DL, TII->get(UnpackedOpcode));
NewMI.addDef(UnpackedDstReg); // vdst
- addOperandAndMods(NewMI, Src0Mods, IsHiBits, *SrcMO1);
- addOperandAndMods(NewMI, Src1Mods, IsHiBits, *SrcMO2);
+ if (AMDGPU::hasNamedOperand(UnpackedOpcode, AMDGPU::OpName::src0) &&
+ AMDGPU::hasNamedOperand(UnpackedOpcode, AMDGPU::OpName::src1)) {
+ addOperandAndMods(NewMI, Src0Mods, IsHiBits, *SrcMO0);
+ addOperandAndMods(NewMI, Src1Mods, IsHiBits, *SrcMO1);
+ } else {
+ const MachineOperand *SrcMO = IsHiBits ? SrcMO1 : SrcMO0;
+ unsigned SrcMods = IsHiBits ? Src1Mods : Src0Mods;
+ addOperandAndMods(NewMI, SrcMods, IsHiBits, *SrcMO);
+ }
if (AMDGPU::hasNamedOperand(OpCode, AMDGPU::OpName::src2)) {
const MachineOperand *SrcMO3 =
@@ -714,10 +745,12 @@ MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
TII->getNamedOperand(I, AMDGPU::OpName::src2_modifiers)->getImm();
addOperandAndMods(NewMI, Src2Mods, IsHiBits, *SrcMO3);
}
- NewMI.addImm(ClampVal); // clamp
+ if (AMDGPU::hasNamedOperand(UnpackedOpcode, AMDGPU::OpName::clamp))
+ NewMI.addImm(ClampVal); // clamp
// Packed instructions do not support output modifiers. safe to assign them 0
// for this use case
- NewMI.addImm(0); // omod
+ if (AMDGPU::hasNamedOperand(UnpackedOpcode, AMDGPU::OpName::omod))
+ NewMI.addImm(0); // omod
return NewMI;
}
@@ -789,22 +822,24 @@ bool SIPreEmitPeephole::run(MachineFunction &MF) {
// TODO: Fold this into previous block, if possible. Evaluate and handle any
// side effects.
- for (MachineBasicBlock &MBB : MF) {
- // Unpack packed instructions overlapped by MFMAs. This allows the compiler
- // to co-issue unpacked instructions with MFMA
- auto SchedModel = TII->getSchedModel();
- SetVector<MachineInstr *> InstrsToUnpack;
- for (auto &MI : make_early_inc_range(MBB.instrs())) {
- if (!SIInstrInfo::isMFMA(MI))
- continue;
- const MCSchedClassDesc *SchedClassDesc =
- SchedModel.resolveSchedClass(&MI);
- uint16_t NumMFMACycles =
- SchedModel.getWriteProcResBegin(SchedClassDesc)->ReleaseAtCycle;
- collectUnpackingCandidates(MI, InstrsToUnpack, NumMFMACycles);
- }
- for (MachineInstr *MI : InstrsToUnpack) {
- performF32Unpacking(*MI);
+ if (ST.hasGFX950Insts() || ST.hasGFX940Insts()) {
+ for (MachineBasicBlock &MBB : MF) {
+ // Unpack packed instructions overlapped by MFMAs. This allows the
+ // compiler to co-issue unpacked instructions with MFMA
+ auto SchedModel = TII->getSchedModel();
+ SetVector<MachineInstr *> InstrsToUnpack;
+ for (auto &MI : make_early_inc_range(MBB.instrs())) {
+ if (!SIInstrInfo::isMFMA(MI))
+ continue;
+ const MCSchedClassDesc *SchedClassDesc =
+ SchedModel.resolveSchedClass(&MI);
+ uint16_t NumMFMACycles =
+ SchedModel.getWriteProcResBegin(SchedClassDesc)->ReleaseAtCycle;
+ collectUnpackingCandidates(MI, InstrsToUnpack, NumMFMACycles);
+ }
+ for (MachineInstr *MI : InstrsToUnpack) {
+ performF32Unpacking(*MI);
+ }
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir b/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir
index 75ae76fdee19b..2f7c42f46e0c2 100644
--- a/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir
+++ b/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir
@@ -641,9 +641,6 @@ body: |
; GFX950-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec
; GFX950-NEXT: $vgpr4 = V_MOV_B32_dpp $vgpr4, $vgpr4, 228, 15, 15, -1, implicit $exec
; GFX950-NEXT: $vgpr5 = V_CVT_PK_BF8_F32_e64 0, killed $vgpr4, 0, $vgpr4, $vgpr5, 0, implicit $mode, implicit $exec
- ; GFX950-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
- ; GFX950-NEXT: $vgpr8_vgpr9 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
- ; GFX950-NEXT: $vgpr10_vgpr11 = V_PK_MOV_B32 12, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec
; GFX950-NEXT: S_ENDPGM 0
;
; GFX942-LABEL: name: test_opcodes_not_supported_for_unpacking_are_skipped
@@ -664,9 +661,6 @@ body: |
; GFX942-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec
; GFX942-NEXT: $vgpr4 = V_MOV_B32_dpp $vgpr4, $vgpr4, 228, 15, 15, -1, implicit $exec
; GFX942-NEXT: $vgpr5 = V_CVT_PK_BF8_F32_e64 0, killed $vgpr4, 0, $vgpr4, $vgpr5, 0, implicit $mode, implicit $exec
- ; GFX942-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
- ; GFX942-NEXT: $vgpr8_vgpr9 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
- ; GFX942-NEXT: $vgpr10_vgpr11 = V_PK_MOV_B32 12, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec
; GFX942-NEXT: S_ENDPGM 0
;
; GFX90A-LABEL: name: test_opcodes_not_supported_for_unpacking_are_skipped
@@ -687,9 +681,6 @@ body: |
; GFX90A-NEXT: $vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec
; GFX90A-NEXT: $vgpr4 = V_MOV_B32_dpp $vgpr4, $vgpr4, 228, 15, 15, -1, implicit $exec
; GFX90A-NEXT: $vgpr5 = V_CVT_PK_BF8_F32_e64 0, killed $vgpr4, 0, $vgpr4, $vgpr5, 0, implicit $mode, implicit $exec
- ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
- ; GFX90A-NEXT: $vgpr8_vgpr9 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
- ; GFX90A-NEXT: $vgpr10_vgpr11 = V_PK_MOV_B32 12, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec
; GFX90A-NEXT: S_ENDPGM 0
early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
@@ -706,9 +697,6 @@ body: |
$vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec
$vgpr4 = V_MOV_B32_dpp $vgpr4, $vgpr4, 228, 15, 15, -1, implicit $exec
$vgpr5 = V_CVT_PK_BF8_F32_e64 0, killed $vgpr4, 0, $vgpr4, $vgpr5, 0, implicit $mode, implicit $exec
- $vgpr6_vgpr7 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
- $vgpr8_vgpr9 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
- $vgpr10_vgpr11 = V_PK_MOV_B32 12, killed $vgpr6_vgpr7, 8, killed $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -1167,3 +1155,592 @@ body: |
$vgpr5 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec
renamable $vgpr16_vgpr17 = nofpexcept V_PK_MUL_F32 8, killed $sgpr30_sgpr31, 11, killed $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0
+
+...
+---
+name: test_v_pk_mov_unpacking
+tracksRegLiveness: true
+
+liveins:
+ - { reg: '$sgpr4_sgpr5' }
+
+body: |
+ bb.0.entry:
+ liveins: $sgpr4_sgpr5
+ ; GFX950-LABEL: name: test_v_pk_mov_unpacking
+ ; GFX950: liveins: $sgpr4_sgpr5
+ ; GFX950-NEXT: {{ $}}
+ ; GFX950-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX950-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX950-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX950-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX950-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX950-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX950-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX950-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX950-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX950-NEXT: $vgpr4 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ ; GFX950-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
+ ; GFX950-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: test_v_pk_mov_unpacking
+ ; GFX942: liveins: $sgpr4_sgpr5
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX942-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX942-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX942-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX942-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX942-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX942-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX942-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX942-NEXT: $vgpr4 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ ; GFX942-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: test_v_pk_mov_unpacking
+ ; GFX90A: liveins: $sgpr4_sgpr5
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX90A-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 8, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0
+ early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr4_vgpr5 = V_PK_MOV_B32 8, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+
+...
+---
+name: test_v_pk_mov_with_neg_modifiers_not_unpacked_0
+tracksRegLiveness: true
+
+liveins:
+ - { reg: '$sgpr4_sgpr5' }
+
+body: |
+ bb.0.entry:
+ liveins: $sgpr4_sgpr5
+ ; GFX950-LABEL: name: test_v_pk_mov_with_neg_modifiers_not_unpacked_0
+ ; GFX950: liveins: $sgpr4_sgpr5
+ ; GFX950-NEXT: {{ $}}
+ ; GFX950-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX950-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX950-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX950-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX950-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX950-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX950-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX950-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX950-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX950-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 9, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX950-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: test_v_pk_mov_with_neg_modifiers_not_unpacked_0
+ ; GFX942: liveins: $sgpr4_sgpr5
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX942-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX942-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX942-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX942-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX942-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX942-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX942-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX942-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 9, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: test_v_pk_mov_with_neg_modifiers_not_unpacked_0
+ ; GFX90A: liveins: $sgpr4_sgpr5
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX90A-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 9, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0
+ early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr4_vgpr5 = V_PK_MOV_B32 9, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+
+...
+---
+name: test_v_pk_mov_with_neg_modifiers_not_unpacked_1
+tracksRegLiveness: true
+
+liveins:
+ - { reg: '$sgpr4_sgpr5' }
+
+body: |
+ bb.0.entry:
+ liveins: $sgpr4_sgpr5
+ ; GFX950-LABEL: name: test_v_pk_mov_with_neg_modifiers_not_unpacked_1
+ ; GFX950: liveins: $sgpr4_sgpr5
+ ; GFX950-NEXT: {{ $}}
+ ; GFX950-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX950-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX950-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX950-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX950-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX950-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX950-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX950-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX950-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX950-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 8, killed $vgpr0_vgpr1, 10, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX950-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: test_v_pk_mov_with_neg_modifiers_not_unpacked_1
+ ; GFX942: liveins: $sgpr4_sgpr5
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX942-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX942-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX942-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX942-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX942-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX942-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX942-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX942-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 8, killed $vgpr0_vgpr1, 10, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: test_v_pk_mov_with_neg_modifiers_not_unpacked_1
+ ; GFX90A: liveins: $sgpr4_sgpr5
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX90A-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 8, killed $vgpr0_vgpr1, 10, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0
+ early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr4_vgpr5 = V_PK_MOV_B32 8, killed $vgpr0_vgpr1, 10, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+
+...
+---
+name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_2
+tracksRegLiveness: true
+
+liveins:
+ - { reg: '$sgpr4_sgpr5' }
+
+body: |
+ bb.0.entry:
+ liveins: $sgpr4_sgpr5
+ ; GFX950-LABEL: name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_2
+ ; GFX950: liveins: $sgpr4_sgpr5
+ ; GFX950-NEXT: {{ $}}
+ ; GFX950-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX950-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX950-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX950-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX950-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX950-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX950-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX950-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX950-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX950-NEXT: $vgpr4 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ ; GFX950-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
+ ; GFX950-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_2
+ ; GFX942: liveins: $sgpr4_sgpr5
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX942-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX942-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX942-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX942-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX942-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX942-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX942-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX942-NEXT: $vgpr4 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ ; GFX942-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_2
+ ; GFX90A: liveins: $sgpr4_sgpr5
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX90A-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 8, killed $vgpr0_vgpr1, 9, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0
+ early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr4_vgpr5 = V_PK_MOV_B32 8, killed $vgpr0_vgpr1, 9, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+
+...
+---
+name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_3
+tracksRegLiveness: true
+
+liveins:
+ - { reg: '$sgpr4_sgpr5' }
+
+body: |
+ bb.0.entry:
+ liveins: $sgpr4_sgpr5
+ ; GFX950-LABEL: name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_3
+ ; GFX950: liveins: $sgpr4_sgpr5
+ ; GFX950-NEXT: {{ $}}
+ ; GFX950-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX950-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX950-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX950-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX950-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX950-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX950-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX950-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX950-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX950-NEXT: $vgpr4 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ ; GFX950-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
+ ; GFX950-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_3
+ ; GFX942: liveins: $sgpr4_sgpr5
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX942-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX942-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX942-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX942-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX942-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX942-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX942-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX942-NEXT: $vgpr4 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+ ; GFX942-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_3
+ ; GFX90A: liveins: $sgpr4_sgpr5
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX90A-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 10, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0
+ early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr4_vgpr5 = V_PK_MOV_B32 10, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+
+...
+---
+name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_4
+tracksRegLiveness: true
+
+liveins:
+ - { reg: '$sgpr4_sgpr5' }
+
+body: |
+ bb.0.entry:
+ liveins: $sgpr4_sgpr5
+ ; GFX950-LABEL: name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_4
+ ; GFX950: liveins: $sgpr4_sgpr5
+ ; GFX950-NEXT: {{ $}}
+ ; GFX950-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX950-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX950-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX950-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX950-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX950-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX950-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX950-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX950-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX950-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 14, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX950-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_4
+ ; GFX942: liveins: $sgpr4_sgpr5
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX942-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX942-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX942-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX942-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX942-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX942-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX942-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX942-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 14, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: test_v_pk_mov_with_neg_modifiers_correctly_unpacked_4
+ ; GFX90A: liveins: $sgpr4_sgpr5
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX90A-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 14, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0
+ early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr4_vgpr5 = V_PK_MOV_B32 14, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+
+...
+---
+name: test_v_pk_mov_op_sel_unpacking
+tracksRegLiveness: true
+
+liveins:
+ - { reg: '$sgpr4_sgpr5' }
+
+body: |
+ bb.0.entry:
+ liveins: $sgpr4_sgpr5
+ ; GFX950-LABEL: name: test_v_pk_mov_op_sel_unpacking
+ ; GFX950: liveins: $sgpr4_sgpr5
+ ; GFX950-NEXT: {{ $}}
+ ; GFX950-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX950-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX950-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX950-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX950-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX950-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX950-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX950-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX950-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX950-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX950-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX950-NEXT: $vgpr4 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ ; GFX950-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+ ; GFX950-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: test_v_pk_mov_op_sel_unpacking
+ ; GFX942: liveins: $sgpr4_sgpr5
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX942-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX942-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX942-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX942-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX942-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX942-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX942-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX942-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX942-NEXT: $vgpr4 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ ; GFX942-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: test_v_pk_mov_op_sel_unpacking
+ ; GFX90A: liveins: $sgpr4_sgpr5
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ ; GFX90A-NEXT: early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ ; GFX90A-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX90A-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX90A-NEXT: $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ ; GFX90A-NEXT: $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ ; GFX90A-NEXT: early-clobber renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX90A-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 12, killed $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0
+ early-clobber renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM_ec killed renamable $sgpr4_sgpr5, 0, 0
+ renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec
+ renamable $sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX4_IMM renamable $sgpr40_sgpr41, 0, 0
+ renamable $sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX4_IMM renamable $sgpr42_sgpr43, 0, 0
+ early-clobber renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr40_sgpr41, 0, 0
+ early-clobber renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX16_IMM_ec killed renamable $sgpr42_sgpr43, 0, 0
+ $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr44_sgpr45, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr44_sgpr45_sgpr46_sgpr47
+ $vgpr2_vgpr3 = V_MOV_B64_e32 killed $sgpr46_sgpr47, implicit $exec, implicit $sgpr44_sgpr45_sgpr46_sgpr47, implicit $exec
+ $vgpr4_vgpr5 = V_MOV_B64_e32 $sgpr48_sgpr49, implicit $exec, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ $vgpr6_vgpr7 = V_MOV_B64_e32 killed $sgpr50_sgpr51, implicit $exec, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
+ renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X16_F16_e64 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr4_vgpr5 = V_PK_MOV_B32 12, killed $vgpr0_vgpr1, 0, killed $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
>From 8587628d8cd93754b5d5f1b3c001064ae0f7885f Mon Sep 17 00:00:00 2001
From: Akash Dutta <Akash.Dutta at amd.com>
Date: Tue, 14 Oct 2025 17:37:10 -0500
Subject: [PATCH 2/6] restoring deleted assert
---
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 379dc4f282743..2361c73f73481 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -665,6 +665,8 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
MachineOperand DstOp = I.getOperand(0);
uint16_t UnpackedOpcode = mapToUnpackedOpcode(I);
+ assert(UnpackedOpcode != std::numeric_limits<uint16_t>::max() &&
+ "Unsupported Opcode");
// V_MOV_B32 does not support source modifiers. Without source modifiers, we
// cannot be faithful to the packed instruction semantics in few cases. This
// is true when the packed instruction has NEG and NEG_HI modifiers. We should
>From 25b9c71035be05b2cb1cac3a175c4adbe55ab772 Mon Sep 17 00:00:00 2001
From: Akash Dutta <137309513+akadutta at users.noreply.github.com>
Date: Wed, 5 Nov 2025 11:02:08 -0600
Subject: [PATCH 3/6] Update test with suggestion
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir | 4 ----
1 file changed, 4 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir b/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir
index 2f7c42f46e0c2..992dbf7167b5e 100644
--- a/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir
+++ b/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir
@@ -1160,10 +1160,6 @@ body: |
---
name: test_v_pk_mov_unpacking
tracksRegLiveness: true
-
-liveins:
- - { reg: '$sgpr4_sgpr5' }
-
body: |
bb.0.entry:
liveins: $sgpr4_sgpr5
>From e2fbe0ce2b7579c4276700e6ceb9fc59fb83f3c2 Mon Sep 17 00:00:00 2001
From: Akash Dutta <137309513+akadutta at users.noreply.github.com>
Date: Wed, 5 Nov 2025 11:03:53 -0600
Subject: [PATCH 4/6] Apply suggestion from @arsenm
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 0da450e577832..33fe8975d45d1 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -616,7 +616,7 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
MachineInstr &Instr = *I;
uint16_t UnpackedOpCode = mapToUnpackedOpcode(Instr);
bool IsUnpackable =
- !(UnpackedOpCode == std::numeric_limits<uint16_t>::max());
+ UnpackedOpCode != std::numeric_limits<uint16_t>::max();
if (Instr.isMetaInstruction())
continue;
if ((Instr.isTerminator()) ||
>From 08fdc0764b05e56e87f5b5bad54b15d2c61bd738 Mon Sep 17 00:00:00 2001
From: Akash Dutta <137309513+akadutta at users.noreply.github.com>
Date: Wed, 5 Nov 2025 11:06:23 -0600
Subject: [PATCH 5/6] Apply suggestion from @arsenm
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
.../AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir b/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir
index 992dbf7167b5e..efdb5fb4b9bec 100644
--- a/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir
+++ b/llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir
@@ -1161,7 +1161,7 @@ body: |
name: test_v_pk_mov_unpacking
tracksRegLiveness: true
body: |
- bb.0.entry:
+ bb.0:
liveins: $sgpr4_sgpr5
; GFX950-LABEL: name: test_v_pk_mov_unpacking
; GFX950: liveins: $sgpr4_sgpr5
>From 44b3744c83f53f8a25deaf3315cea5bd18f966f9 Mon Sep 17 00:00:00 2001
From: Akash Dutta <Akash.Dutta at amd.com>
Date: Wed, 5 Nov 2025 15:18:22 -0600
Subject: [PATCH 6/6] reduce code size, address code reviews
---
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 137 +++++++++----------
1 file changed, 65 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 33fe8975d45d1..cb10216fbab3b 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -59,7 +59,7 @@ class SIPreEmitPeephole {
// v_fma_f32 v1, v0, v2, v2
// Here, we have overwritten v0 before we use it. This function checks if
// unpacking can lead to such a situation.
- bool canUnpackingClobberRegister(MachineInstr &MI);
+ bool canUnpackingClobberRegister(const MachineInstr &MI);
// Unpack and insert F32 packed instructions, such as V_PK_MUL, V_PK_ADD, and
// V_PK_FMA. Currently, only V_PK_MUL, V_PK_ADD, V_PK_FMA are supported for
// this transformation.
@@ -454,7 +454,7 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
// If support is extended to new operations, add tests in
// llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir.
-bool SIPreEmitPeephole::canUnpackingClobberRegister(MachineInstr &MI) {
+bool SIPreEmitPeephole::canUnpackingClobberRegister(const MachineInstr &MI) {
unsigned OpCode = MI.getOpcode();
Register DstReg = MI.getOperand(0).getReg();
// Only the first register in the register pair needs to be checked due to the
@@ -465,55 +465,48 @@ bool SIPreEmitPeephole::canUnpackingClobberRegister(MachineInstr &MI) {
// Such scenarios can arise due to specific combinations of op_sel and
// op_sel_hi modifiers.
Register UnpackedDstReg = TRI->getSubReg(DstReg, AMDGPU::sub0);
- uint16_t UnpackedOpCode = mapToUnpackedOpcode(MI);
- bool UnpackedInstHasOneSrcOp =
- !AMDGPU::hasNamedOperand(UnpackedOpCode, AMDGPU::OpName::src1);
- const MachineOperand *Src1MO = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (Src1MO && Src1MO->isReg()) {
- Register SrcReg1 = Src1MO->getReg();
- unsigned Src1Mods =
- TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
- Register HiSrc1Reg = (Src1Mods & SISrcMods::OP_SEL_1)
- ? TRI->getSubReg(SrcReg1, AMDGPU::sub1)
- : TRI->getSubReg(SrcReg1, AMDGPU::sub0);
- // Check if the register selected by op_sel_hi is the same as the first
- // register in the destination register pair.
- if (TRI->regsOverlap(UnpackedDstReg, HiSrc1Reg))
- return true;
- }
- // V_MOV_B32s have one src operand. Other candidate unpacked instructions with
- // 2 or more src operands will perform the following checks.
- if (!UnpackedInstHasOneSrcOp) {
- const MachineOperand *Src0MO =
- TII->getNamedOperand(MI, AMDGPU::OpName::src0);
- if (Src0MO && Src0MO->isReg()) {
- Register SrcReg0 = Src0MO->getReg();
- unsigned Src0Mods =
- TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
- Register HiSrc0Reg = (Src0Mods & SISrcMods::OP_SEL_1)
- ? TRI->getSubReg(SrcReg0, AMDGPU::sub1)
- : TRI->getSubReg(SrcReg0, AMDGPU::sub0);
- if (TRI->regsOverlap(UnpackedDstReg, HiSrc0Reg))
- return true;
- }
+ // Lambda to check if a source operand causes clobbering
+ auto checkSrcClobber = [&](AMDGPU::OpName SrcName, AMDGPU::OpName ModsName) -> bool {
+ const MachineOperand *SrcMO = TII->getNamedOperand(MI, SrcName);
+ if (SrcMO && SrcMO->isReg()) {
+ Register SrcReg = SrcMO->getReg();
+ unsigned SrcMods = TII->getNamedOperand(MI, ModsName)->getImm();
+ Register HiSrcReg = (SrcMods & SISrcMods::OP_SEL_1)
+ ? TRI->getSubReg(SrcReg, AMDGPU::sub1)
+ : TRI->getSubReg(SrcReg, AMDGPU::sub0);
+ return TRI->regsOverlap(UnpackedDstReg, HiSrcReg);
+ }
+ };
+
+ // Src1 should be checked before src0 to avoid false positives.
+ // For example, the following unpacked sequence is legal:
+ // $vgpr0_vgpr1 = V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr2_vgpr3
+ // =>
+ // $vgpr0 = V_MUL_F32 $vgpr0, $vgpr2
+ // $vgpr1 = V_MUL_F32 $vgpr1, $vgpr3
+ // Although the destination and source overlap in the first instruction ($vgpr0), $vgpr0 is not used as a source in the second instruction.
+ // Therefore, unpacking this sequence is safe.
+ //
+ // The following sequence, however, is not safe to unpack:
+ // $vgpr0_vgpr1 = V_PK_MUL_F32 0, $vgpr0_vgpr1, 8, $vgpr2_vgpr3
+ // =>
+ // $vgpr0 = V_MUL_F32 $vgpr0, $vgpr2
+ // $vgpr1 = V_MUL_F32 $vgpr0, $vgpr3
+ // In the unpacked version, $vgpr1 uses $vgpr0 as a source, but $vgpr0 was updated in the previous instruction.
+ // This behavior does not occur with the packed instruction.
+ // As a result, it is unsafe to unpack this sequence.
+ if (checkSrcClobber(AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers))
+ return true;
- // Applicable for packed instructions with 3 source operands, such as
- // V_PK_FMA.
- if (AMDGPU::hasNamedOperand(OpCode, AMDGPU::OpName::src2)) {
- const MachineOperand *Src2MO =
- TII->getNamedOperand(MI, AMDGPU::OpName::src2);
- if (Src2MO && Src2MO->isReg()) {
- Register SrcReg2 = Src2MO->getReg();
- unsigned Src2Mods =
- TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm();
- Register HiSrc2Reg = (Src2Mods & SISrcMods::OP_SEL_1)
- ? TRI->getSubReg(SrcReg2, AMDGPU::sub1)
- : TRI->getSubReg(SrcReg2, AMDGPU::sub0);
- if (TRI->regsOverlap(UnpackedDstReg, HiSrc2Reg))
- return true;
- }
- }
+ if (checkSrcClobber(AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers))
+ return true;
+
+ // Applicable for packed instructions with 3 source operands, such as
+ // V_PK_FMA.
+ if (AMDGPU::hasNamedOperand(OpCode, AMDGPU::OpName::src2)) {
+ if (checkSrcClobber(AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers))
+ return true;
}
return false;
}
@@ -646,7 +639,29 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
}
if (!IsUnpackable)
continue;
-
+
+ // V_MOV_B32 does not support source modifiers. Without source modifiers, we
+ // cannot be faithful to the packed instruction semantics in few cases. This
+ // is true when the packed instruction has NEG and NEG_HI modifiers. We should
+ // abort unpacking if:
+ // 1. hi/lo bits selected by OPSEL for src0 are also marked by NEG or NEG_HI.
+ // 2. hi/lo bits selected by OPSEL_HI for src1 are also marked by NEG or
+ // NEG_HI.
+ // Packed instructions do not specify ABS modifiers, so we can safely ignore
+ // those.
+ if (!AMDGPU::hasNamedOperand(UnpackedOpCode,
+ AMDGPU::OpName::src0_modifiers)) {
+ unsigned Src0Mods =
+ TII->getNamedOperand(Instr, AMDGPU::OpName::src0_modifiers)->getImm();
+ unsigned Src1Mods =
+ TII->getNamedOperand(Instr, AMDGPU::OpName::src1_modifiers)->getImm();
+ unsigned negMask0 =
+ (Src0Mods & SISrcMods::OP_SEL_0) ? SISrcMods::NEG_HI : SISrcMods::NEG;
+ unsigned negMask1 =
+ (Src1Mods & SISrcMods::OP_SEL_1) ? SISrcMods::NEG_HI : SISrcMods::NEG;
+ if ((Src0Mods & negMask0) || (Src1Mods & negMask1))
+ return;
+ }
if (canUnpackingClobberRegister(Instr))
return;
// If it's a packed instruction, adjust latency: remove the packed
@@ -667,28 +682,6 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
uint16_t UnpackedOpcode = mapToUnpackedOpcode(I);
assert(UnpackedOpcode != std::numeric_limits<uint16_t>::max() &&
"Unsupported Opcode");
- // V_MOV_B32 does not support source modifiers. Without source modifiers, we
- // cannot be faithful to the packed instruction semantics in few cases. This
- // is true when the packed instruction has NEG and NEG_HI modifiers. We should
- // abort unpacking if:
- // 1. hi/lo bits selected by OPSEL for src0 are also marked by NEG or NEG_HI.
- // 2. hi/lo bits selected by OPSEL_HI for src1 are also marked by NEG or
- // NEG_HI.
- // Packed instructions do not specify ABS modifiers, so we can safely ignore
- // those.
- if (!AMDGPU::hasNamedOperand(UnpackedOpcode,
- AMDGPU::OpName::src0_modifiers)) {
- unsigned Src0Mods =
- TII->getNamedOperand(I, AMDGPU::OpName::src0_modifiers)->getImm();
- unsigned Src1Mods =
- TII->getNamedOperand(I, AMDGPU::OpName::src1_modifiers)->getImm();
- unsigned negMask0 =
- (Src0Mods & SISrcMods::OP_SEL_0) ? SISrcMods::NEG_HI : SISrcMods::NEG;
- unsigned negMask1 =
- (Src1Mods & SISrcMods::OP_SEL_1) ? SISrcMods::NEG_HI : SISrcMods::NEG;
- if ((Src0Mods & negMask0) || (Src1Mods & negMask1))
- return;
- }
MachineInstrBuilder Op0LOp1L =
createUnpackedMI(I, UnpackedOpcode, /*IsHiBits=*/false);
More information about the llvm-commits
mailing list