[llvm] Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" (PR #168353)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 18 04:21:53 PST 2025
https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/168353
>From 73b490d7a0d1516fb253a12ccec2865a621138e7 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 24 Oct 2025 15:19:43 +0100
Subject: [PATCH 1/5] Revert "Revert "Reland "RegisterCoalescer: Add
implicit-def of super register when coalescing SUBREG_TO_REG" (#134408)""
This reverts commit ed5bd23867eacaa3789060f9250ba6fcece2a3de.
---
llvm/lib/CodeGen/RegisterCoalescer.cpp | 179 ++++++-
.../AArch64/GlobalISel/arm64-pcsections.ll | 64 +--
llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 20 +-
.../implicit-def-subreg-to-reg-regression.ll | 4 +-
.../AArch64/preserve_nonecc_varargs_darwin.ll | 10 +-
...er-coalesce-implicit-def-subreg-to-reg.mir | 23 +
...gister-coalesce-update-subranges-remat.mir | 161 ++++++-
.../CodeGen/PowerPC/aix-vec_insert_elt.ll | 4 +
.../CodeGen/PowerPC/build-vector-tests.ll | 48 ++
.../PowerPC/canonical-merge-shuffles.ll | 6 +
llvm/test/CodeGen/PowerPC/combine-fneg.ll | 1 +
llvm/test/CodeGen/PowerPC/fp-strict-round.ll | 6 +
llvm/test/CodeGen/PowerPC/frem.ll | 3 +
.../PowerPC/froundeven-legalization.ll | 8 +
llvm/test/CodeGen/PowerPC/ldexp.ll | 2 +
llvm/test/CodeGen/PowerPC/llvm.modf.ll | 1 +
llvm/test/CodeGen/PowerPC/vec_insert_elt.ll | 4 +
.../vector-constrained-fp-intrinsics.ll | 176 +++++++
...coalescer-breaks-subreg-to-reg-liveness.ll | 185 +++++++
...icit-def-regression-imp-operand-assert.mir | 6 +-
...subreg-to-reg-requires-subrange-update.mir | 44 ++
llvm/test/CodeGen/X86/pr76416.ll | 79 +++
llvm/test/CodeGen/X86/subreg-fail.mir | 4 +-
.../CodeGen/X86/subreg-to-reg-coalescing.mir | 451 ++++++++++++++++++
24 files changed, 1417 insertions(+), 72 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
create mode 100644 llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
create mode 100644 llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir
create mode 100644 llvm/test/CodeGen/X86/pr76416.ll
create mode 100644 llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 25c4375a73ce0..b606b1cd5504b 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -307,7 +307,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
- void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
+ ///
+ /// If \p SubregToRegSrcInst is not empty, we are coalescing a
+ /// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
+ /// implicit-def of DstReg on instructions that define SrcReg.
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
+ ArrayRef<MachineInstr *> SubregToRegSrcInst = {});
/// If the given machine operand reads only undefined lanes add an undef
/// flag.
@@ -1444,6 +1449,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
// CopyMI may have implicit operands, save them so that we can transfer them
// over to the newly materialized instruction after CopyMI is removed.
+ LaneBitmask NewMIImplicitOpsMask;
SmallVector<MachineOperand, 4> ImplicitOps;
ImplicitOps.reserve(CopyMI->getNumOperands() -
CopyMI->getDesc().getNumOperands());
@@ -1458,6 +1464,9 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
"unexpected implicit virtual register def");
ImplicitOps.push_back(MO);
+ if (MO.isDef() && MO.getReg().isVirtual() &&
+ MRI->shouldTrackSubRegLiveness(DstReg))
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
@@ -1494,14 +1503,11 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
} else {
assert(MO.getReg() == NewMI.getOperand(0).getReg());
- // We're only expecting another def of the main output, so the range
- // should get updated with the regular output range.
- //
- // FIXME: The range updating below probably needs updating to look at
- // the super register if subranges are tracked.
- assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
- "subrange update for implicit-def of super register may not be "
- "properly handled");
+ // If lanemasks need to be tracked, compile the lanemask of the NewMI
+ // implicit def operands to avoid subranges for the super-regs from
+ // being removed by code later on in this function.
+ if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
}
@@ -1617,7 +1623,8 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
*LIS->getSlotIndexes(), *TRI);
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
- if ((SR.LaneMask & DstMask).none()) {
+ if ((SR.LaneMask & DstMask).none() &&
+ (SR.LaneMask & NewMIImplicitOpsMask).none()) {
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1891,11 +1898,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
}
-void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
- unsigned SubIdx) {
+void RegisterCoalescer::updateRegDefsUses(
+ Register SrcReg, Register DstReg, unsigned SubIdx,
+ ArrayRef<MachineInstr *> SubregToRegSrcInsts) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
+ // Coalescing a COPY may expose reads of 'undef' subregisters.
+ // If so, then explicitly propagate 'undef' to those operands.
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
if (MO.isUndef())
@@ -1912,6 +1922,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
}
}
+ // If DstInt already has a subrange for the unused lanes, then we shouldn't
+ // create duplicate subranges when we update the interval for unused lanes.
+ LaneBitmask DstIntLaneMask;
+ if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ for (LiveInterval::SubRange &SR : DstInt->subranges())
+ DstIntLaneMask |= SR.LaneMask;
+ }
+
+ // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
SmallPtrSet<MachineInstr *, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
E = MRI->reg_instr_end();
@@ -1935,6 +1954,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
+ bool RequiresImplicitRedef = false;
+ if (!SubregToRegSrcInsts.empty()) {
+ // We can only add an implicit-def and undef if the sub registers match,
+ // e.g.
+ // %0:gr32 = INSTX
+ // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
+ // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
+ //
+ // This cannot be transformed into:
+ // %1.sub32:gr64 = INSTX
+ // undef %1.sub8:gr64 = INSTY , implicit-def %1
+ //
+ // Because that would thrash the top 24 bits of %1.sub32.
+ if (is_contained(SubregToRegSrcInsts, UseMI) &&
+ all_of(UseMI->defs(),
+ [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
+ if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef())
+ return true;
+ return SubIdx == MO.getSubReg();
+ })) {
+ // Add implicit-def of super-register to express that the whole
+ // register is defined by the instruction.
+ MachineInstrBuilder MIB(*MF, UseMI);
+ MIB.addReg(DstReg, RegState::ImplicitDefine);
+ RequiresImplicitRedef = true;
+ }
+
+ // If the coalesed instruction doesn't fully define the register, we need
+ // to preserve the original super register liveness for SUBREG_TO_REG.
+ //
+ // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
+ // but it introduces liveness for other subregisters. Downstream users may
+ // have been relying on those bits, so we need to ensure their liveness is
+ // captured with a def of other lanes.
+ if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ // First check if there is sufficient granularity in terms of subranges.
+ LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
+ if ((UnusedLanes & ~DstIntLaneMask).any()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
+ DstIntLaneMask |= UnusedLanes;
+ }
+
+ // After duplicating the live ranges for the low/hi bits, we
+ // need to update the subranges of the DstReg interval such that
+ // for a case like this:
+ //
+ // entry:
+ // 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
+ // :
+ // if.then:
+ // 32B %1:gpr32 = MOVIMM32 ..
+ // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
+ //
+ // Only the MOVIMM32 require a def of the top lanes and any intervals
+ // for the top 32-bits of the def at 16B should be removed.
+ for (LiveInterval::SubRange &SR : DstInt->subranges()) {
+ if (!Writes || RequiresImplicitRedef ||
+ (SR.LaneMask & UnusedLanes).none())
+ continue;
+
+ assert((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
+ "Unexpected lanemask. Subrange needs finer granularity");
+
+ SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false);
+ auto SegmentI = SR.find(UseIdx);
+ if (SegmentI != SR.end())
+ SR.removeSegment(SegmentI, true);
+ }
+ }
+ }
+
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned Op : Ops) {
MachineOperand &MO = UseMI->getOperand(Op);
@@ -1943,7 +2036,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// turn a full def into a read-modify-write sub-register def and vice
// versa.
if (SubIdx && MO.isDef())
- MO.setIsUndef(!Reads);
+ MO.setIsUndef(!Reads || RequiresImplicitRedef);
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
@@ -2046,6 +2139,30 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
LIS->shrinkToUses(&LI);
}
+/// For a given use of value \p Idx, it returns the def in the current block,
+/// or otherwise all possible defs in preceding blocks.
+static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks,
+ SmallVector<MachineInstr *> &Instrs,
+ LiveIntervals *LIS, LiveInterval &SrcInt,
+ MachineBasicBlock *MBB, VNInfo *Idx) {
+ if (!Idx->isPHIDef()) {
+ MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def);
+ assert(Def && "Unable to find a def for SUBREG_TO_REG source operand");
+ Instrs.push_back(Def);
+ return true;
+ }
+
+ bool Any = false;
+ if (VisitedBlocks.count(MBB))
+ return false;
+ VisitedBlocks.insert(MBB);
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred,
+ SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred)));
+ }
+ return Any;
+}
+
bool RegisterCoalescer::joinCopy(
MachineInstr *CopyMI, bool &Again,
SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
@@ -2183,6 +2300,35 @@ bool RegisterCoalescer::joinCopy(
});
}
+ SmallVector<MachineInstr *> SubregToRegSrcInsts;
+ if (CopyMI->isSubregToReg()) {
+ // For the case where the copy instruction is a SUBREG_TO_REG, e.g.
+ //
+ // %0:gpr32 = movimm32 ..
+ // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
+ // ...
+ // %0:gpr32 = COPY <something>
+ //
+ // After joining liveranges, the original `movimm32` will need an
+ // implicit-def to make it explicit that the entire register is written,
+ // i.e.
+ //
+ // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
+ // ...
+ // undef %0.sub32:gpr64 = COPY <something> // Note that this does not
+ // // require an implicit-def,
+ // // because it has nothing to
+ // // do with the SUBREG_TO_REG.
+ LiveInterval &SrcInt =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
+ if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
+ CopyMI->getParent(),
+ SrcInt.Query(SubregToRegSlotIdx).valueIn()))
+ llvm_unreachable("SUBREG_TO_REG src requires a def");
+ }
+
ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;
@@ -2251,9 +2397,12 @@ bool RegisterCoalescer::joinCopy(
// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
- if (CP.getDstIdx())
+ if (CP.getDstIdx()) {
+ assert(SubregToRegSrcInsts.empty() && "can this happen?");
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
- updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+ }
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
+ SubregToRegSrcInsts);
// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index 57481724936a3..cab2741be9929 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -12,7 +12,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -46,13 +46,13 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w1, $x0, $x2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def $x9, pcsections !0 :: (load (s32) from %ir.pnew)
+ ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $w1, $x0, $x9
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -91,7 +91,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -243,7 +243,7 @@ define i32 @fetch_and_nand(ptr %p) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w8, 2, pcsections !0
; CHECK-NEXT: $w9 = ORNWrs $wzr, killed renamable $w9, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRW killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
@@ -295,7 +295,7 @@ define i32 @fetch_and_or(ptr %p) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $w10 = ORRWrs renamable $w8, renamable $w9, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STLXRW killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
@@ -726,7 +726,7 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -750,7 +750,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -773,7 +773,7 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -797,7 +797,7 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -821,7 +821,7 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -845,7 +845,7 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -869,7 +869,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0
@@ -895,7 +895,7 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0
@@ -923,10 +923,10 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -951,10 +951,10 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -977,7 +977,7 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -1001,7 +1001,7 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1024,7 +1024,7 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -1048,7 +1048,7 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -1072,7 +1072,7 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -1096,7 +1096,7 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
@@ -1120,7 +1120,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0
@@ -1146,7 +1146,7 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0
@@ -1174,10 +1174,10 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1202,10 +1202,10 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def renamable $x10, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -1230,7 +1230,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 7, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
@@ -1273,7 +1273,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 15, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index d8f370884c84a..6756599ca0ee3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1727,7 +1727,7 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v16i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov v2.16b, v1.16b
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 def $q0 def $q0 def $q0 def $q0 def $q0 def $q0 def $q0
; CHECK-GI-NEXT: adrp x8, .LCPI127_0
; CHECK-GI-NEXT: mov v1.b[0], v0.b[0]
; CHECK-GI-NEXT: mov v1.b[1], v0.b[1]
@@ -1771,7 +1771,7 @@ define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v8i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov b2, v0.b[0]
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 def $q1 def $q1 def $q1 def $q1 def $q1 def $q1 def $q1
; CHECK-GI-NEXT: mov v2.b[1], v0.b[1]
; CHECK-GI-NEXT: mov v2.b[2], v0.b[2]
; CHECK-GI-NEXT: mov v2.b[3], v0.b[3]
@@ -1835,9 +1835,9 @@ define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
;
; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v8i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 def $q0 def $q0 def $q0 def $q0 def $q0 def $q0 def $q0
; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 def $q1 def $q1 def $q1 def $q1 def $q1 def $q1 def $q1
; CHECK-GI-NEXT: mov v2.b[1], v0.b[1]
; CHECK-GI-NEXT: mov v2.b[2], v0.b[2]
; CHECK-GI-NEXT: mov v2.b[3], v0.b[3]
@@ -1920,7 +1920,7 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov v2.16b, v1.16b
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 def $q0 def $q0 def $q0
; CHECK-GI-NEXT: adrp x8, .LCPI131_0
; CHECK-GI-NEXT: mov v1.h[0], v0.h[0]
; CHECK-GI-NEXT: mov v1.h[1], v0.h[1]
@@ -1952,7 +1952,7 @@ define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v4i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov h2, v0.h[0]
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 def $q1 def $q1 def $q1
; CHECK-GI-NEXT: mov v2.h[1], v0.h[1]
; CHECK-GI-NEXT: mov v2.h[2], v0.h[2]
; CHECK-GI-NEXT: mov v2.h[3], v0.h[3]
@@ -1992,9 +1992,9 @@ define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
;
; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v4i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 def $q0 def $q0 def $q0
; CHECK-GI-NEXT: mov v2.h[0], v0.h[0]
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 def $q1 def $q1 def $q1
; CHECK-GI-NEXT: mov v2.h[1], v0.h[1]
; CHECK-GI-NEXT: mov v2.h[2], v0.h[2]
; CHECK-GI-NEXT: mov v2.h[3], v0.h[3]
@@ -2053,7 +2053,7 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
; CHECK-GI-LABEL: test_concat_v4i32_v2i32_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov v2.16b, v1.16b
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 def $q0
; CHECK-GI-NEXT: adrp x8, .LCPI135_0
; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
@@ -2079,7 +2079,7 @@ define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v2i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov s2, v0.s[0]
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 def $q1
; CHECK-GI-NEXT: mov v2.s[1], v0.s[1]
; CHECK-GI-NEXT: mov v2.s[2], v1.s[0]
; CHECK-GI-NEXT: mov v2.s[3], v1.s[1]
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index 50fac819d4afe..e37d8f7da4bfc 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=false < %s | sed -e "/; kill: /d" | FileCheck %s
+; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=true < %s | FileCheck %s
; Check there's no assert in spilling from implicit-def operands on an
; IMPLICIT_DEF.
@@ -92,7 +93,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
; CHECK-NEXT: ldr x8, [sp, #40] ; 8-byte Reload
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: mov x1, xzr
-; CHECK-NEXT: ; kill: def $w8 killed $w8 killed $x8 def $x8
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: bl _fprintf
; CHECK-NEXT: brk #0x1
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
index 2a77d4dd33fe5..4206c0bc26991 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -27,11 +27,12 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: sub sp, sp, #208
; CHECK-NEXT: mov w8, #10 ; =0xa
; CHECK-NEXT: mov w9, #9 ; =0x9
-; CHECK-NEXT: mov w10, #8 ; =0x8
+; CHECK-NEXT: mov w0, #1 ; =0x1
; CHECK-NEXT: stp x9, x8, [sp, #24]
-; CHECK-NEXT: mov w8, #7 ; =0x7
+; CHECK-NEXT: mov w8, #8 ; =0x8
; CHECK-NEXT: mov w9, #6 ; =0x6
-; CHECK-NEXT: mov w0, #1 ; =0x1
+; CHECK-NEXT: str x8, [sp, #16]
+; CHECK-NEXT: mov w8, #7 ; =0x7
; CHECK-NEXT: mov w1, #2 ; =0x2
; CHECK-NEXT: mov w2, #3 ; =0x3
; CHECK-NEXT: mov w3, #4 ; =0x4
@@ -46,8 +47,7 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: stp x22, x21, [sp, #160] ; 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #176] ; 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #192] ; 16-byte Folded Spill
-; CHECK-NEXT: stp x8, x10, [sp, #8]
-; CHECK-NEXT: str x9, [sp]
+; CHECK-NEXT: stp x9, x8, [sp]
; CHECK-NEXT: bl _callee
; CHECK-NEXT: ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
new file mode 100644
index 0000000000000..aecb90adecd71
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
@@ -0,0 +1,23 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=false -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=true -o - %s | FileCheck %s
+---
+name: test
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x1
+ ; CHECK-LABEL: name: test
+ ; CHECK: liveins: $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x0 = COPY $x1
+ ; CHECK-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1
+ ; CHECK-NEXT: RET_ReallyLR implicit $x1, implicit $x0
+ %190:gpr64 = COPY killed $x1
+ %191:gpr32 = COPY %190.sub_32:gpr64
+ %192:gpr32 = ORRWrr $wzr, killed %191:gpr32
+ %193:gpr64all = SUBREG_TO_REG 0, killed %192:gpr32, %subreg.sub_32
+ $x0 = COPY killed %190:gpr64
+ $x1 = COPY killed %193:gpr64all
+ RET_ReallyLR implicit $x1, implicit $x0
+...
diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
index 08fc47d9480ce..eb6242ce9940d 100644
--- a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
@@ -7,9 +7,18 @@
# CHECK-DBG: ********** JOINING INTERVALS ***********
# CHECK-DBG: ********** INTERVALS **********
# CHECK-DBG: %0 [16r,32r:0) 0 at 16r weight:0.000000e+00
-# CHECK-DBG: %3 [48r,112r:0) 0 at 48r L0000000000000040 [48r,112r:0) 0 at 48r weight:0.000000e+00
-# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r L0000000000000080 [112e,112d:0) 0 at 112e L0000000000000040 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r weight:0.000000e+00
+# CHECK-DBG: %3 [48r,112r:0) 0 at 48r L0000000000000080 [48r,112r:0) 0 at 48r L0000000000000040 [48r,112r:0) 0 at 48r weight:0.000000e+00
+# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r L0000000000000080 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r L0000000000000040 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r weight:0.000000e+00
# CHECK-DBG: %5 [32r,112r:1)[112r,112d:0) 0 at 112r 1 at 32r weight:0.000000e+00
+# CHECK-DBG: ********** MACHINEINSTRS **********
+# CHECK-DBG: 0B bb.0.entry:
+# CHECK-DBG: 16B %0:gpr64sp = ADDXri %stack.0, 0, 0
+# CHECK-DBG: 32B %5:gpr64common = nuw ADDXri %0:gpr64sp, 64, 0
+# CHECK-DBG: 48B undef %3.sub_32:gpr64 = MOVi32imm 64, implicit-def %3:gpr64
+# CHECK-DBG: 80B undef %4.sub_32:gpr64 = MOVi32imm 64, implicit-def %4:gpr64
+# CHECK-DBG: 112B dead %5:gpr64common, dead early-clobber %4:gpr64 = MOPSMemorySetPseudo %5:gpr64common(tied-def 0), %4:gpr64(tied-def 1), %3:gpr64, implicit-def dead $nzcv
+# CHECK-DBG: 128B RET_ReallyLR
+
---
name: test
tracksRegLiveness: true
@@ -43,9 +52,44 @@ body: |
# CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0 at 48B-phi 1 at 320r 2 at 32r
# CHECK-DBG-SAME: weight:0.000000e+00
# CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at 80r 3 at 304B-phi
-# CHECK-DBG-SAME: L0000000000000080 [288r,304B:0)[304B,320r:3) 0 at 288r 1 at x 2 at x 3 at 304B-phi
+# CHECK-DBG-SAME: L0000000000000080 [240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at x 3 at 304B-phi
# CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at 80r 3 at 304B-phi
# CHECK-DBG-SAME: weight:0.000000e+00
+# CHECK-DBG: ********** MACHINEINSTRS **********
+# CHECK-DBG: 0B bb.0:
+# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%)
+# CHECK-DBG: 32B %1:gpr64 = IMPLICIT_DEF
+# CHECK-DBG: 48B bb.1:
+# CHECK-DBG: ; predecessors: %bb.0, %bb.7
+# CHECK-DBG: successors: %bb.2(0x80000000); %bb.2(100.00%)
+# CHECK-DBG: 64B bb.2:
+# CHECK-DBG: ; predecessors: %bb.1
+# CHECK-DBG: successors: %bb.3(0x80000000); %bb.3(100.00%)
+# CHECK-DBG: 80B undef %3.sub_32:gpr64 = MOVi32imm 1
+# CHECK-DBG: 96B bb.3:
+# CHECK-DBG: ; predecessors: %bb.2
+# CHECK-DBG: successors: %bb.7(0x40000000), %bb.4(0x40000000); %bb.7(50.00%), %bb.4(50.00%)
+# CHECK-DBG: 112B $nzcv = IMPLICIT_DEF
+# CHECK-DBG: 144B Bcc 1, %bb.7, implicit killed $nzcv
+# CHECK-DBG: 160B bb.4:
+# CHECK-DBG: ; predecessors: %bb.3
+# CHECK-DBG: successors: %bb.6(0x40000000), %bb.5(0x40000000); %bb.6(50.00%), %bb.5(50.00%)
+# CHECK-DBG: 176B $nzcv = IMPLICIT_DEF
+# CHECK-DBG: 192B Bcc 1, %bb.6, implicit killed $nzcv
+# CHECK-DBG: 208B bb.5:
+# CHECK-DBG: ; predecessors: %bb.4
+# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%)
+# CHECK-DBG: 240B undef %3.sub_32:gpr64 = MOVi32imm 1, implicit-def %3:gpr64
+# CHECK-DBG: 256B B %bb.7
+# CHECK-DBG: 272B bb.6:
+# CHECK-DBG: ; predecessors: %bb.4
+# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%)
+# CHECK-DBG: 288B %3:gpr64 = COPY $xzr
+# CHECK-DBG: 304B bb.7:
+# CHECK-DBG: ; predecessors: %bb.3, %bb.5, %bb.6
+# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%)
+# CHECK-DBG: 320B %1:gpr64 = ADDXrs %1:gpr64, %3:gpr64, 1
+# CHECK-DBG: 352B B %bb.1
---
name: reproducer
tracksRegLiveness: true
@@ -92,6 +136,42 @@ body: |
# CHECK-DBG-SAME: L0000000000000080 [224r,256B:1)[272r,288B:0)[288B,304r:3) 0 at 272r 1 at 224r 2 at x 3 at 288B-phi
# CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[224r,256B:1)[272r,288B:0)[288B,304r:3) 0 at 272r 1 at 224r 2 at 80r 3 at 288B-phi
# CHECK-DBG-SAME: weight:0.000000e+00
+# CHECK-DBG: ********** MACHINEINSTRS **********
+# CHECK-DBG: 0B bb.0:
+# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%)
+# CHECK-DBG: 32B %1:gpr64 = IMPLICIT_DEF
+# CHECK-DBG: 48B bb.1:
+# CHECK-DBG: ; predecessors: %bb.0, %bb.7
+# CHECK-DBG: successors: %bb.2(0x80000000); %bb.2(100.00%)
+# CHECK-DBG: 64B bb.2:
+# CHECK-DBG: ; predecessors: %bb.1
+# CHECK-DBG: successors: %bb.3(0x80000000); %bb.3(100.00%)
+# CHECK-DBG: 80B undef %3.sub_32:gpr64 = MOVi32imm 1
+# CHECK-DBG: 96B bb.3:
+# CHECK-DBG: ; predecessors: %bb.2
+# CHECK-DBG: successors: %bb.7(0x40000000), %bb.4(0x40000000); %bb.7(50.00%), %bb.4(50.00%)
+# CHECK-DBG: 112B $nzcv = IMPLICIT_DEF
+# CHECK-DBG: 144B Bcc 1, %bb.7, implicit killed $nzcv
+# CHECK-DBG: 160B bb.4:
+# CHECK-DBG: ; predecessors: %bb.3
+# CHECK-DBG: successors: %bb.6(0x40000000), %bb.5(0x40000000); %bb.6(50.00%), %bb.5(50.00%)
+# CHECK-DBG: 176B $nzcv = IMPLICIT_DEF
+# CHECK-DBG: 192B Bcc 1, %bb.6, implicit killed $nzcv
+# CHECK-DBG: 208B bb.5:
+# CHECK-DBG: ; predecessors: %bb.4
+# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%)
+# CHECK-DBG: 224B %3:gpr64 = IMPLICIT_DEF
+# CHECK-DBG: 240B B %bb.7
+# CHECK-DBG: 256B bb.6:
+# CHECK-DBG: ; predecessors: %bb.4
+# CHECK-DBG: successors: %bb.7(0x80000000); %bb.7(100.00%)
+# CHECK-DBG: 272B %3:gpr64 = COPY $xzr
+# CHECK-DBG: 288B bb.7:
+# CHECK-DBG: ; predecessors: %bb.3, %bb.5, %bb.6
+# CHECK-DBG: successors: %bb.1(0x80000000); %bb.1(100.00%)
+# CHECK-DBG: 304B %1:gpr64 = ADDXrs %1:gpr64, %3:gpr64, 1
+# CHECK-DBG: 336B B %bb.1
+
---
name: reproducer2
tracksRegLiveness: true
@@ -127,3 +207,78 @@ body: |
B %bb.1
...
+# CHECK-DBG: ********** REGISTER COALESCER **********
+# CHECK-DBG: ********** Function: reproducer3
+# CHECK-DBG: ********** JOINING INTERVALS ***********
+# CHECK-DBG: ********** INTERVALS **********
+# CHECK-DBG: W0 [0B,32r:0)[320r,336r:1) 0 at 0B-phi 1 at 320r
+# CHECK-DBG: W1 [0B,16r:0) 0 at 0B-phi
+# CHECK-DBG: %0 [16r,64r:0) 0 at 16r weight:0.000000e+00
+# CHECK-DBG: %1 [32r,128r:0) 0 at 32r weight:0.000000e+00
+# CHECK-DBG: %2 [48r,64r:0) 0 at 48r weight:0.000000e+00
+# CHECK-DBG: %3 [64r,80r:0) 0 at 64r weight:0.000000e+00
+# CHECK-DBG: %4 [80r,176r:0) 0 at 80r weight:0.000000e+00
+# CHECK-DBG: %7 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0 at 128r 1 at 112r
+# CHECK-DBG-SAME: L0000000000000080 [128r,256r:0)[304B,320r:0) 0 at 128r
+# CHECK-DBG-SAME: L0000000000000040 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0 at 128r 1 at 112r
+# CHECK-DBG-SAME: weight:0.000000e+00
+# CHECK-DBG: %8 [96r,176r:1)[176r,192r:0) 0 at 176r 1 at 96r weight:0.000000e+00
+# CHECK-DBG: %9 [256r,272r:0) 0 at 256r weight:0.000000e+00
+# CHECK-DBG: ********** MACHINEINSTRS **********
+# CHECK-DBG: 0B bb.0:
+# CHECK-DBG: successors: %bb.2(0x40000000), %bb.1(0x40000000); %bb.2(50.00%), %bb.1(50.00%)
+# CHECK-DBG: liveins: $w0, $w1
+# CHECK-DBG: 16B %0:gpr32 = COPY $w1
+# CHECK-DBG: 32B %1:gpr32 = COPY $w0
+# CHECK-DBG: 48B %2:gpr32 = UBFMWri %1:gpr32, 31, 30
+# CHECK-DBG: 64B %3:gpr32 = SUBWrs %2:gpr32, %0:gpr32, 1
+# CHECK-DBG: 80B %4:gpr32 = UBFMWri %3:gpr32, 1, 31
+# CHECK-DBG: 96B %8:gpr32common = MOVi32imm 1
+# CHECK-DBG: 112B undef %7.sub_32:gpr64 = MOVi32imm 1
+# CHECK-DBG: 128B undef %7.sub_32:gpr64 = BFMWri %7.sub_32:gpr64(tied-def 0), %1:gpr32, 31, 30, implicit-def %7:gpr64
+# CHECK-DBG: 176B %8:gpr32common = BFMWri %8:gpr32common(tied-def 0), %4:gpr32, 30, 29
+# CHECK-DBG: 192B dead $wzr = SUBSWri %8:gpr32common, 0, 0, implicit-def $nzcv
+# CHECK-DBG: 208B Bcc 2, %bb.2, implicit killed $nzcv
+# CHECK-DBG: 224B B %bb.1
+# CHECK-DBG: 240B bb.1:
+# CHECK-DBG: ; predecessors: %bb.0
+# CHECK-DBG: 256B %9:gpr64common = UBFMXri %7:gpr64, 62, 61
+# CHECK-DBG: 272B dead $xzr = LDRXui %9:gpr64common, 0
+# CHECK-DBG: 288B RET_ReallyLR
+# CHECK-DBG: 304B bb.2:
+# CHECK-DBG: ; predecessors: %bb.0
+# CHECK-DBG: 320B $x0 = COPY %7:gpr64
+# CHECK-DBG: 336B RET_ReallyLR implicit $x0
+
+---
+name: reproducer3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ %0:gpr32 = COPY killed $w1
+ %1:gpr32 = COPY killed $w0
+ %3:gpr32 = UBFMWri %1, 31, 30
+ %4:gpr32 = SUBWrs killed %3, killed %0, 1
+ %5:gpr32 = UBFMWri killed %4, 1, 31
+ %6:gpr32 = MOVi32imm 1
+ %7:gpr32 = COPY %6
+ %7:gpr32 = BFMWri %7, killed %1, 31, 30
+ %8:gpr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32
+ %9:gpr32common = COPY killed %6
+ %9:gpr32common = BFMWri %9, killed %5, 30, 29
+ dead $wzr = SUBSWri killed %9, 0, 0, implicit-def $nzcv
+ Bcc 2, %bb.2, implicit killed $nzcv
+ B %bb.1
+
+ bb.1:
+ %10:gpr64common = UBFMXri killed %8, 62, 61
+ dead $xzr = LDRXui killed %10, 0
+ RET_ReallyLR
+
+ bb.2:
+ $x0 = COPY killed %8
+ RET_ReallyLR implicit killed $x0
+
+...
diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
index afc7a39e18dc8..aae23265710ce 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
@@ -750,21 +750,25 @@ entry:
define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
; CHECK-64-LABEL: testDoubleImm1:
; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-64-NEXT: xxpermdi 34, 1, 34, 1
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testDoubleImm1:
; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1
; CHECK-32-NEXT: blr
;
; CHECK-64-P10-LABEL: testDoubleImm1:
; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-64-P10-NEXT: xxpermdi 34, 1, 34, 1
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testDoubleImm1:
; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-32-P10-NEXT: xxpermdi 34, 1, 34, 1
; CHECK-32-P10-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index fb55511162a7e..4f965780ab95e 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1754,7 +1754,11 @@ entry:
define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
; P9BE-LABEL: fromRegsConvdtoi:
; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
; P9BE-NEXT: xxmrghd vs0, vs2, vs4
+; P9BE-NEXT: # kill: def $f3 killed $f3 def $vsl3
+; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9BE-NEXT: xvcvdpsxws v2, vs0
; P9BE-NEXT: xxmrghd vs0, vs1, vs3
; P9BE-NEXT: xvcvdpsxws v3, vs0
@@ -1763,7 +1767,11 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
;
; P9LE-LABEL: fromRegsConvdtoi:
; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: # kill: def $f3 killed $f3 def $vsl3
+; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9LE-NEXT: xxmrghd vs0, vs3, vs1
+; P9LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
; P9LE-NEXT: xvcvdpsxws v2, vs0
; P9LE-NEXT: xxmrghd vs0, vs4, vs2
; P9LE-NEXT: xvcvdpsxws v3, vs0
@@ -1772,6 +1780,10 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
;
; P8BE-LABEL: fromRegsConvdtoi:
; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; P8BE-NEXT: # kill: def $f3 killed $f3 def $vsl3
+; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8BE-NEXT: xxmrghd vs0, vs2, vs4
; P8BE-NEXT: xxmrghd vs1, vs1, vs3
; P8BE-NEXT: xvcvdpsxws v2, vs0
@@ -1781,6 +1793,10 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
;
; P8LE-LABEL: fromRegsConvdtoi:
; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; P8LE-NEXT: # kill: def $f3 killed $f3 def $vsl3
+; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8LE-NEXT: xxmrghd vs0, vs3, vs1
; P8LE-NEXT: xxmrghd vs1, vs4, vs2
; P8LE-NEXT: xvcvdpsxws v2, vs0
@@ -3240,7 +3256,11 @@ entry:
define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) {
; P9BE-LABEL: fromRegsConvdtoui:
; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
; P9BE-NEXT: xxmrghd vs0, vs2, vs4
+; P9BE-NEXT: # kill: def $f3 killed $f3 def $vsl3
+; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9BE-NEXT: xvcvdpuxws v2, vs0
; P9BE-NEXT: xxmrghd vs0, vs1, vs3
; P9BE-NEXT: xvcvdpuxws v3, vs0
@@ -3249,7 +3269,11 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
;
; P9LE-LABEL: fromRegsConvdtoui:
; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: # kill: def $f3 killed $f3 def $vsl3
+; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9LE-NEXT: xxmrghd vs0, vs3, vs1
+; P9LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
; P9LE-NEXT: xvcvdpuxws v2, vs0
; P9LE-NEXT: xxmrghd vs0, vs4, vs2
; P9LE-NEXT: xvcvdpuxws v3, vs0
@@ -3258,6 +3282,10 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
;
; P8BE-LABEL: fromRegsConvdtoui:
; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; P8BE-NEXT: # kill: def $f3 killed $f3 def $vsl3
+; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8BE-NEXT: xxmrghd vs0, vs2, vs4
; P8BE-NEXT: xxmrghd vs1, vs1, vs3
; P8BE-NEXT: xvcvdpuxws v2, vs0
@@ -3267,6 +3295,10 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
;
; P8LE-LABEL: fromRegsConvdtoui:
; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; P8LE-NEXT: # kill: def $f3 killed $f3 def $vsl3
+; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8LE-NEXT: xxmrghd vs0, vs3, vs1
; P8LE-NEXT: xxmrghd vs1, vs4, vs2
; P8LE-NEXT: xvcvdpuxws v2, vs0
@@ -4540,24 +4572,32 @@ entry:
define <2 x i64> @fromRegsConvdtoll(double %a, double %b) {
; P9BE-LABEL: fromRegsConvdtoll:
; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9BE-NEXT: xxmrghd vs0, vs1, vs2
; P9BE-NEXT: xvcvdpsxds v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: fromRegsConvdtoll:
; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9LE-NEXT: xxmrghd vs0, vs2, vs1
; P9LE-NEXT: xvcvdpsxds v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromRegsConvdtoll:
; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8BE-NEXT: xxmrghd vs0, vs1, vs2
; P8BE-NEXT: xvcvdpsxds v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: fromRegsConvdtoll:
; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8LE-NEXT: xxmrghd vs0, vs2, vs1
; P8LE-NEXT: xvcvdpsxds v2, vs0
; P8LE-NEXT: blr
@@ -5694,24 +5734,32 @@ entry:
define <2 x i64> @fromRegsConvdtoull(double %a, double %b) {
; P9BE-LABEL: fromRegsConvdtoull:
; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9BE-NEXT: xxmrghd vs0, vs1, vs2
; P9BE-NEXT: xvcvdpuxds v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: fromRegsConvdtoull:
; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9LE-NEXT: xxmrghd vs0, vs2, vs1
; P9LE-NEXT: xvcvdpuxds v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: fromRegsConvdtoull:
; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P8BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8BE-NEXT: xxmrghd vs0, vs1, vs2
; P8BE-NEXT: xvcvdpuxds v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: fromRegsConvdtoull:
; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; P8LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8LE-NEXT: xxmrghd vs0, vs2, vs1
; P8LE-NEXT: xvcvdpuxds v2, vs0
; P8LE-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 7f6fdc7f88cd1..b40fbc3e16873 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -562,6 +562,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
; CHECK-P8-NEXT: bl dummy
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxlxor f0, f0, f0
+; CHECK-P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P8-NEXT: xxswapd vs0, vs0
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
@@ -576,6 +577,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
; CHECK-P9-NEXT: bl dummy
; CHECK-P9-NEXT: nop
; CHECK-P9-NEXT: xxlxor f0, f0, f0
+; CHECK-P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P9-NEXT: stxv vs0, 0(r30)
;
@@ -589,6 +591,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
; CHECK-P9-BE-NEXT: bl dummy
; CHECK-P9-BE-NEXT: nop
; CHECK-P9-BE-NEXT: xxlxor f0, f0, f0
+; CHECK-P9-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9-BE-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P9-BE-NEXT: stxv vs0, 0(r30)
;
@@ -615,6 +618,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
; CHECK-P7-NEXT: bl dummy
; CHECK-P7-NEXT: nop
; CHECK-P7-NEXT: xxlxor f0, f0, f0
+; CHECK-P7-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-P7-NEXT: xxmrghd vs0, vs1, vs0
; CHECK-P7-NEXT: xxswapd vs0, vs0
; CHECK-P7-NEXT: stxvd2x vs0, 0, r30
@@ -629,6 +633,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
; P8-AIX-64-NEXT: bl .dummy[PR]
; P8-AIX-64-NEXT: nop
; P8-AIX-64-NEXT: xxlxor f0, f0, f0
+; P8-AIX-64-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-AIX-64-NEXT: xxmrghd vs0, vs0, vs1
; P8-AIX-64-NEXT: stxvd2x vs0, 0, r31
;
@@ -642,6 +647,7 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
; P8-AIX-32-NEXT: bl .dummy[PR]
; P8-AIX-32-NEXT: nop
; P8-AIX-32-NEXT: xxlxor f0, f0, f0
+; P8-AIX-32-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-AIX-32-NEXT: xxmrghd vs0, vs0, vs1
; P8-AIX-32-NEXT: stxvd2x vs0, 0, r31
test_entry:
diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
index 04af0947c7a33..a72abf7007e8d 100644
--- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -6,6 +6,7 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
; CHECK-LABEL: fneg_fdiv_splat:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxspltd 0, 1, 0
; CHECK-NEXT: addi 3, 3, .LCPI0_0 at toc@l
; CHECK-NEXT: xvredp 1, 0
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
index eac4fb6f98bf7..4519cf4101f42 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
@@ -229,6 +229,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
; P8-NEXT: xscvspdpn f1, vs0
; P8-NEXT: bl nearbyintf
; P8-NEXT: nop
+; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-NEXT: xxmrghd vs0, vs1, v30
; P8-NEXT: xscvspdpn f1, v31
; P8-NEXT: xvcvdpsp v29, vs0
@@ -239,6 +240,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
; P8-NEXT: xscvspdpn f1, vs0
; P8-NEXT: bl nearbyintf
; P8-NEXT: nop
+; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-NEXT: xxmrghd vs0, v30, vs1
; P8-NEXT: li r3, 160
; P8-NEXT: xvcvdpsp v2, vs0
@@ -276,6 +278,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
; P9-NEXT: xscvspdpn f1, vs0
; P9-NEXT: bl nearbyintf
; P9-NEXT: nop
+; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9-NEXT: xxmrghd vs0, vs1, v30
; P9-NEXT: xscvspdpn f1, v31
; P9-NEXT: xvcvdpsp v29, vs0
@@ -286,6 +289,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
; P9-NEXT: xscvspdpn f1, vs0
; P9-NEXT: bl nearbyintf
; P9-NEXT: nop
+; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9-NEXT: xxmrghd vs0, v30, vs1
; P9-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
; P9-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
@@ -326,6 +330,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) stric
; P8-NEXT: bl nearbyint
; P8-NEXT: nop
; P8-NEXT: li r3, 144
+; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-NEXT: xxmrghd v2, v30, vs1
; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
; P8-NEXT: li r3, 128
@@ -354,6 +359,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) stric
; P9-NEXT: xxswapd vs1, v31
; P9-NEXT: bl nearbyint
; P9-NEXT: nop
+; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P9-NEXT: xxmrghd v2, v30, vs1
; P9-NEXT: lxv v31, 48(r1) # 16-byte Folded Reload
; P9-NEXT: lxv v30, 32(r1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/frem.ll b/llvm/test/CodeGen/PowerPC/frem.ll
index 19b4b1c9cdf95..21cb206ac43bb 100644
--- a/llvm/test/CodeGen/PowerPC/frem.ll
+++ b/llvm/test/CodeGen/PowerPC/frem.ll
@@ -70,6 +70,7 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: xscvspdpn 2, 0
; CHECK-NEXT: bl fmodf
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd 0, 1, 61
; CHECK-NEXT: xscvspdpn 1, 62
; CHECK-NEXT: xscvspdpn 2, 63
@@ -83,6 +84,7 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: xscvspdpn 2, 0
; CHECK-NEXT: bl fmodf
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd 0, 61, 1
; CHECK-NEXT: lxv 63, 80(1) # 16-byte Folded Reload
; CHECK-NEXT: lxv 62, 64(1) # 16-byte Folded Reload
@@ -124,6 +126,7 @@ define <2 x double> @frem2x64(<2 x double> %a, <2 x double> %b) {
; CHECK-NEXT: xxswapd 2, 63
; CHECK-NEXT: bl fmod
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd 34, 61, 1
; CHECK-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
; CHECK-NEXT: lxv 62, 48(1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
index 238e200bfc782..3ae0b02f79e27 100644
--- a/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
+++ b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
@@ -41,39 +41,47 @@ define void @test(ptr %p1, ptr %p2) nounwind {
; CHECK-NEXT: xxswapd 61, 63
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxswapd 56, 1
; CHECK-NEXT: xxlor 1, 59, 59
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxswapd 0, 1
; CHECK-NEXT: xxlor 1, 60, 60
; CHECK-NEXT: xxmrgld 59, 0, 56
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxswapd 60, 1
; CHECK-NEXT: xxlor 1, 62, 62
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxswapd 0, 1
; CHECK-NEXT: xxlor 1, 61, 61
; CHECK-NEXT: xxmrgld 62, 0, 60
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxswapd 61, 1
; CHECK-NEXT: xxlor 1, 63, 63
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxswapd 0, 1
; CHECK-NEXT: xxlor 1, 57, 57
; CHECK-NEXT: xxmrgld 63, 0, 61
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxswapd 61, 1
; CHECK-NEXT: xxlor 1, 58, 58
; CHECK-NEXT: bl roundeven
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 160
; CHECK-NEXT: stxvd2x 63, 30, 29
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxswapd 0, 1
; CHECK-NEXT: stxvd2x 62, 30, 28
; CHECK-NEXT: stxvd2x 59, 30, 27
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll
index 8d7253b5ce8e3..23748bca0b7b2 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -107,6 +107,7 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) nounwind {
; CHECK-NEXT: extsw r4, r3
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd vs0, vs1, v29
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: vextuwrx r3, r3, v31
@@ -123,6 +124,7 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) nounwind {
; CHECK-NEXT: xscvspdpn f1, vs0
; CHECK-NEXT: bl ldexpf
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd vs0, vs1, v29
; CHECK-NEXT: lxv v31, 80(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v30, 64(r1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
index 1b137c786cc91..203b3bd15490a 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -294,6 +294,7 @@ define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) {
; CHECK-NEXT: addi r4, r1, 40
; CHECK-NEXT: bl modf
; CHECK-NEXT: nop
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd v2, v30, vs1
; CHECK-NEXT: lfd f0, 32(r1)
; CHECK-NEXT: lfd f1, 40(r1)
diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
index b006c78604648..0364166a1b29e 100644
--- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
@@ -928,21 +928,25 @@ entry:
define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
; CHECK-LABEL: testDoubleImm1:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxmrghd v2, v2, vs1
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testDoubleImm1:
; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-BE-NEXT: xxpermdi v2, vs1, v2, 1
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testDoubleImm1:
; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9-NEXT: xxpermdi v2, vs1, v2, 1
; CHECK-P9-NEXT: blr
;
; AIX-P8-LABEL: testDoubleImm1:
; AIX-P8: # %bb.0: # %entry
+; AIX-P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; AIX-P8-NEXT: xxpermdi v2, vs1, v2, 1
; AIX-P8-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 71c3069a406fe..5dac21ba447af 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -107,6 +107,10 @@ entry:
define <3 x double> @constrained_vector_fdiv_v3f64(<3 x double> %x, <3 x double> %y) #0 {
; PC64LE-LABEL: constrained_vector_fdiv_v3f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 5, 4
; PC64LE-NEXT: xxmrghd 1, 2, 1
; PC64LE-NEXT: xsdivdp 3, 3, 6
@@ -116,6 +120,10 @@ define <3 x double> @constrained_vector_fdiv_v3f64(<3 x double> %x, <3 x double>
;
; PC64LE9-LABEL: constrained_vector_fdiv_v3f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 5, 4
; PC64LE9-NEXT: xxmrghd 1, 2, 1
; PC64LE9-NEXT: xsdivdp 3, 3, 6
@@ -209,6 +217,7 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double>
; PC64LE-NEXT: bl fmod
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -239,6 +248,7 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double>
; PC64LE9-NEXT: xxswapd 2, 63
; PC64LE9-NEXT: bl fmod
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 61, 1
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload
@@ -390,6 +400,7 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
; PC64LE-NEXT: fmr 2, 30
; PC64LE-NEXT: bl fmod
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 29
; PC64LE-NEXT: fmr 2, 31
@@ -431,6 +442,7 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
; PC64LE9-NEXT: fmr 2, 30
; PC64LE9-NEXT: bl fmod
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 29
; PC64LE9-NEXT: fmr 2, 31
@@ -486,6 +498,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
; PC64LE-NEXT: xxswapd 2, 62
; PC64LE-NEXT: bl fmod
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 59, 1
; PC64LE-NEXT: xxlor 1, 61, 61
; PC64LE-NEXT: xxlor 2, 63, 63
@@ -498,6 +511,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 112
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 60, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 96
@@ -536,6 +550,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
; PC64LE9-NEXT: xxswapd 2, 62
; PC64LE9-NEXT: bl fmod
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 59, 1
; PC64LE9-NEXT: xscpsgndp 1, 61, 61
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
@@ -546,6 +561,7 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
; PC64LE9-NEXT: xxswapd 2, 63
; PC64LE9-NEXT: bl fmod
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 60, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 96(1) # 16-byte Folded Reload
@@ -670,6 +686,10 @@ entry:
define <3 x double> @constrained_vector_fmul_v3f64(<3 x double> %x, <3 x double> %y) #0 {
; PC64LE-LABEL: constrained_vector_fmul_v3f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 5, 4
; PC64LE-NEXT: xxmrghd 1, 2, 1
; PC64LE-NEXT: xsmuldp 3, 3, 6
@@ -679,6 +699,10 @@ define <3 x double> @constrained_vector_fmul_v3f64(<3 x double> %x, <3 x double>
;
; PC64LE9-LABEL: constrained_vector_fmul_v3f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 5, 4
; PC64LE9-NEXT: xxmrghd 1, 2, 1
; PC64LE9-NEXT: xsmuldp 3, 3, 6
@@ -820,6 +844,10 @@ entry:
define <3 x double> @constrained_vector_fadd_v3f64(<3 x double> %x, <3 x double> %y) #0 {
; PC64LE-LABEL: constrained_vector_fadd_v3f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 5, 4
; PC64LE-NEXT: xxmrghd 1, 2, 1
; PC64LE-NEXT: xsadddp 3, 3, 6
@@ -829,6 +857,10 @@ define <3 x double> @constrained_vector_fadd_v3f64(<3 x double> %x, <3 x double>
;
; PC64LE9-LABEL: constrained_vector_fadd_v3f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 5, 4
; PC64LE9-NEXT: xxmrghd 1, 2, 1
; PC64LE9-NEXT: xsadddp 3, 3, 6
@@ -970,6 +1002,10 @@ entry:
define <3 x double> @constrained_vector_fsub_v3f64(<3 x double> %x, <3 x double> %y) #0 {
; PC64LE-LABEL: constrained_vector_fsub_v3f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 5, 4
; PC64LE-NEXT: xxmrghd 1, 2, 1
; PC64LE-NEXT: xssubdp 3, 3, 6
@@ -979,6 +1015,10 @@ define <3 x double> @constrained_vector_fsub_v3f64(<3 x double> %x, <3 x double>
;
; PC64LE9-LABEL: constrained_vector_fsub_v3f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 5, 4
; PC64LE9-NEXT: xxmrghd 1, 2, 1
; PC64LE9-NEXT: xssubdp 3, 3, 6
@@ -1105,6 +1145,8 @@ entry:
define <3 x double> @constrained_vector_sqrt_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_sqrt_v3f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 2, 1
; PC64LE-NEXT: xssqrtdp 3, 3
; PC64LE-NEXT: xvsqrtdp 2, 0
@@ -1113,6 +1155,8 @@ define <3 x double> @constrained_vector_sqrt_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_sqrt_v3f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 2, 1
; PC64LE9-NEXT: xssqrtdp 3, 3
; PC64LE9-NEXT: xvsqrtdp 2, 0
@@ -1203,6 +1247,7 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double>
; PC64LE-NEXT: bl pow
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -1233,6 +1278,7 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double>
; PC64LE9-NEXT: xxswapd 2, 63
; PC64LE9-NEXT: bl pow
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 61, 1
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload
@@ -1384,6 +1430,7 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
; PC64LE-NEXT: fmr 2, 30
; PC64LE-NEXT: bl pow
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 29
; PC64LE-NEXT: fmr 2, 31
@@ -1425,6 +1472,7 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
; PC64LE9-NEXT: fmr 2, 30
; PC64LE9-NEXT: bl pow
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 29
; PC64LE9-NEXT: fmr 2, 31
@@ -1480,6 +1528,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
; PC64LE-NEXT: xxswapd 2, 62
; PC64LE-NEXT: bl pow
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 59, 1
; PC64LE-NEXT: xxlor 1, 61, 61
; PC64LE-NEXT: xxlor 2, 63, 63
@@ -1492,6 +1541,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 112
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 60, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 96
@@ -1530,6 +1580,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
; PC64LE9-NEXT: xxswapd 2, 62
; PC64LE9-NEXT: bl pow
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 59, 1
; PC64LE9-NEXT: xscpsgndp 1, 61, 61
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
@@ -1540,6 +1591,7 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
; PC64LE9-NEXT: xxswapd 2, 63
; PC64LE9-NEXT: bl pow
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 60, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 96(1) # 16-byte Folded Reload
@@ -1618,6 +1670,7 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 {
; PC64LE-NEXT: bl __powidf2
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 62, 1
; PC64LE-NEXT: ld 30, 80(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
@@ -1647,6 +1700,7 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 {
; PC64LE9-NEXT: mr 4, 30
; PC64LE9-NEXT: bl __powidf2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 62, 1
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
@@ -1790,6 +1844,7 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
; PC64LE-NEXT: mr 4, 30
; PC64LE-NEXT: bl __powidf2
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: mr 4, 30
@@ -1828,6 +1883,7 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
; PC64LE9-NEXT: mr 4, 30
; PC64LE9-NEXT: bl __powidf2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: mr 4, 30
@@ -1878,6 +1934,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
; PC64LE-NEXT: mr 4, 30
; PC64LE-NEXT: bl __powidf2
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 61, 1
; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: mr 4, 30
@@ -1890,6 +1947,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 61, 1
; PC64LE-NEXT: ld 30, 96(1) # 8-byte Folded Reload
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
@@ -1923,6 +1981,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
; PC64LE9-NEXT: mr 4, 30
; PC64LE9-NEXT: bl __powidf2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 61, 1
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
; PC64LE9-NEXT: mr 4, 30
@@ -1933,6 +1992,7 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
; PC64LE9-NEXT: mr 4, 30
; PC64LE9-NEXT: bl __powidf2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 61, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
@@ -2003,6 +2063,7 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 {
; PC64LE-NEXT: bl sin
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 62, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
@@ -2027,6 +2088,7 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl sin
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 62, 1
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
@@ -2149,6 +2211,7 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
; PC64LE-NEXT: fmr 1, 30
; PC64LE-NEXT: bl sin
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: bl sin
@@ -2181,6 +2244,7 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
; PC64LE9-NEXT: fmr 1, 30
; PC64LE9-NEXT: bl sin
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: bl sin
@@ -2224,6 +2288,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: xxswapd 1, 62
; PC64LE-NEXT: bl sin
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 61, 1
; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: bl sin
@@ -2234,6 +2299,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -2262,6 +2328,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 62
; PC64LE9-NEXT: bl sin
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 61, 1
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
; PC64LE9-NEXT: bl sin
@@ -2270,6 +2337,7 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl sin
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 61, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
@@ -2338,6 +2406,7 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 {
; PC64LE-NEXT: bl cos
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 62, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
@@ -2362,6 +2431,7 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl cos
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 62, 1
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
@@ -2484,6 +2554,7 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
; PC64LE-NEXT: fmr 1, 30
; PC64LE-NEXT: bl cos
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: bl cos
@@ -2516,6 +2587,7 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
; PC64LE9-NEXT: fmr 1, 30
; PC64LE9-NEXT: bl cos
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: bl cos
@@ -2559,6 +2631,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: xxswapd 1, 62
; PC64LE-NEXT: bl cos
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 61, 1
; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: bl cos
@@ -2569,6 +2642,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -2597,6 +2671,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 62
; PC64LE9-NEXT: bl cos
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 61, 1
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
; PC64LE9-NEXT: bl cos
@@ -2605,6 +2680,7 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl cos
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 61, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
@@ -2673,6 +2749,7 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 {
; PC64LE-NEXT: bl exp
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 62, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
@@ -2697,6 +2774,7 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl exp
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 62, 1
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
@@ -2819,6 +2897,7 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
; PC64LE-NEXT: fmr 1, 30
; PC64LE-NEXT: bl exp
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: bl exp
@@ -2851,6 +2930,7 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
; PC64LE9-NEXT: fmr 1, 30
; PC64LE9-NEXT: bl exp
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: bl exp
@@ -2894,6 +2974,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: xxswapd 1, 62
; PC64LE-NEXT: bl exp
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 61, 1
; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: bl exp
@@ -2904,6 +2985,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -2932,6 +3014,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 62
; PC64LE9-NEXT: bl exp
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 61, 1
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
; PC64LE9-NEXT: bl exp
@@ -2940,6 +3023,7 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl exp
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 61, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
@@ -3008,6 +3092,7 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 {
; PC64LE-NEXT: bl exp2
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 62, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
@@ -3032,6 +3117,7 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl exp2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 62, 1
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
@@ -3154,6 +3240,7 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
; PC64LE-NEXT: fmr 1, 30
; PC64LE-NEXT: bl exp2
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: bl exp2
@@ -3186,6 +3273,7 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
; PC64LE9-NEXT: fmr 1, 30
; PC64LE9-NEXT: bl exp2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: bl exp2
@@ -3229,6 +3317,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: xxswapd 1, 62
; PC64LE-NEXT: bl exp2
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 61, 1
; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: bl exp2
@@ -3239,6 +3328,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -3267,6 +3357,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 62
; PC64LE9-NEXT: bl exp2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 61, 1
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
; PC64LE9-NEXT: bl exp2
@@ -3275,6 +3366,7 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl exp2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 61, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
@@ -3343,6 +3435,7 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 {
; PC64LE-NEXT: bl log
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 62, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
@@ -3367,6 +3460,7 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl log
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 62, 1
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
@@ -3489,6 +3583,7 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
; PC64LE-NEXT: fmr 1, 30
; PC64LE-NEXT: bl log
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: bl log
@@ -3521,6 +3616,7 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
; PC64LE9-NEXT: fmr 1, 30
; PC64LE9-NEXT: bl log
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: bl log
@@ -3564,6 +3660,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: xxswapd 1, 62
; PC64LE-NEXT: bl log
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 61, 1
; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: bl log
@@ -3574,6 +3671,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -3602,6 +3700,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 62
; PC64LE9-NEXT: bl log
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 61, 1
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
; PC64LE9-NEXT: bl log
@@ -3610,6 +3709,7 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl log
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 61, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
@@ -3678,6 +3778,7 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 {
; PC64LE-NEXT: bl log10
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 62, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
@@ -3702,6 +3803,7 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl log10
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 62, 1
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
@@ -3824,6 +3926,7 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
; PC64LE-NEXT: fmr 1, 30
; PC64LE-NEXT: bl log10
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: bl log10
@@ -3856,6 +3959,7 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
; PC64LE9-NEXT: fmr 1, 30
; PC64LE9-NEXT: bl log10
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: bl log10
@@ -3899,6 +4003,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: xxswapd 1, 62
; PC64LE-NEXT: bl log10
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 61, 1
; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: bl log10
@@ -3909,6 +4014,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -3937,6 +4043,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 62
; PC64LE9-NEXT: bl log10
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 61, 1
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
; PC64LE9-NEXT: bl log10
@@ -3945,6 +4052,7 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl log10
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 61, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
@@ -4013,6 +4121,7 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 {
; PC64LE-NEXT: bl log2
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 62, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
@@ -4037,6 +4146,7 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl log2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 62, 1
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
@@ -4159,6 +4269,7 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
; PC64LE-NEXT: fmr 1, 30
; PC64LE-NEXT: bl log2
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: bl log2
@@ -4191,6 +4302,7 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
; PC64LE9-NEXT: fmr 1, 30
; PC64LE9-NEXT: bl log2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: bl log2
@@ -4234,6 +4346,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: xxswapd 1, 62
; PC64LE-NEXT: bl log2
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 61, 1
; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: bl log2
@@ -4244,6 +4357,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -4272,6 +4386,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 62
; PC64LE9-NEXT: bl log2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 61, 1
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
; PC64LE9-NEXT: bl log2
@@ -4280,6 +4395,7 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl log2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 61, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
@@ -4387,6 +4503,8 @@ define <3 x float> @constrained_vector_rint_v3f32(<3 x float> %x) #0 {
define <3 x double> @constrained_vector_rint_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_rint_v3f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 2, 1
; PC64LE-NEXT: xsrdpic 3, 3
; PC64LE-NEXT: xvrdpic 2, 0
@@ -4395,6 +4513,8 @@ define <3 x double> @constrained_vector_rint_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_rint_v3f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 2, 1
; PC64LE9-NEXT: xsrdpic 3, 3
; PC64LE9-NEXT: xvrdpic 2, 0
@@ -4479,6 +4599,7 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 {
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 62, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
@@ -4503,6 +4624,7 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 62, 1
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
@@ -4625,6 +4747,7 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
; PC64LE-NEXT: fmr 1, 30
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: bl nearbyint
@@ -4657,6 +4780,7 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
; PC64LE9-NEXT: fmr 1, 30
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: bl nearbyint
@@ -4700,6 +4824,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: xxswapd 1, 62
; PC64LE-NEXT: bl nearbyint
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 61, 1
; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: bl nearbyint
@@ -4710,6 +4835,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -4738,6 +4864,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 62
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 61, 1
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
; PC64LE9-NEXT: bl nearbyint
@@ -4746,6 +4873,7 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl nearbyint
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 61, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
@@ -4927,6 +5055,10 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: stdu 1, -64(1)
; PC64LE-NEXT: li 3, 48
+; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 5, 4
; PC64LE-NEXT: xxmrghd 1, 2, 1
; PC64LE-NEXT: std 0, 80(1)
@@ -4950,6 +5082,10 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: stdu 1, -48(1)
+; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 5, 4
; PC64LE9-NEXT: xxmrghd 1, 2, 1
; PC64LE9-NEXT: std 0, 64(1)
@@ -5159,6 +5295,10 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
; PC64LE-NEXT: mflr 0
; PC64LE-NEXT: stdu 1, -64(1)
; PC64LE-NEXT: li 3, 48
+; PC64LE-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 5, 4
; PC64LE-NEXT: xxmrghd 1, 2, 1
; PC64LE-NEXT: std 0, 80(1)
@@ -5182,6 +5322,10 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
; PC64LE9-NEXT: stdu 1, -48(1)
+; PC64LE9-NEXT: # kill: def $f5 killed $f5 def $vsl5
+; PC64LE9-NEXT: # kill: def $f4 killed $f4 def $vsl4
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 5, 4
; PC64LE9-NEXT: xxmrghd 1, 2, 1
; PC64LE9-NEXT: std 0, 64(1)
@@ -6520,6 +6664,8 @@ entry:
define <3 x double> @constrained_vector_ceil_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_ceil_v3f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 2, 1
; PC64LE-NEXT: xsrdpip 3, 3
; PC64LE-NEXT: xvrdpip 2, 0
@@ -6528,6 +6674,8 @@ define <3 x double> @constrained_vector_ceil_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_ceil_v3f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 2, 1
; PC64LE9-NEXT: xsrdpip 3, 3
; PC64LE9-NEXT: xvrdpip 2, 0
@@ -6628,6 +6776,8 @@ entry:
define <3 x double> @constrained_vector_floor_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_floor_v3f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 2, 1
; PC64LE-NEXT: xsrdpim 3, 3
; PC64LE-NEXT: xvrdpim 2, 0
@@ -6636,6 +6786,8 @@ define <3 x double> @constrained_vector_floor_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_floor_v3f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 2, 1
; PC64LE9-NEXT: xsrdpim 3, 3
; PC64LE9-NEXT: xvrdpim 2, 0
@@ -6736,6 +6888,8 @@ entry:
define <3 x double> @constrained_vector_round_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_round_v3f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 2, 1
; PC64LE-NEXT: xsrdpi 3, 3
; PC64LE-NEXT: xvrdpi 2, 0
@@ -6744,6 +6898,8 @@ define <3 x double> @constrained_vector_round_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_round_v3f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 2, 1
; PC64LE9-NEXT: xsrdpi 3, 3
; PC64LE9-NEXT: xvrdpi 2, 0
@@ -6843,6 +6999,8 @@ entry:
define <3 x double> @constrained_vector_trunc_v3f64(<3 x double> %x) #0 {
; PC64LE-LABEL: constrained_vector_trunc_v3f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 0, 2, 1
; PC64LE-NEXT: xsrdpiz 3, 3
; PC64LE-NEXT: xvrdpiz 2, 0
@@ -6851,6 +7009,8 @@ define <3 x double> @constrained_vector_trunc_v3f64(<3 x double> %x) #0 {
;
; PC64LE9-LABEL: constrained_vector_trunc_v3f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: # kill: def $f2 killed $f2 def $vsl2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 0, 2, 1
; PC64LE9-NEXT: xsrdpiz 3, 3
; PC64LE9-NEXT: xvrdpiz 2, 0
@@ -8049,6 +8209,7 @@ define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 {
; PC64LE-NEXT: bl tan
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 62, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 48
@@ -8073,6 +8234,7 @@ define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl tan
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 62, 1
; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload
@@ -8195,6 +8357,7 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 {
; PC64LE-NEXT: fmr 1, 30
; PC64LE-NEXT: bl tan
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 31
; PC64LE-NEXT: bl tan
@@ -8227,6 +8390,7 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 {
; PC64LE9-NEXT: fmr 1, 30
; PC64LE9-NEXT: bl tan
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 31
; PC64LE9-NEXT: bl tan
@@ -8270,6 +8434,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: xxswapd 1, 62
; PC64LE-NEXT: bl tan
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 61, 1
; PC64LE-NEXT: xxlor 1, 63, 63
; PC64LE-NEXT: bl tan
@@ -8280,6 +8445,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -8308,6 +8474,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 62
; PC64LE9-NEXT: bl tan
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 61, 1
; PC64LE9-NEXT: xscpsgndp 1, 63, 63
; PC64LE9-NEXT: bl tan
@@ -8316,6 +8483,7 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
; PC64LE9-NEXT: xxswapd 1, 63
; PC64LE9-NEXT: bl tan
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 61, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
@@ -8390,6 +8558,7 @@ define <2 x double> @constrained_vector_atan2_v2f64(<2 x double> %x, <2 x double
; PC64LE-NEXT: bl atan2
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 80
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 34, 61, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 64
@@ -8420,6 +8589,7 @@ define <2 x double> @constrained_vector_atan2_v2f64(<2 x double> %x, <2 x double
; PC64LE9-NEXT: xxswapd 2, 63
; PC64LE9-NEXT: bl atan2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 34, 61, 1
; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload
; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload
@@ -8571,6 +8741,7 @@ define <3 x double> @constrained_vector_atan2_v3f64(<3 x double> %x, <3 x double
; PC64LE-NEXT: fmr 2, 30
; PC64LE-NEXT: bl atan2
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 63, 1, 63
; PC64LE-NEXT: fmr 1, 29
; PC64LE-NEXT: fmr 2, 31
@@ -8612,6 +8783,7 @@ define <3 x double> @constrained_vector_atan2_v3f64(<3 x double> %x, <3 x double
; PC64LE9-NEXT: fmr 2, 30
; PC64LE9-NEXT: bl atan2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 63, 1, 63
; PC64LE9-NEXT: fmr 1, 29
; PC64LE9-NEXT: fmr 2, 31
@@ -8667,6 +8839,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
; PC64LE-NEXT: xxswapd 2, 62
; PC64LE-NEXT: bl atan2
; PC64LE-NEXT: nop
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 62, 59, 1
; PC64LE-NEXT: xxlor 1, 61, 61
; PC64LE-NEXT: xxlor 2, 63, 63
@@ -8679,6 +8852,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
; PC64LE-NEXT: nop
; PC64LE-NEXT: li 3, 112
; PC64LE-NEXT: vmr 2, 30
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE-NEXT: xxmrghd 35, 60, 1
; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PC64LE-NEXT: li 3, 96
@@ -8717,6 +8891,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
; PC64LE9-NEXT: xxswapd 2, 62
; PC64LE9-NEXT: bl atan2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 62, 59, 1
; PC64LE9-NEXT: xscpsgndp 1, 61, 61
; PC64LE9-NEXT: xscpsgndp 2, 63, 63
@@ -8727,6 +8902,7 @@ define <4 x double> @constrained_vector_atan2_v4f64(<4 x double> %x, <4 x double
; PC64LE9-NEXT: xxswapd 2, 63
; PC64LE9-NEXT: bl atan2
; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
; PC64LE9-NEXT: xxmrghd 35, 60, 1
; PC64LE9-NEXT: vmr 2, 30
; PC64LE9-NEXT: lxv 63, 96(1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
new file mode 100644
index 0000000000000..ea7454faad218
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s | FileCheck %s
+
+%struct.wibble = type { %struct.wombat }
+%struct.wombat = type { %struct.ham, [3 x i8] }
+%struct.ham = type { %struct.zot }
+%struct.zot = type { %struct.blam }
+%struct.blam = type { %struct.ham.0 }
+%struct.ham.0 = type { %struct.bar }
+%struct.bar = type { %struct.bar.1 }
+%struct.bar.1 = type { %struct.baz, i8 }
+%struct.baz = type { %struct.snork }
+%struct.snork = type <{ %struct.spam, i8, [3 x i8] }>
+%struct.spam = type { %struct.snork.2, %struct.snork.2 }
+%struct.snork.2 = type { i32 }
+%struct.snork.3 = type { %struct.baz, i8, [3 x i8] }
+
+define void @foo(ptr %arg, ptr %arg1, i40 %arg2, ptr %arg3, i32 %arg4) #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: movl %r8d, %r14d
+; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq %rsi, %r13
+; CHECK-NEXT: movq %rdi, %r15
+; CHECK-NEXT: incl %r14d
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: # implicit-def: $r12
+; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %bb17
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: movq %r15, %r13
+; CHECK-NEXT: xorl %r15d, %r15d
+; CHECK-NEXT: testq %rbx, %rbx
+; CHECK-NEXT: sete %r15b
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq _Znwm at PLT
+; CHECK-NEXT: shll $4, %r15d
+; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; CHECK-NEXT: movq %r12, %rcx
+; CHECK-NEXT: shrq $32, %rcx
+; CHECK-NEXT: movb %cl, 12(%rax)
+; CHECK-NEXT: movl %r12d, 8(%rax)
+; CHECK-NEXT: movq %r15, %rbx
+; CHECK-NEXT: movq %r13, %r15
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; CHECK-NEXT: decl %r14d
+; CHECK-NEXT: je .LBB0_8
+; CHECK-NEXT: .LBB0_3: # %bb7
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: callq widget at PLT
+; CHECK-NEXT: cmpb $-5, (%r13)
+; CHECK-NEXT: jae .LBB0_5
+; CHECK-NEXT: # %bb.4: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: movl %r12d, %r12d
+; CHECK-NEXT: cmpq %r15, %rbx
+; CHECK-NEXT: jbe .LBB0_1
+; CHECK-NEXT: jmp .LBB0_7
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_5: # %bb12
+; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: movq 0, %rax
+; CHECK-NEXT: movq 8, %rax
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; CHECK-NEXT: cmpq %r15, %rbx
+; CHECK-NEXT: jbe .LBB0_1
+; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: decl %r14d
+; CHECK-NEXT: jne .LBB0_3
+; CHECK-NEXT: .LBB0_8: # %bb21
+; CHECK-NEXT: cmpb $0, 12(%rax)
+; CHECK-NEXT: jne .LBB0_10
+; CHECK-NEXT: # %bb.9: # %bb26
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_10: # %bb25
+; CHECK-NEXT: .cfi_def_cfa %rbp, 16
+; CHECK-NEXT: movq %r15, %rdi
+; CHECK-NEXT: callq pluto at PLT
+bb:
+ br label %bb7
+
+bb5: ; preds = %bb17, %bb14
+ %phi = phi ptr [ %call19, %bb17 ], [ null, %bb14 ]
+ %phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb14 ]
+ %add = add i32 %phi9, 1
+ %icmp = icmp eq i32 %phi9, %arg4
+ br i1 %icmp, label %bb21, label %bb7
+
+bb7: ; preds = %bb5, %bb
+ %phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ]
+ %phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
+ %phi10 = phi i40 [ poison, %bb ], [ %phi15, %bb5 ]
+ %call = call ptr @widget()
+ %load = load i8, ptr %arg1, align 8
+ %icmp11 = icmp ult i8 %load, -5
+ %and = and i40 %phi10, 4294967295
+ br i1 %icmp11, label %bb14, label %bb12
+
+bb12: ; preds = %bb7
+ %load13 = load volatile { i64, i64 }, ptr null, align 4294967296
+ br label %bb14
+
+bb14: ; preds = %bb12, %bb7
+ %phi15 = phi i40 [ %and, %bb7 ], [ %arg2, %bb12 ]
+ %icmp16 = icmp ugt ptr %phi8, %arg
+ br i1 %icmp16, label %bb5, label %bb17
+
+bb17: ; preds = %bb14
+ %icmp18 = icmp eq ptr %phi8, null
+ %zext = zext i1 %icmp18 to i64
+ %call19 = call ptr @_Znwm(i64 0)
+ %getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext
+ %getelementptr20 = getelementptr i8, ptr %call19, i64 8
+ store i40 %phi15, ptr %getelementptr20, align 4
+ br label %bb5
+
+bb21: ; preds = %bb5
+ %getelementptr22 = getelementptr %struct.snork.3, ptr %phi, i64 0, i32 1
+ %load23 = load i8, ptr %getelementptr22, align 4
+ %icmp24 = icmp eq i8 %load23, 0
+ br i1 %icmp24, label %bb26, label %bb25
+
+bb25: ; preds = %bb21
+ call void @pluto(ptr %arg)
+ unreachable
+
+bb26: ; preds = %bb21
+ ret void
+}
+
+define void @eggs(ptr %arg, ptr %arg1) {
+; CHECK-LABEL: eggs:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movq %rsi, %rdi
+; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: xorl %r8d, %r8d
+; CHECK-NEXT: callq foo at PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+bb:
+ call void @foo(ptr %arg1, ptr %arg, i40 0, ptr null, i32 0)
+ ret void
+}
+
+declare ptr @widget()
+
+declare void @pluto(ptr)
+
+declare ptr @_Znwm(i64)
+
+attributes #0 = { noinline "frame-pointer"="all" }
diff --git a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
index 8241a1757af52..0bc208dc709d7 100644
--- a/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
+++ b/llvm/test/CodeGen/X86/coalescer-implicit-def-regression-imp-operand-assert.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=register-coalescer -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=register-coalescer -o - %s | FileCheck %s --match-full-lines
---
name: rematerialize_subreg_to_reg_added_impdef_1
tracksRegLiveness: true
@@ -9,7 +9,7 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x2aaaaaab), %bb.2(0x55555555)
; CHECK-NEXT: liveins: $edi
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
; CHECK-NEXT: JCC_1 %bb.2, 5, implicit killed undef $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
@@ -28,7 +28,7 @@ body: |
; CHECK-NEXT: JCC_1 %bb.5, 5, implicit killed undef $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al
+ ; CHECK-NEXT: dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $al, implicit-def $al
; CHECK-NEXT: RET 0, killed undef $al
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
diff --git a/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir
new file mode 100644
index 0000000000000..2e6395f065e25
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -enable-subreg-liveness -verify-coalescing -o - %s | FileCheck %s
+
+---
+name: requires_new_subrange_coalesce_subreg_to_reg
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: requires_new_subrange_coalesce_subreg_to_reg
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $eax
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = COPY $eax
+ ; CHECK-NEXT: %b:gr32 = IMPLICIT_DEF
+ ; CHECK-NEXT: %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
+ ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit undef $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef %a.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+ ; CHECK-NEXT: %c.sub_32bit:gr64 = COPY %a
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: %c.sub_32bit:gr64 = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
+ ; CHECK-NEXT: RET 0, implicit %c
+ bb.0:
+ liveins: $eax
+ %init_eax:gr32 = COPY $eax
+ %a:gr64 = SUBREG_TO_REG 0, %init_eax, %subreg.sub_32bit
+ %b:gr32 = IMPLICIT_DEF
+ %c:gr64 = INSERT_SUBREG %a, %b, %subreg.sub_32bit
+ JCC_1 %bb.2, 4, implicit undef $eflags
+
+ bb.1:
+ %imm0:gr32 = MOV32r0 implicit-def dead $eflags
+ %a = SUBREG_TO_REG 0, %imm0, %subreg.sub_32bit
+ %c.sub_32bit = COPY %a
+
+ bb.2:
+ %c.sub_32bit = SUBREG_TO_REG %a, %b, %subreg.sub_32bit
+ RET 0, implicit %c
+
+...
diff --git a/llvm/test/CodeGen/X86/pr76416.ll b/llvm/test/CodeGen/X86/pr76416.ll
new file mode 100644
index 0000000000000..68e9ef9c87f6e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr76416.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;
+; Reproducer from https://github.com/llvm/llvm-project/issues/76416
+;
+
+ at load_p = external global ptr, align 8
+ at load_data = external global i8, align 1
+
+define dso_local void @pr76416() {
+; CHECK-LABEL: pr76416:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: jg .LBB0_3
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_2: # %for.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: incl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: jle .LBB0_2
+; CHECK-NEXT: .LBB0_3: # %for.end
+; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq load_p at GOTPCREL(%rip), %rax
+; CHECK-NEXT: movq load_data at GOTPCREL(%rip), %rcx
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_4: # %for.cond1
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movq (%rax), %rdx
+; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: movzbl (%rdx,%rsi), %edx
+; CHECK-NEXT: movb %dl, (%rcx)
+; CHECK-NEXT: leal 1(%rsi), %edx
+; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: jmp .LBB0_4
+entry:
+ %alloca = alloca i32, align 4
+ store i32 0, ptr %alloca, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %load.from.alloca.0 = load i32, ptr %alloca, align 4
+ %cmp = icmp slt i32 %load.from.alloca.0, 4
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ call void asm sideeffect "", "{ax},~{dirflag},~{fpsr},~{flags}"(i8 0) nounwind
+ %load.from.alloca.1 = load i32, ptr %alloca, align 4
+ %inc = add nsw i32 %load.from.alloca.1, 1
+ store i32 %inc, ptr %alloca, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ store i32 0, ptr %alloca, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.cond1, %for.end
+ call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) nounwind
+ %load.from.load_p = load ptr, ptr @load_p, align 8
+ %regs = getelementptr inbounds { [4 x i8] }, ptr %load.from.load_p, i32 0, i32 0
+ %load.from.alloca.2 = load i32, ptr %alloca, align 4
+ %idxprom = sext i32 %load.from.alloca.2 to i64
+ %arrayidx = getelementptr inbounds [4 x i8], ptr %regs, i64 0, i64 %idxprom
+ %load.with.gep.ptr = load i8, ptr %arrayidx, align 1
+ store i8 %load.with.gep.ptr, ptr @load_data, align 1
+ %load.from.alloca.3 = load i32, ptr %alloca, align 4
+ %inc2 = add nsw i32 %load.from.alloca.3, 1
+ store i32 %inc2, ptr %alloca, align 4
+ br label %for.cond1
+}
diff --git a/llvm/test/CodeGen/X86/subreg-fail.mir b/llvm/test/CodeGen/X86/subreg-fail.mir
index c8146f099b814..dc690719e8581 100644
--- a/llvm/test/CodeGen/X86/subreg-fail.mir
+++ b/llvm/test/CodeGen/X86/subreg-fail.mir
@@ -14,8 +14,8 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: test1
- ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
- ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+ ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg, implicit-def [[MOV32rm]] :: (volatile load (s32) from `ptr undef`)
+ ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg, implicit-def [[MOV32rm1]] :: (volatile load (s32) from `ptr undef`)
; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32rm1]], 32, implicit-def dead $eflags
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = LEA64r [[MOV32rm1]], 1, [[MOV32rm]], 256, $noreg
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = SHR64ri [[LEA64r]], 8, implicit-def dead $eflags
diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
new file mode 100644
index 0000000000000..e4fb812486c25
--- /dev/null
+++ b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
@@ -0,0 +1,451 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=x86_64-- -run-pass=register-coalescer -o - %s | FileCheck %s --match-full-lines
+
+# We cannot lose the liveness of the high subregister of %1 when
+# coalesced with %0, so introduce an implicit-def of the super
+# register on the MOV.
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr32 = MOV32r0 implicit-def dead $eflags
+ %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: subreg_to_reg_folds_to_undef
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $rax
+
+ ; CHECK-LABEL: name: subreg_to_reg_folds_to_undef
+ ; CHECK: liveins: $rax
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_with_sub_8bit = COPY $rax
+ ; CHECK-NEXT: undef [[MOV32rr:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rr [[COPY]].sub_32bit, implicit-def [[MOV32rr]]
+ ; CHECK-NEXT: RET 0, implicit [[MOV32rr]]
+ %0:gr64 = COPY killed $rax
+ %1:gr32 = COPY killed %0.sub_32bit
+ %2:gr32 = MOV32rr killed %1
+ %3:gr64 = SUBREG_TO_REG 0, killed %2, %subreg.sub_32bit
+ %4:gr64 = COPY killed %3
+ RET 0, implicit %4
+
+...
+
+---
+name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_subreg_def_into_subreg_to_reg64
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+ %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_def_with_super_def_to_reg64
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]], implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi, implicit-def $rdi
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
+ %1:gr64 = SUBREG_TO_REG 0, killed %0.sub_32bit, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_already_defs_other_subreg
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_]].sub_8bit, implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit [[MOV32r0_]]
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef %0.sub_8bit
+ %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ INLINEASM &"", 0, implicit %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit undef $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+
+# Reduced realistic case which was asserting after introducing new implicit-defs
+---
+name: coalesce_needs_implicit_defs
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: coalesce_needs_implicit_defs
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $rdi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]], implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: undef [[MOV32r0_1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_1]], implicit-def [[MOV32r0_1]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+ ; CHECK-NEXT: TEST64rr [[MOV32r0_1]], [[MOV32r0_1]], implicit-def $eflags
+ ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = SETCCr 4, implicit killed $eflags
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32r0_2]], 4, implicit-def dead $eflags
+ ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = ADD64rr [[MOV32r0_2]], [[COPY]], implicit-def dead $eflags
+ ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr64_with_sub_8bit = COPY [[MOV32r0_2]]
+ ; CHECK-NEXT: JMP_1 %bb.1
+ bb.0:
+ liveins: $rdi
+
+ %0:gr64 = COPY killed $rdi
+ %1:gr32 = MOV32r0 implicit-def dead $eflags
+ %2:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit
+ %3:gr64 = COPY killed %2
+
+ bb.1:
+ %4:gr64 = COPY killed %3
+ %5:gr32 = MOV32r0 implicit-def dead $eflags
+ TEST64rr killed %4, %4, implicit-def $eflags
+ %6:gr8 = SETCCr 4, implicit killed $eflags
+ %7:gr32 = COPY killed %5
+ %7.sub_8bit:gr32 = COPY killed %6
+ %8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %9:gr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32bit
+ $rdi = COPY %9
+ CALL64r killed %9, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %10:gr64 = COPY killed %8
+ %10:gr64 = SHL64ri %10, 4, implicit-def dead $eflags
+ %11:gr64 = COPY killed %10
+ %11:gr64 = ADD64rr %11, %0, implicit-def dead $eflags
+ %3:gr64 = COPY killed %11
+ JMP_1 %bb.1
+
+...
+
+# Make sure to add the 'undef' flag to the result register %2,
+# because the top 32bits are not defined.
+---
+name: coalesce_add_implicitdef_and_undef
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: coalesce_add_implicitdef_and_undef
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $eflags, $edx
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $edx
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = ADD32ri [[COPY]].sub_32bit, -34, implicit-def $eflags, implicit-def [[COPY]]
+ ; CHECK-NEXT: FAKE_USE [[COPY]]
+ ; CHECK-NEXT: RET 0
+ bb.0:
+ liveins: $eflags, $edx
+ %0:gr32 = COPY $edx
+ JMP_1 %bb.1
+
+ bb.1:
+ %1:gr32 = COPY %0
+ %1:gr32 = ADD32ri %1, -34, implicit-def $eflags
+ %2:gr64_with_sub_8bit = SUBREG_TO_REG 0, killed %1, %subreg.sub_32bit
+ FAKE_USE %2
+ RET 0
+...
+
+# We can't mark the destination register as 'undef' or add implicit-def
+# because the top 24 bits of %0:gr32 are retained by the SUBREG_TO_REG.
+#
+# For example, if this were to result in:
+#
+# undef %2.sub_32bit:gr64_with_sub_8bit = COPY $edx
+# %1:gr8 = SETCCr 4, implicit $eflags
+# JMP_1 %bb.1
+#
+# bb.1:
+# undef %2.sub_8bit:gr64_with_sub_8bit = COPY %1, implicit-def %2
+#
+# Then this says that the top 56 bits of %2 are undef. That's not correct
+# because only the top 32 bits are undef.
+---
+name: coalesce_dont_add_implicitdef_or_undef
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: coalesce_dont_add_implicitdef_or_undef
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $eflags, $edx
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $edx
+ ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = COPY [[SETCCr]]
+ ; CHECK-NEXT: FAKE_USE [[COPY]]
+ ; CHECK-NEXT: RET 0
+ bb.0:
+ liveins: $eflags, $edx
+ %0:gr32 = COPY $edx
+ %1:gr8 = SETCCr 4, implicit killed $eflags
+ JMP_1 %bb.1
+
+ bb.1:
+ %0.sub_8bit:gr32 = COPY %1
+ %2:gr64_with_sub_8bit = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ FAKE_USE %2
+ RET 0
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_def
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+ ; CHECK-NEXT: CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr32 = MOV32r0 implicit-def dead $eflags
+ $rdi = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ CALL64r killed $rdi, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $eax
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_physreg_use
+ ; CHECK: liveins: $eax
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: $eax = MOV32r0 implicit-def dead $eflags
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, $eax, %subreg.sub_32bit
+ ; CHECK-NEXT: $rdi = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: CALL64r [[SUBREG_TO_REG]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ $eax = MOV32r0 implicit-def dead $eflags
+ %1:gr64 = SUBREG_TO_REG 0, killed $eax, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+# Coalesced instruction is a copy with other implicit operands
+---
+name: coalesce_copy_into_subreg_to_reg64
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $eax
+ ; CHECK-LABEL: name: coalesce_copy_into_subreg_to_reg64
+ ; CHECK: liveins: $eax
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = COPY $eax, implicit-def dead $eflags, implicit-def [[COPY]]
+ ; CHECK-NEXT: $rdi = COPY [[COPY]]
+ ; CHECK-NEXT: CALL64r [[COPY]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr32 = COPY $eax, implicit-def dead $eflags
+ %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_multiple_redef_value
+ ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+ ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef [[MOV32r0_]].sub_32bit, implicit [[MOV32r0_]].sub_32bit, implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: $rdi = COPY [[MOV32r0_]]
+ ; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ %0:gr32 = MOV32r0 implicit-def dead $eflags
+ INLINEASM &"", 0, implicit-def %0, implicit %0
+ %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ $rdi = COPY %1
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_block_liveout
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1
+ ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags
+ ; CHECK-NEXT: JMP_1 %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: $rdi = COPY %1
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: RET 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ bb.0:
+ INLINEASM &"", 0, implicit-def %0:gr32
+ JCC_1 %bb.1, 4, implicit undef $eflags
+ JMP_1 %bb.2
+
+ bb.1:
+ %1:gr64 = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+ $rdi = COPY %1
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64r killed %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0
+
+ bb.2:
+
+...
+
+---
+name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def
+frameInfo:
+ adjustsStack: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: coalesce_mov32r0_into_subreg_to_reg64_def_is_phi_def
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, implicit-def undef %1.sub_32bit, implicit-def %1
+ ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit undef $eflags
+ ; CHECK-NEXT: JMP_1 %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $rdi = COPY %1
+ ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ bb.0:
+
+ INLINEASM &"", 0, implicit-def %0:gr32
+ JCC_1 %bb.1, 4, implicit undef $eflags
+ JMP_1 %bb.2
+
+ bb.1:
+ %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32bit
+ $rdi = COPY %1
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ CALL64r %1, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ JMP_1 %bb.1
+
+ bb.2:
+
+...
>From 7228ceb9749d9491710bfa3f872c261942ff15bc Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 27 Oct 2025 11:26:24 +0000
Subject: [PATCH 2/5] Fixes for issues reported when landing #134408
The register coalescer ran into some asserts:
* The newly added code tried to get the LiveInterval for a physical
register (unguarded path)
* The assert 'assert(SubregToRegSrcInsts.empty() && "can this happen?");'
could happen when using SUBREG_TO_REG to say that the top bits
of the second register in a {128, 128} register tuple are zero, e.g.
%8.qsub1:qq = MOVIv2d_ns 0
%4:zpr = SUBREG_TO_REG 0, %8.qsub1:qq, %subreg.zsub
---
llvm/lib/CodeGen/RegisterCoalescer.cpp | 10 +-
llvm/test/CodeGen/AArch64/pr151592.mir | 168 ++++++++++++++++++
llvm/test/CodeGen/AArch64/pr151888.mir | 17 ++
llvm/test/CodeGen/AArch64/pr164181.ll | 123 ++++++-------
.../CodeGen/LoongArch/lasx/build-vector.ll | 14 +-
llvm/test/CodeGen/LoongArch/lasx/fpowi.ll | 16 +-
.../LoongArch/lasx/scalar-to-vector.ll | 4 +-
llvm/test/CodeGen/PowerPC/half.ll | 1 +
.../CodeGen/X86/subreg-to-reg-coalescing.mir | 12 +-
9 files changed, 276 insertions(+), 89 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/pr151592.mir
create mode 100644 llvm/test/CodeGen/AArch64/pr151888.mir
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index b606b1cd5504b..fb4175a1c3452 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -2301,7 +2301,8 @@ bool RegisterCoalescer::joinCopy(
}
SmallVector<MachineInstr *> SubregToRegSrcInsts;
- if (CopyMI->isSubregToReg()) {
+ Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+ if (CopyMI->isSubregToReg() && !SrcReg.isPhysical()) {
// For the case where the copy instruction is a SUBREG_TO_REG, e.g.
//
// %0:gpr32 = movimm32 ..
@@ -2319,8 +2320,7 @@ bool RegisterCoalescer::joinCopy(
// // require an implicit-def,
// // because it has nothing to
// // do with the SUBREG_TO_REG.
- LiveInterval &SrcInt =
- LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &SrcInt = LIS->getInterval(SrcReg);
SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
@@ -2397,10 +2397,8 @@ bool RegisterCoalescer::joinCopy(
// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
- if (CP.getDstIdx()) {
- assert(SubregToRegSrcInsts.empty() && "can this happen?");
+ if (CP.getDstIdx())
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
- }
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
SubregToRegSrcInsts);
diff --git a/llvm/test/CodeGen/AArch64/pr151592.mir b/llvm/test/CodeGen/AArch64/pr151592.mir
new file mode 100644
index 0000000000000..dbcc1f8c08e9a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr151592.mir
@@ -0,0 +1,168 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -run-pass=register-coalescer -o - %s | FileCheck %s
+---
+name: reproducer
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: reproducer
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: undef [[MOVIv2d_ns:%[0-9]+]].qsub1:zpr2 = MOVIv2d_ns 0, implicit-def [[MOVIv2d_ns]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64sp = COPY $xzr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.3
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]].dsub:zpr2 = LDRDui [[COPY2]], 0, implicit-def [[MOVIv2d_ns]].zsub
+ ; CHECK-NEXT: ST2Twov2d [[MOVIv2d_ns]].zsub_qsub1, [[COPY]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 1
+ ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 0
+ ; CHECK-NEXT: B %bb.1
+ bb.0:
+ liveins: $w0, $x1
+ %0:gpr64common = COPY $x1
+ %1:gpr32 = COPY $w0
+ %2:gpr32 = COPY %1:gpr32
+ undef %8.qsub1:qq = MOVIv2d_ns 0
+ %4:zpr = SUBREG_TO_REG 0, %8.qsub1:qq, %subreg.zsub
+ %5:gpr64sp = COPY $xzr
+
+ bb.1:
+ TBNZW %2:gpr32, 0, %bb.3
+ B %bb.2
+
+ bb.2:
+ %8.dsub:qq = LDRDui %5:gpr64sp, 0, implicit-def %8.qsub0:qq
+ ST2Twov2d %8:qq, %0:gpr64common
+
+ bb.3:
+ STR_ZXI %4:zpr, %0:gpr64common, 1
+ STR_ZXI %4:zpr, %0:gpr64common, 0
+ B %bb.1
+...
+---
+name: reproducer2
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: reproducer2
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: undef [[MOVIv2d_ns:%[0-9]+]].zsub:zpr2 = MOVIv2d_ns 0
+ ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]].qsub1:zpr2 = MOVIv2d_ns 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64sp = COPY $xzr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.3
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]].dsub:zpr2 = LDRDui [[COPY2]], 0, implicit-def [[MOVIv2d_ns]].zsub
+ ; CHECK-NEXT: ST2Twov2d [[MOVIv2d_ns]].zsub_qsub1, [[COPY]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 1
+ ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub1, [[COPY]], 0
+ ; CHECK-NEXT: B %bb.1
+ bb.0:
+ liveins: $w0, $x1
+ %0:gpr64common = COPY $x1
+ %1:gpr32 = COPY $w0
+ %2:gpr32 = COPY %1:gpr32
+ undef %8.qsub0:qq = MOVIv2d_ns 0
+ %8.qsub1:qq = MOVIv2d_ns 0
+ %4:zpr = SUBREG_TO_REG 0, %8.qsub1:qq, %subreg.zsub
+ %5:gpr64sp = COPY $xzr
+
+ bb.1:
+ TBNZW %2:gpr32, 0, %bb.3
+ B %bb.2
+
+ bb.2:
+ %8.dsub:qq = LDRDui %5:gpr64sp, 0, implicit-def %8.qsub0:qq
+ ST2Twov2d %8:qq, %0:gpr64common
+
+ bb.3:
+ STR_ZXI %4:zpr, %0:gpr64common, 1
+ STR_ZXI %4:zpr, %0:gpr64common, 0
+ B %bb.1
+...
+---
+name: reproducer3
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: reproducer3
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: undef [[MOVIv2d_ns:%[0-9]+]].qsub1:zpr2 = MOVIv2d_ns 0
+ ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]].zsub:zpr2 = MOVIv2d_ns 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64sp = COPY $xzr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.3
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]].dsub1:zpr2 = LDRDui [[COPY2]], 0, implicit-def [[MOVIv2d_ns]].qsub1
+ ; CHECK-NEXT: ST2Twov2d [[MOVIv2d_ns]].zsub_qsub1, [[COPY]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub0, [[COPY]], 1
+ ; CHECK-NEXT: STR_ZXI [[MOVIv2d_ns]].zsub0, [[COPY]], 0
+ ; CHECK-NEXT: B %bb.1
+ bb.0:
+ liveins: $w0, $x1
+ %0:gpr64common = COPY $x1
+ %1:gpr32 = COPY $w0
+ %2:gpr32 = COPY %1:gpr32
+ undef %8.qsub1:qq = MOVIv2d_ns 0
+ %8.qsub0:qq = MOVIv2d_ns 0
+ %4:zpr = SUBREG_TO_REG 0, %8.qsub0:qq, %subreg.zsub
+ %5:gpr64sp = COPY $xzr
+
+ bb.1:
+ TBNZW %2:gpr32, 0, %bb.3
+ B %bb.2
+
+ bb.2:
+ %8.dsub1:qq = LDRDui %5:gpr64sp, 0, implicit-def %8.qsub1:qq
+ ST2Twov2d %8:qq, %0:gpr64common
+
+ bb.3:
+ STR_ZXI %4:zpr, %0:gpr64common, 1
+ STR_ZXI %4:zpr, %0:gpr64common, 0
+ B %bb.1
+...
diff --git a/llvm/test/CodeGen/AArch64/pr151888.mir b/llvm/test/CodeGen/AArch64/pr151888.mir
new file mode 100644
index 0000000000000..5b66f136cbc4f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr151888.mir
@@ -0,0 +1,17 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -run-pass=register-coalescer -o - %s | FileCheck %s
+---
+name: reproducer
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: reproducer
+ ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri $xzr, 0, 31
+ ; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %1:gpr32 = COPY killed $wzr
+ %2:gpr64 = SUBREG_TO_REG 0, %1:gpr32, %subreg.sub_32
+ %3:gpr64 = UBFMXri %2:gpr64, 0, 31
+ $x0 = COPY %3:gpr64
+ RET_ReallyLR implicit $x0
+...
diff --git a/llvm/test/CodeGen/AArch64/pr164181.ll b/llvm/test/CodeGen/AArch64/pr164181.ll
index 18732ae5ae300..c107e3d50619e 100644
--- a/llvm/test/CodeGen/AArch64/pr164181.ll
+++ b/llvm/test/CodeGen/AArch64/pr164181.ll
@@ -29,11 +29,11 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: str w4, [sp, #72] // 4-byte Spill
; CHECK-NEXT: str w3, [sp, #112] // 4-byte Spill
; CHECK-NEXT: str w5, [sp, #36] // 4-byte Spill
-; CHECK-NEXT: tbz w5, #0, .LBB0_43
+; CHECK-NEXT: tbz w5, #0, .LBB0_44
; CHECK-NEXT: // %bb.1: // %for.body41.lr.ph
; CHECK-NEXT: ldr x4, [sp, #312]
; CHECK-NEXT: ldr x14, [sp, #280]
-; CHECK-NEXT: tbz w0, #0, .LBB0_42
+; CHECK-NEXT: tbz w0, #0, .LBB0_43
; CHECK-NEXT: // %bb.2: // %for.body41.us.preheader
; CHECK-NEXT: ldrb w8, [sp, #368]
; CHECK-NEXT: ldrb w12, [sp, #256]
@@ -91,8 +91,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: // Child Loop BB0_8 Depth 3
; CHECK-NEXT: // Child Loop BB0_10 Depth 4
; CHECK-NEXT: // Child Loop BB0_11 Depth 5
-; CHECK-NEXT: // Child Loop BB0_28 Depth 5
-; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: // Child Loop BB0_27 Depth 5
+; CHECK-NEXT: // Child Loop BB0_40 Depth 5
; CHECK-NEXT: ldr w8, [sp, #20] // 4-byte Reload
; CHECK-NEXT: mov x12, x24
; CHECK-NEXT: str x24, [sp, #48] // 8-byte Spill
@@ -116,8 +116,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: // Child Loop BB0_8 Depth 3
; CHECK-NEXT: // Child Loop BB0_10 Depth 4
; CHECK-NEXT: // Child Loop BB0_11 Depth 5
-; CHECK-NEXT: // Child Loop BB0_28 Depth 5
-; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: // Child Loop BB0_27 Depth 5
+; CHECK-NEXT: // Child Loop BB0_40 Depth 5
; CHECK-NEXT: str x12, [sp, #40] // 8-byte Spill
; CHECK-NEXT: cmn x24, #30
; CHECK-NEXT: mov x12, #-30 // =0xffffffffffffffe2
@@ -141,8 +141,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: // => This Loop Header: Depth=3
; CHECK-NEXT: // Child Loop BB0_10 Depth 4
; CHECK-NEXT: // Child Loop BB0_11 Depth 5
-; CHECK-NEXT: // Child Loop BB0_28 Depth 5
-; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: // Child Loop BB0_27 Depth 5
+; CHECK-NEXT: // Child Loop BB0_40 Depth 5
; CHECK-NEXT: ldr x8, [sp, #64] // 8-byte Reload
; CHECK-NEXT: mov w14, #1152 // =0x480
; CHECK-NEXT: mov w24, #1 // =0x1
@@ -175,8 +175,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
; CHECK-NEXT: // => This Loop Header: Depth=4
; CHECK-NEXT: // Child Loop BB0_11 Depth 5
-; CHECK-NEXT: // Child Loop BB0_28 Depth 5
-; CHECK-NEXT: // Child Loop BB0_39 Depth 5
+; CHECK-NEXT: // Child Loop BB0_27 Depth 5
+; CHECK-NEXT: // Child Loop BB0_40 Depth 5
; CHECK-NEXT: ldr w8, [sp, #116] // 4-byte Reload
; CHECK-NEXT: and w8, w8, w8, asr #31
; CHECK-NEXT: str w8, [sp, #128] // 4-byte Spill
@@ -259,55 +259,44 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: .LBB0_24: // %if.end.us.7
; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
; CHECK-NEXT: mov x23, xzr
-; CHECK-NEXT: b .LBB0_28
+; CHECK-NEXT: b .LBB0_27
; CHECK-NEXT: .p2align 5, , 16
-; CHECK-NEXT: .LBB0_25: // %cond.true331.us
-; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
-; CHECK-NEXT: ldrsb w4, [x10]
-; CHECK-NEXT: .LBB0_26: // %cond.end345.us
-; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
-; CHECK-NEXT: strh w4, [x18]
-; CHECK-NEXT: mul x4, x22, x28
-; CHECK-NEXT: adrp x22, :got:var_46
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: ldr x22, [x22, :got_lo12:var_46]
-; CHECK-NEXT: str x4, [x22]
-; CHECK-NEXT: mov x4, #-18403 // =0xffffffffffffb81d
-; CHECK-NEXT: movk x4, #58909, lsl #16
-; CHECK-NEXT: .LBB0_27: // %for.inc371.us
-; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: .LBB0_25: // in Loop: Header=BB0_27 Depth=5
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: .LBB0_26: // %for.inc371.us
+; CHECK-NEXT: // in Loop: Header=BB0_27 Depth=5
; CHECK-NEXT: mov w22, #-18978 // =0xffffb5de
; CHECK-NEXT: orr x23, x23, #0x1
; CHECK-NEXT: mov x24, xzr
; CHECK-NEXT: mul w12, w12, w22
; CHECK-NEXT: mov x22, x5
-; CHECK-NEXT: tbz w0, #0, .LBB0_36
-; CHECK-NEXT: .LBB0_28: // %for.body194.us
+; CHECK-NEXT: tbz w0, #0, .LBB0_37
+; CHECK-NEXT: .LBB0_27: // %for.body194.us
; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
; CHECK-NEXT: // Parent Loop BB0_10 Depth=4
; CHECK-NEXT: // => This Inner Loop Header: Depth=5
-; CHECK-NEXT: cbnz wzr, .LBB0_30
-; CHECK-NEXT: // %bb.29: // %if.then222.us
-; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: cbnz wzr, .LBB0_29
+; CHECK-NEXT: // %bb.28: // %if.then222.us
+; CHECK-NEXT: // in Loop: Header=BB0_27 Depth=5
; CHECK-NEXT: adrp x27, :got:var_32
; CHECK-NEXT: ldur w8, [x19, #-12]
; CHECK-NEXT: ldr x27, [x27, :got_lo12:var_32]
; CHECK-NEXT: strh w8, [x27]
; CHECK-NEXT: sxtb w8, w25
; CHECK-NEXT: bic w25, w8, w8, asr #31
-; CHECK-NEXT: b .LBB0_31
+; CHECK-NEXT: b .LBB0_30
; CHECK-NEXT: .p2align 5, , 16
-; CHECK-NEXT: .LBB0_30: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: .LBB0_29: // in Loop: Header=BB0_27 Depth=5
; CHECK-NEXT: mov w25, wzr
-; CHECK-NEXT: .LBB0_31: // %if.end239.us
-; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: .LBB0_30: // %if.end239.us
+; CHECK-NEXT: // in Loop: Header=BB0_27 Depth=5
; CHECK-NEXT: strb w3, [x16]
; CHECK-NEXT: tst w13, #0xff
-; CHECK-NEXT: b.eq .LBB0_33
-; CHECK-NEXT: // %bb.32: // %if.then254.us
-; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: b.eq .LBB0_32
+; CHECK-NEXT: // %bb.31: // %if.then254.us
+; CHECK-NEXT: // in Loop: Header=BB0_27 Depth=5
; CHECK-NEXT: ldrh w8, [x26, x14, lsl #1]
; CHECK-NEXT: adrp x27, :got:var_35
; CHECK-NEXT: ldr x27, [x27, :got_lo12:var_35]
@@ -315,8 +304,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: csel x8, xzr, x7, eq
; CHECK-NEXT: str x8, [x27]
; CHECK-NEXT: strh w1, [x17]
-; CHECK-NEXT: .LBB0_33: // %if.end282.us
-; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: .LBB0_32: // %if.end282.us
+; CHECK-NEXT: // in Loop: Header=BB0_27 Depth=5
; CHECK-NEXT: orr x27, x24, x4
; CHECK-NEXT: adrp x8, :got:var_39
; CHECK-NEXT: str x27, [x18]
@@ -324,16 +313,30 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: str x10, [x8]
; CHECK-NEXT: ldrb w8, [x6, x9]
; CHECK-NEXT: str x8, [x18]
+; CHECK-NEXT: cbnz x2, .LBB0_25
+; CHECK-NEXT: // %bb.33: // %if.then327.us
+; CHECK-NEXT: // in Loop: Header=BB0_27 Depth=5
; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: cbnz x2, .LBB0_27
-; CHECK-NEXT: // %bb.34: // %if.then327.us
-; CHECK-NEXT: // in Loop: Header=BB0_28 Depth=5
-; CHECK-NEXT: cbz w8, .LBB0_25
-; CHECK-NEXT: // %bb.35: // in Loop: Header=BB0_28 Depth=5
+; CHECK-NEXT: cbnz w8, .LBB0_35
+; CHECK-NEXT: // %bb.34: // %cond.true331.us
+; CHECK-NEXT: // in Loop: Header=BB0_27 Depth=5
+; CHECK-NEXT: ldrsb w4, [x10]
+; CHECK-NEXT: b .LBB0_36
+; CHECK-NEXT: .LBB0_35: // in Loop: Header=BB0_27 Depth=5
; CHECK-NEXT: mov w4, wzr
+; CHECK-NEXT: .LBB0_36: // %cond.end345.us
+; CHECK-NEXT: // in Loop: Header=BB0_27 Depth=5
+; CHECK-NEXT: strh w4, [x18]
+; CHECK-NEXT: mul x4, x22, x28
+; CHECK-NEXT: adrp x22, :got:var_46
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: ldr x22, [x22, :got_lo12:var_46]
+; CHECK-NEXT: str x4, [x22]
+; CHECK-NEXT: mov x4, #-18403 // =0xffffffffffffb81d
+; CHECK-NEXT: movk x4, #58909, lsl #16
; CHECK-NEXT: b .LBB0_26
; CHECK-NEXT: .p2align 5, , 16
-; CHECK-NEXT: .LBB0_36: // %for.cond376.preheader.us
+; CHECK-NEXT: .LBB0_37: // %for.cond376.preheader.us
; CHECK-NEXT: // in Loop: Header=BB0_10 Depth=4
; CHECK-NEXT: mov w3, #1152 // =0x480
; CHECK-NEXT: mov x22, xzr
@@ -344,24 +347,24 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: madd x14, x14, x3, x11
; CHECK-NEXT: mov w28, w30
; CHECK-NEXT: mov w3, #-7680 // =0xffffe200
-; CHECK-NEXT: b .LBB0_39
+; CHECK-NEXT: b .LBB0_40
; CHECK-NEXT: .p2align 5, , 16
-; CHECK-NEXT: .LBB0_37: // %if.then466.us
-; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: .LBB0_38: // %if.then466.us
+; CHECK-NEXT: // in Loop: Header=BB0_40 Depth=5
; CHECK-NEXT: ldr x28, [sp, #152] // 8-byte Reload
; CHECK-NEXT: ldr x3, [sp, #136] // 8-byte Reload
; CHECK-NEXT: sxtb w4, w4
; CHECK-NEXT: bic w4, w4, w4, asr #31
; CHECK-NEXT: str x3, [x28]
; CHECK-NEXT: mov w3, #-7680 // =0xffffe200
-; CHECK-NEXT: .LBB0_38: // %for.inc505.us
-; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: .LBB0_39: // %for.inc505.us
+; CHECK-NEXT: // in Loop: Header=BB0_40 Depth=5
; CHECK-NEXT: add x22, x22, #1
; CHECK-NEXT: add x27, x27, #1
; CHECK-NEXT: mov w28, wzr
; CHECK-NEXT: cmp x27, #0
; CHECK-NEXT: b.hs .LBB0_9
-; CHECK-NEXT: .LBB0_39: // %for.body380.us
+; CHECK-NEXT: .LBB0_40: // %for.body380.us
; CHECK-NEXT: // Parent Loop BB0_4 Depth=1
; CHECK-NEXT: // Parent Loop BB0_6 Depth=2
; CHECK-NEXT: // Parent Loop BB0_8 Depth=3
@@ -373,18 +376,18 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
; CHECK-NEXT: strh w28, [x11]
; CHECK-NEXT: csel w28, w21, w3, ne
; CHECK-NEXT: str w28, [x20]
-; CHECK-NEXT: cbz x15, .LBB0_38
-; CHECK-NEXT: // %bb.40: // %if.then436.us
-; CHECK-NEXT: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: cbz x15, .LBB0_39
+; CHECK-NEXT: // %bb.41: // %if.then436.us
+; CHECK-NEXT: // in Loop: Header=BB0_40 Depth=5
; CHECK-NEXT: ldrh w28, [x14]
-; CHECK-NEXT: cbnz w28, .LBB0_37
-; CHECK-NEXT: // %bb.41: // in Loop: Header=BB0_39 Depth=5
+; CHECK-NEXT: cbnz w28, .LBB0_38
+; CHECK-NEXT: // %bb.42: // in Loop: Header=BB0_40 Depth=5
; CHECK-NEXT: mov w4, wzr
-; CHECK-NEXT: b .LBB0_38
-; CHECK-NEXT: .LBB0_42: // %for.body41
+; CHECK-NEXT: b .LBB0_39
+; CHECK-NEXT: .LBB0_43: // %for.body41
; CHECK-NEXT: strb wzr, [x4]
; CHECK-NEXT: strb wzr, [x14]
-; CHECK-NEXT: .LBB0_43: // %for.cond563.preheader
+; CHECK-NEXT: .LBB0_44: // %for.cond563.preheader
; CHECK-NEXT: ldp x20, x19, [sp, #224] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #208] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #192] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index d09ef0e2c6ac0..c12dbe488263f 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -1532,11 +1532,11 @@ define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float
; CHECK-NEXT: # kill: def $f7 killed $f7 def $vr7
; CHECK-NEXT: # kill: def $f6 killed $f6 def $vr6
; CHECK-NEXT: # kill: def $f5 killed $f5 def $vr5
-; CHECK-NEXT: # kill: def $f4 killed $f4 def $xr4
+; CHECK-NEXT: # kill: def $f4 killed $f4 def $vr4 def $xr4
; CHECK-NEXT: # kill: def $f3 killed $f3 def $vr3
; CHECK-NEXT: # kill: def $f2 killed $f2 def $vr2
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0
; CHECK-NEXT: vextrins.w $vr4, $vr5, 16
; CHECK-NEXT: vextrins.w $vr4, $vr6, 32
; CHECK-NEXT: vextrins.w $vr4, $vr7, 48
@@ -1619,7 +1619,7 @@ define void @buildvector_v8f32_subseq_2(ptr %dst, float %a0, float %a1, float %a
; CHECK-NEXT: # kill: def $f3 killed $f3 def $vr3
; CHECK-NEXT: # kill: def $f2 killed $f2 def $vr2
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0
; CHECK-NEXT: vextrins.w $vr0, $vr1, 16
; CHECK-NEXT: vextrins.w $vr0, $vr2, 32
; CHECK-NEXT: vextrins.w $vr0, $vr3, 48
@@ -1643,7 +1643,7 @@ define void @buildvector_v8f32_subseq_4(ptr %dst, float %a0, float %a1) nounwind
; CHECK-LABEL: buildvector_v8f32_subseq_4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0
; CHECK-NEXT: vextrins.w $vr0, $vr1, 16
; CHECK-NEXT: xvreplve0.d $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
@@ -1665,9 +1665,9 @@ define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, dou
; CHECK-LABEL: buildvector_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $f3_64 killed $f3_64 def $vr3
-; CHECK-NEXT: # kill: def $f2_64 killed $f2_64 def $xr2
+; CHECK-NEXT: # kill: def $f2_64 killed $f2_64 def $vr2 def $xr2
; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $vr1
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
; CHECK-NEXT: vextrins.d $vr2, $vr3, 16
; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
@@ -1722,7 +1722,7 @@ define void @buildvector_v4f64_subseq_2(ptr %dst, double %a0, double %a1) nounwi
; CHECK-LABEL: buildvector_v4f64_subseq_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $vr1
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
; CHECK-NEXT: xvreplve0.q $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
index 45b25013c9173..d01848e8547b6 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
@@ -22,7 +22,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl __powisf2
-; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0
; LA32-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
; LA32-NEXT: vextrins.w $vr0, $vr1, 16
; LA32-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
@@ -56,7 +56,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl __powisf2
-; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0
; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
; LA32-NEXT: vextrins.w $vr0, $vr1, 16
; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
@@ -105,7 +105,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0
; LA64-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
; LA64-NEXT: vextrins.w $vr0, $vr1, 16
; LA64-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
@@ -143,7 +143,7 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0
; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
; LA64-NEXT: vextrins.w $vr0, $vr1, 16
; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
@@ -198,7 +198,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl __powidf2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
; LA32-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
; LA32-NEXT: vextrins.d $vr0, $vr1, 16
; LA32-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
@@ -214,7 +214,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl __powidf2
-; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
; LA32-NEXT: vextrins.d $vr0, $vr1, 16
; LA32-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
@@ -244,7 +244,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(__powidf2)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
; LA64-NEXT: vextrins.d $vr0, $vr1, 16
; LA64-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
@@ -262,7 +262,7 @@ define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
; LA64-NEXT: move $a0, $fp
; LA64-NEXT: pcaddu18i $ra, %call36(__powidf2)
; LA64-NEXT: jirl $ra, $ra, 0
-; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
; LA64-NEXT: vextrins.d $vr0, $vr1, 16
; LA64-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
diff --git a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
index bba269279937a..be5d42bdfb975 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll
@@ -49,7 +49,7 @@ define <4 x i64> @scalar_to_4xi64(i64 %val) {
define <8 x float> @scalar_to_8xf32(float %val) {
; CHECK-LABEL: scalar_to_8xf32:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 def $xr0
; CHECK-NEXT: ret
%ret = insertelement <8 x float> poison, float %val, i32 0
ret <8 x float> %ret
@@ -58,7 +58,7 @@ define <8 x float> @scalar_to_8xf32(float %val) {
define <4 x double> @scalar_to_4xf64(double %val) {
; CHECK-LABEL: scalar_to_4xf64:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 def $xr0
; CHECK-NEXT: ret
%ret = insertelement <4 x double> poison, double %val, i32 0
ret <4 x double> %ret
diff --git a/llvm/test/CodeGen/PowerPC/half.ll b/llvm/test/CodeGen/PowerPC/half.ll
index 903ea691ae6ba..8b5b7962da33f 100644
--- a/llvm/test/CodeGen/PowerPC/half.ll
+++ b/llvm/test/CodeGen/PowerPC/half.ll
@@ -1365,6 +1365,7 @@ define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
; P8-NEXT: bl __extendhfsf2
; P8-NEXT: nop
; P8-NEXT: li r3, 80
+; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
; P8-NEXT: xxmrghd vs0, vs61, vs1
; P8-NEXT: xxmrghd vs1, vs63, vs62
; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
index e4fb812486c25..c3bc951fdcbbf 100644
--- a/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
+++ b/llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir
@@ -145,16 +145,16 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
- ; CHECK-NEXT: TEST64rr [[MOV32r0_1]], [[MOV32r0_1]], implicit-def $eflags
- ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = SETCCr 4, implicit killed $eflags
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY [[MOV32r0_1]]
+ ; CHECK-NEXT: undef [[MOV32r0_1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags
+ ; CHECK-NEXT: TEST64rr [[COPY1]], [[COPY1]], implicit-def $eflags
+ ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]].sub_8bit:gr64_with_sub_8bit = SETCCr 4, implicit killed $eflags
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
; CHECK-NEXT: CALL64r [[MOV32r0_]], csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32r0_2]], 4, implicit-def dead $eflags
- ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]]:gr64_with_sub_8bit = ADD64rr [[MOV32r0_2]], [[COPY]], implicit-def dead $eflags
- ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr64_with_sub_8bit = COPY [[MOV32r0_2]]
+ ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32r0_1]], 4, implicit-def dead $eflags
+ ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr64_with_sub_8bit = ADD64rr [[MOV32r0_1]], [[COPY]], implicit-def dead $eflags
; CHECK-NEXT: JMP_1 %bb.1
bb.0:
liveins: $rdi
>From 9478968f2262136f5fe3a738101885de0daef6c7 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 27 Oct 2025 16:38:43 +0000
Subject: [PATCH 3/5] Fix an issue where we only add an implicit-def if there
is a sub-idx
There's no point in doing it for something like this:
%1:gpr64sp = ORRXri %0, 8128
%3:gpr64 = SUBREG_TO_REG 0, %1.sub_32, %subreg.sub_32
where ORRXri already writes to a gpr64 register class (although one that
also includes the sp).
---
llvm/lib/CodeGen/RegisterCoalescer.cpp | 11 +++++-----
...er-coalesce-implicit-def-subreg-to-reg.mir | 22 +++++++++++++++++++
2 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index fb4175a1c3452..47921da9fc8fc 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1968,12 +1968,11 @@ void RegisterCoalescer::updateRegDefsUses(
//
// Because that would thrash the top 24 bits of %1.sub32.
if (is_contained(SubregToRegSrcInsts, UseMI) &&
- all_of(UseMI->defs(),
- [&SubIdx, &SrcReg](const MachineOperand &MO) -> bool {
- if (MO.getReg() != SrcReg || !MO.getSubReg() || MO.isUndef())
- return true;
- return SubIdx == MO.getSubReg();
- })) {
+ all_of(UseMI->defs(), [&SubIdx](const MachineOperand &MO) -> bool {
+ if (MO.isUndef())
+ return true;
+ return SubIdx && (!MO.getSubReg() || SubIdx == MO.getSubReg());
+ })) {
// Add implicit-def of super-register to express that the whole
// register is defined by the instruction.
MachineInstrBuilder MIB(*MF, UseMI);
diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
index aecb90adecd71..a58a23068896b 100644
--- a/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
@@ -21,3 +21,25 @@ body: |
$x1 = COPY killed %193:gpr64all
RET_ReallyLR implicit $x1, implicit $x0
...
+
+# In this test, we should avoid adding an implicit-def to ORRXri, because
+# the register class will already be gpr64sp.
+---
+name: test2
+tracksRegLiveness: true
+frameInfo:
+ adjustsStack: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: test2
+ ; CHECK: renamable $x8 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $x9 = ORRXri renamable $x8, 8128
+ ; CHECK-NEXT: $x2 = ORRXri renamable $x8, 8128
+ ; CHECK-NEXT: RET_ReallyLR implicit killed renamable $x8, implicit killed renamable $x9
+ %0:gpr64 = IMPLICIT_DEF
+ %1:gpr64sp = ORRXri %0, 8128
+ %3:gpr64 = SUBREG_TO_REG 0, %1.sub_32, %subreg.sub_32
+ %2:gpr64all = COPY killed %1
+ $x2 = COPY killed %2
+ RET_ReallyLR implicit %0, implicit %3
+...
>From aebc086b73d15232bfd0c8e8f8099343b28c91a4 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 14 Nov 2025 15:19:29 +0000
Subject: [PATCH 4/5] addDeadDef should iterate over all defs (including
implicit ones)
otherwise it results in a machine verifier failure when
splitting the liverange and rematerialising a MOVi32imm
instruction.
This is only an issue when enabling subreg liveness tracking.
---
llvm/lib/CodeGen/SplitKit.cpp | 2 +-
llvm/test/CodeGen/AArch64/pr164181-reduced.ll | 183 ++++++++++++++++++
2 files changed, 184 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/pr164181-reduced.ll
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 8ec4bfbb5a330..cf064b90a7d34 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -448,7 +448,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) {
const MachineInstr *DefMI = LIS.getInstructionFromIndex(Def);
assert(DefMI != nullptr);
LaneBitmask LM;
- for (const MachineOperand &DefOp : DefMI->defs()) {
+ for (const MachineOperand &DefOp : DefMI->all_defs()) {
Register R = DefOp.getReg();
if (R != LI.reg())
continue;
diff --git a/llvm/test/CodeGen/AArch64/pr164181-reduced.ll b/llvm/test/CodeGen/AArch64/pr164181-reduced.ll
new file mode 100644
index 0000000000000..192893e6a08cc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr164181-reduced.ll
@@ -0,0 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+;
+; This is a reduced version of pr164181.ll, which failed with subreg liveness enabled
+; after adding the implicit-def for a SUBREG_TO_REG to mark the top 32-bits of a register
+; being written by a MOVi32imm instruction. This previously failed the machine verifier
+; because the liverange for the top 32-bits weren't updated when rematerializing the
+; MOVi32imm.
+;
+; RUN: llc -verify-machineinstrs -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -enable-subreg-liveness=false < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @f(i1 %var_0, i64 %var_2, i64 %var_11, ptr %arr_3, ptr %arr_4, ptr %arr_7, ptr %arr_13, ptr %invariant.gep875.us, ptr %arrayidx384.us, i16 %0, i1 %tobool435.not.us, ptr %gep876.us, i16 %cond464.in.us, ptr %1, i16 %conv227.us, i1 %cmp378.us) #0 {
+; CHECK-LABEL: f:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-96]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w21, -24
+; CHECK-NEXT: .cfi_offset w22, -32
+; CHECK-NEXT: .cfi_offset w23, -40
+; CHECK-NEXT: .cfi_offset w24, -48
+; CHECK-NEXT: .cfi_offset w25, -56
+; CHECK-NEXT: .cfi_offset w26, -64
+; CHECK-NEXT: .cfi_offset w27, -72
+; CHECK-NEXT: .cfi_offset w28, -80
+; CHECK-NEXT: .cfi_offset w30, -96
+; CHECK-NEXT: ldrb w9, [sp, #152]
+; CHECK-NEXT: ldrh w10, [sp, #144]
+; CHECK-NEXT: mov x19, #-18403 // =0xffffffffffffb81d
+; CHECK-NEXT: ldr x11, [sp, #136]
+; CHECK-NEXT: ldrh w12, [sp, #128]
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: ldr x13, [sp, #120]
+; CHECK-NEXT: ldrb w14, [sp, #112]
+; CHECK-NEXT: mov w15, wzr
+; CHECK-NEXT: ldrh w16, [sp, #104]
+; CHECK-NEXT: ldr x17, [sp, #96]
+; CHECK-NEXT: mov w18, #149 // =0x95
+; CHECK-NEXT: movk x19, #58909, lsl #16
+; CHECK-NEXT: mov w20, #-18978 // =0xffffb5de
+; CHECK-NEXT: mov w21, #1 // =0x1
+; CHECK-NEXT: mov w22, #-7680 // =0xffffe200
+; CHECK-NEXT: mov w23, #36006 // =0x8ca6
+; CHECK-NEXT: mov x25, xzr
+; CHECK-NEXT: mov x24, xzr
+; CHECK-NEXT: .LBB0_1: // %for.body99.us
+; CHECK-NEXT: // =>This Loop Header: Depth=1
+; CHECK-NEXT: // Child Loop BB0_4 Depth 2
+; CHECK-NEXT: // Child Loop BB0_10 Depth 2
+; CHECK-NEXT: mov w27, w15
+; CHECK-NEXT: mov x26, x25
+; CHECK-NEXT: mov x28, x24
+; CHECK-NEXT: b .LBB0_4
+; CHECK-NEXT: .LBB0_2: // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT: mov w25, #1 // =0x1
+; CHECK-NEXT: .LBB0_3: // %for.inc371.us
+; CHECK-NEXT: // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT: mul w27, w15, w20
+; CHECK-NEXT: mov x28, xzr
+; CHECK-NEXT: mov x26, x2
+; CHECK-NEXT: tbz w0, #0, .LBB0_9
+; CHECK-NEXT: .LBB0_4: // %for.body194.us
+; CHECK-NEXT: // Parent Loop BB0_1 Depth=1
+; CHECK-NEXT: // => This Inner Loop Header: Depth=2
+; CHECK-NEXT: orr x15, x28, x19
+; CHECK-NEXT: mov x24, x28
+; CHECK-NEXT: strh w10, [x13]
+; CHECK-NEXT: strb w18, [x5]
+; CHECK-NEXT: str x15, [x4]
+; CHECK-NEXT: mov w15, w27
+; CHECK-NEXT: str x8, [x11]
+; CHECK-NEXT: str x1, [x3]
+; CHECK-NEXT: tbz w14, #0, .LBB0_2
+; CHECK-NEXT: // %bb.5: // %if.then327.us
+; CHECK-NEXT: // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT: cbnz w21, .LBB0_7
+; CHECK-NEXT: // %bb.6: // %cond.true331.us
+; CHECK-NEXT: // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT: ldrsb w27, [x8]
+; CHECK-NEXT: b .LBB0_8
+; CHECK-NEXT: .LBB0_7: // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT: mov w27, wzr
+; CHECK-NEXT: .LBB0_8: // %cond.end345.us
+; CHECK-NEXT: // in Loop: Header=BB0_4 Depth=2
+; CHECK-NEXT: mov x25, xzr
+; CHECK-NEXT: strh w27, [x3]
+; CHECK-NEXT: str x26, [x6]
+; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: .LBB0_9: // %for.cond376.preheader.us
+; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: mov x26, xzr
+; CHECK-NEXT: mov w27, wzr
+; CHECK-NEXT: .LBB0_10: // %for.body380.us
+; CHECK-NEXT: // Parent Loop BB0_1 Depth=1
+; CHECK-NEXT: // => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ands w28, w0, #0x1
+; CHECK-NEXT: orr x26, x26, #0x1
+; CHECK-NEXT: strh w16, [x7]
+; CHECK-NEXT: csel w30, w23, w22, ne
+; CHECK-NEXT: tst w12, #0xffff
+; CHECK-NEXT: csel w21, wzr, w27, eq
+; CHECK-NEXT: cmp w28, #0
+; CHECK-NEXT: str w30, [x17]
+; CHECK-NEXT: csel w27, w27, w21, ne
+; CHECK-NEXT: tbnz w9, #0, .LBB0_10
+; CHECK-NEXT: // %bb.11: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: mov w21, #1 // =0x1
+; CHECK-NEXT: b .LBB0_1
+entry:
+ br label %for.body99.us
+
+for.body99.us: ; preds = %for.inc505.us, %entry
+ %mul287985.us = phi i16 [ 0, %entry ], [ %mul287986.us, %for.inc505.us ]
+ %mul354905.us = phi i64 [ 0, %entry ], [ %mul354907.us, %for.inc505.us ]
+ %sub283896.us = phi i64 [ 0, %entry ], [ %sub283897.us, %for.inc505.us ]
+ %conv96880.us4 = phi i64 [ 0, %entry ], [ 0, %for.inc505.us ]
+ br label %for.body194.us
+
+for.body380.us: ; preds = %for.cond376.preheader.us, %for.inc505.us
+ %indvars.iv10181 = phi i64 [ 0, %for.cond376.preheader.us ], [ %indvars.iv.next1019, %for.inc505.us ]
+ %2 = phi i8 [ 0, %for.cond376.preheader.us ], [ %3, %for.inc505.us ]
+ store i16 %0, ptr %invariant.gep875.us, align 2
+ %arrayidx416.us = getelementptr i16, ptr %arr_13, i64 %indvars.iv10181
+ %conv419.us = select i1 %var_0, i32 36006, i32 -7680
+ store i32 %conv419.us, ptr %arrayidx384.us, align 4
+ br i1 %var_0, label %for.inc505.us, label %if.then436.us
+
+if.then436.us: ; preds = %for.body380.us
+ %cond464.in.us6 = load i16, ptr null, align 2
+ %tobool465.not.us = icmp eq i16 %cond464.in.us, 0
+ %spec.select = select i1 %tobool465.not.us, i8 0, i8 %2
+ br label %for.inc505.us
+
+for.inc505.us: ; preds = %if.then436.us, %for.body380.us
+ %3 = phi i8 [ %2, %for.body380.us ], [ %spec.select, %if.then436.us ]
+ %indvars.iv.next1019 = or i64 %indvars.iv10181, 1
+ br i1 %cmp378.us, label %for.body380.us, label %for.body99.us
+
+for.body194.us: ; preds = %for.inc371.us, %for.body99.us
+ %mul287986.us = phi i16 [ %mul287985.us, %for.body99.us ], [ %mul287.us, %for.inc371.us ]
+ %mul354906.us = phi i64 [ %mul354905.us, %for.body99.us ], [ %var_11, %for.inc371.us ]
+ %sub283897.us = phi i64 [ %sub283896.us, %for.body99.us ], [ 0, %for.inc371.us ]
+ store i16 %conv227.us, ptr %gep876.us, align 2
+ store i8 -107, ptr %arr_7, align 1
+ %sub283.us = or i64 %sub283897.us, -434259939
+ store i64 %sub283.us, ptr %arr_4, align 8
+ %mul287.us = mul i16 %mul287986.us, -18978
+ store i64 0, ptr %1, align 8
+ store i64 %var_2, ptr %arr_3, align 8
+ br i1 %tobool435.not.us, label %if.then327.us, label %for.inc371.us
+
+if.then327.us: ; preds = %for.body194.us
+ %tobool330.not.us = icmp eq i32 0, 0
+ br i1 %tobool330.not.us, label %cond.end345.us, label %cond.true331.us
+
+cond.true331.us: ; preds = %if.then327.us
+ %4 = load i8, ptr null, align 1
+ %5 = sext i8 %4 to i16
+ br label %cond.end345.us
+
+cond.end345.us: ; preds = %cond.true331.us, %if.then327.us
+ %cond346.us = phi i16 [ %5, %cond.true331.us ], [ 0, %if.then327.us ]
+ store i16 %cond346.us, ptr %arr_3, align 2
+ store i64 %mul354906.us, ptr %arr_13, align 8
+ br label %for.inc371.us
+
+for.inc371.us: ; preds = %cond.end345.us, %for.body194.us
+ %mul354907.us = phi i64 [ 1, %for.body194.us ], [ 0, %cond.end345.us ]
+ br i1 %var_0, label %for.body194.us, label %for.cond376.preheader.us
+
+for.cond376.preheader.us: ; preds = %for.inc371.us
+ %arrayidx384.us9 = getelementptr i16, ptr null, i64 %conv96880.us4
+ br label %for.body380.us
+}
+
+attributes #0 = { "frame-pointer"="non-leaf" }
>From 78746b77beea4239a698b8ce3bdf4ed34691b02d Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 18 Nov 2025 12:18:03 +0000
Subject: [PATCH 5/5] Address suggestions
---
llvm/lib/CodeGen/RegisterCoalescer.cpp | 49 +++++++++++++++-----------
1 file changed, 28 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 47921da9fc8fc..8ae084b7582dc 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1966,7 +1966,8 @@ void RegisterCoalescer::updateRegDefsUses(
// %1.sub32:gr64 = INSTX
// undef %1.sub8:gr64 = INSTY , implicit-def %1
//
- // Because that would thrash the top 24 bits of %1.sub32.
+ // because the undef means that none of the bits of %1 are read, thus
+ // thrashing the top 24 bits of %1.sub32.
if (is_contained(SubregToRegSrcInsts, UseMI) &&
all_of(UseMI->defs(), [&SubIdx](const MachineOperand &MO) -> bool {
if (MO.isUndef())
@@ -2009,7 +2010,7 @@ void RegisterCoalescer::updateRegDefsUses(
// 32B %1:gpr32 = MOVIMM32 ..
// 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
//
- // Only the MOVIMM32 require a def of the top lanes and any intervals
+ // Only the MOVIMM32 requires a def of the top lanes and any intervals
// for the top 32-bits of the def at 16B should be removed.
for (LiveInterval::SubRange &SR : DstInt->subranges()) {
if (!Writes || RequiresImplicitRedef ||
@@ -2019,7 +2020,7 @@ void RegisterCoalescer::updateRegDefsUses(
assert((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
"Unexpected lanemask. Subrange needs finer granularity");
- SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(false);
+ SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot();
auto SegmentI = SR.find(UseIdx);
if (SegmentI != SR.end())
SR.removeSegment(SegmentI, true);
@@ -2140,26 +2141,33 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
/// For a given use of value \p Idx, it returns the def in the current block,
/// or otherwise all possible defs in preceding blocks.
-static bool FindDefInBlock(SmallPtrSetImpl<MachineBasicBlock *> &VisitedBlocks,
- SmallVector<MachineInstr *> &Instrs,
- LiveIntervals *LIS, LiveInterval &SrcInt,
- MachineBasicBlock *MBB, VNInfo *Idx) {
- if (!Idx->isPHIDef()) {
+static bool findPrecedingDefs(SmallVector<MachineInstr *> &Instrs,
+ LiveIntervals *LIS, LiveInterval &SrcInt,
+ MachineBasicBlock *MBB, VNInfo *Idx) {
+ auto IsPrecedingDef = [&](VNInfo *Idx) -> bool {
+ if (Idx->isPHIDef())
+ return false;
MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def);
assert(Def && "Unable to find a def for SUBREG_TO_REG source operand");
Instrs.push_back(Def);
return true;
- }
+ };
- bool Any = false;
- if (VisitedBlocks.count(MBB))
- return false;
- VisitedBlocks.insert(MBB);
- for (MachineBasicBlock *Pred : MBB->predecessors()) {
- Any |= FindDefInBlock(VisitedBlocks, Instrs, LIS, SrcInt, Pred,
- SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Pred)));
+ if (IsPrecedingDef(Idx))
+ return true;
+
+ SmallVector<MachineBasicBlock *> Worklist = {MBB};
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
+ for (unsigned I = 0; I < Worklist.size(); ++I) {
+ if (VisitedBlocks.count(Worklist[I]))
+ continue;
+ VisitedBlocks.insert(Worklist[I]);
+ VNInfo *Idx = SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(Worklist[I]));
+ if (!IsPrecedingDef(Idx))
+ Worklist.append(Worklist[I]->pred_begin(), Worklist[I]->pred_end());
}
- return Any;
+
+ return !Instrs.empty();
}
bool RegisterCoalescer::joinCopy(
@@ -2321,10 +2329,9 @@ bool RegisterCoalescer::joinCopy(
// // do with the SUBREG_TO_REG.
LiveInterval &SrcInt = LIS->getInterval(SrcReg);
SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
- SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
- if (!FindDefInBlock(VisitedBlocks, SubregToRegSrcInsts, LIS, SrcInt,
- CopyMI->getParent(),
- SrcInt.Query(SubregToRegSlotIdx).valueIn()))
+ if (!findPrecedingDefs(SubregToRegSrcInsts, LIS, SrcInt,
+ CopyMI->getParent(),
+ SrcInt.Query(SubregToRegSlotIdx).valueIn()))
llvm_unreachable("SUBREG_TO_REG src requires a def");
}
More information about the llvm-commits
mailing list